Hi,
This Puppeteer node does not paginate correctly: it returns the results of the first page only and then stops.
Here is the "Next" pagination link I found in the website's HTML on successive pages:
<a class="next ajax-page" href="/search?search_terms=travel%20agency&geo_location_terms=Ansonia%2C%20CT&page=2" data-page="2" data-analytics='{"click_id":132}' data-remote="true" data-impressed="1">Next</a>
<a class="next ajax-page" href="/search?search_terms=travel%20agency&geo_location_terms=Ansonia%2C%20CT&page=3" data-page="3" data-analytics='{"click_id":132}' data-remote="true" data-impressed="1">Next</a>
<a class="next ajax-page" href="/search?search_terms=travel%20agency&geo_location_terms=Ansonia%2C%20CT&page=4" data-page="4" data-analytics='{"click_id":132}' data-remote="true" data-impressed="1">Next</a>
const results = [];
for (const item of $input.all()) {
const baseUrl = item.json.baseUrl;
let pageCount = 1;
console.log(`🚀 Starting: ${baseUrl}`);
await $page.goto(baseUrl, { waitUntil: 'networkidle2', timeout: 60000 });
while (true) {
console.log(`📄 Scraping page ${pageCount}`);
console.log('⏳ Waiting for listings to appear...');
await $page.waitForSelector('div.result', { timeout: 60000 });
console.log('✅ Listings appeared.');
// Check for "No results"
const heading = await $page.$eval('h1', el => el.textContent.trim()).catch(() => '');
if (heading.toLowerCase().includes('no results found')) {
console.log('✅ No results found in heading. Exiting.');
break;
}
const tag = await $page.$eval('#location', el => el.value.trim()).catch(() => '');
// Extract listings
const listings = await $page.$$eval('div.result', (items, tag, baseUrl, pageCount) => {
return items.map(item => {
const getText = sel => item.querySelector(sel)?.textContent.trim() || '';
const getHref = sel => item.querySelector(sel)?.getAttribute('href') || '';
const ypPartial = getHref('a.business-name');
const websiteUrl = getHref('div.links a.track-visit-website');
return {
Name: getText('a.business-name span'),
"Yp link": ypPartial ? 'https://www.yellowpages.com' + ypPartial : '',
"Website Url": websiteUrl || '',
Tag: tag,
baseUrl,
page: pageCount
};
});
}, tag, baseUrl, pageCount);
results.push(...listings.map(l => ({ json: l })));
// Check for next page button
const nextBtn = await $page.$('a.next.ajax-page').catch(() => null);
if (!nextBtn) {
console.log('✅ No next button. Done.');
break;
}
console.log('⏳ Waiting for Next button to be visible...');
await $page.waitForSelector('a.next.ajax-page', { visible: true, timeout: 60000 });
console.log('✅ Next button visible.');
// Store current first business name to detect change
const firstNameBefore = await $page.$eval('a.business-name span', el => el.textContent.trim()).catch(() => '');
// Prepare to wait for AJAX response after clicking Next
const waitResponsePromise = $page.waitForResponse(response =>
response.url().includes('/search?') && response.status() === 200
);
console.log('➡️ Clicking Next button...');
await nextBtn.click();
try {
await Promise.race([
waitResponsePromise,
new Promise((_, reject) => setTimeout(() => reject(new Error('Response wait timed out')), 60000))
]);
console.log('✅ AJAX response received.');
} catch (e) {
console.warn('⚠️ AJAX response wait timed out, proceeding anyway.');
}
// Manual wait for DOM to update (replace waitForTimeout)
await new Promise(resolve => setTimeout(resolve, 3000));
Please help me to get all the results instead of just first page
// Check if listings changed
const firstNameAfter = await $page.$eval('a.business-name span', el => el.textContent.trim()).catch(() => '');
if (!firstNameAfter || firstNameAfter === firstNameBefore) {
console.warn('⚠️ Listings did not change after clicking Next. Ending.');
break;
}
pageCount++;
}
}
return results;