Hi,
This Puppeteer node does not paginate correctly: it returns only the first page of results and then stops.
Here are the next-page pagination links I found in the website's HTML:
<a class="next ajax-page" href="/search?search_terms=travel%20agency&geo_location_terms=Ansonia%2C%20CT&page=2" data-page="2" data-analytics="{"click_id":132}" data-remote="true" data-impressed="1">Next</a>
<a class="next ajax-page" href="/search?search_terms=travel%20agency&geo_location_terms=Ansonia%2C%20CT&page=3" data-page="3" data-analytics="{"click_id":132}" data-remote="true" data-impressed="1">Next</a>
<a class="next ajax-page" href="/search?search_terms=travel%20agency&geo_location_terms=Ansonia%2C%20CT&page=4" data-page="4" data-analytics="{"click_id":132}" data-remote="true" data-impressed="1">Next</a>
// Click-based pagination scraper. For every input item it opens
// item.json.baseUrl, then repeatedly: waits for 'div.result' listings,
// extracts them, clicks the "Next" link and checks that the listings changed.
// Returns an array of { json: listing } objects.
// `$input` and `$page` are not defined in this snippet; presumably they are
// injected by the Puppeteer node that runs the script — confirm.
const results = [];
for (const item of $input.all()) {
const baseUrl = item.json.baseUrl;
let pageCount = 1;
console.log(`🚀 Starting: ${baseUrl}`);
await $page.goto(baseUrl, { waitUntil: 'networkidle2', timeout: 60000 });
// No page limit: the loop only ends through one of the `break`s below (or a thrown error).
while (true) {
console.log(`📄 Scraping page ${pageCount}`);
console.log('⏳ Waiting for listings to appear...');
// Rejects (and aborts the whole script, there is no try/catch) if no listing shows up within 60 s.
await $page.waitForSelector('div.result', { timeout: 60000 });
console.log('✅ Listings appeared.');
// Check for "No results"
// A failing $eval (e.g. no <h1>) is mapped to '' so the check simply does not match.
const heading = await $page.$eval('h1', el => el.textContent.trim()).catch(() => '');
if (heading.toLowerCase().includes('no results found')) {
console.log('✅ No results found in heading. Exiting.');
break;
}
// Value of the '#location' element, copied into every listing as `Tag` ('' if it cannot be read).
const tag = await $page.$eval('#location', el => el.value.trim()).catch(() => '');
// Extract listings
// The callback runs in the page; tag, baseUrl and pageCount are handed over as
// extra $$eval arguments because outer variables are not visible there.
const listings = await $page.$$eval('div.result', (items, tag, baseUrl, pageCount) => {
return items.map(item => {
// Both helpers return '' when the selector matches nothing inside this listing.
const getText = sel => item.querySelector(sel)?.textContent.trim() || '';
const getHref = sel => item.querySelector(sel)?.getAttribute('href') || '';
const ypPartial = getHref('a.business-name');
const websiteUrl = getHref('div.links a.track-visit-website');
return {
Name: getText('a.business-name span'),
"Yp link": ypPartial ? 'https://www.yellowpages.com' + ypPartial : '',
"Website Url": websiteUrl || '',
Tag: tag,
baseUrl,
page: pageCount
};
});
}, tag, baseUrl, pageCount);
// Wrap each listing in the { json: ... } shape used for the returned items.
results.push(...listings.map(l => ({ json: l })));
// Check for next page button
const nextBtn = await $page.$('a.next.ajax-page').catch(() => null);
if (!nextBtn) {
console.log('✅ No next button. Done.');
break;
}
console.log('⏳ Waiting for Next button to be visible...');
await $page.waitForSelector('a.next.ajax-page', { visible: true, timeout: 60000 });
console.log('✅ Next button visible.');
// Store current first business name to detect change
const firstNameBefore = await $page.$eval('a.business-name span', el => el.textContent.trim()).catch(() => '');
// Prepare to wait for AJAX response after clicking Next
// The wait is registered before the click so a fast response is not missed.
const waitResponsePromise = $page.waitForResponse(response =>
response.url().includes('/search?') && response.status() === 200
);
console.log('➡️ Clicking Next button...');
await nextBtn.click();
try {
// Whichever settles first wins; the 60 s timer is never cleared afterwards.
await Promise.race([
waitResponsePromise,
new Promise((_, reject) => setTimeout(() => reject(new Error('Response wait timed out')), 60000))
]);
console.log('✅ AJAX response received.');
} catch (e) {
// Best effort: a missing response only produces a warning, the loop carries on.
console.warn('⚠️ AJAX response wait timed out, proceeding anyway.');
}
// Manual wait for DOM to update (replace waitForTimeout)
// Fixed 3 s pause; nothing here verifies that the DOM actually finished updating.
await new Promise(resolve => setTimeout(resolve, 3000));
// NOTE(review): the following line is forum prose that was pasted into the middle of the script; it is not JavaScript.
Please help me to get all the results instead of just first page
// Check if listings changed
// NOTE(review): this is the only "did we advance?" test. It ends pagination when the
// first name is unchanged OR when the $eval fails and yields ''. Possibly the cause of
// stopping after page 1 (e.g. same first listing on consecutive pages, or the click
// triggering a navigation that invalidates the lookup) — confirm against the live site.
const firstNameAfter = await $page.$eval('a.business-name span', el => el.textContent.trim()).catch(() => '');
if (!firstNameAfter || firstNameAfter === firstNameBefore) {
console.warn('⚠️ Listings did not change after clicking Next. Ending.');
break;
}
pageCount++;
}
}
return results;
Not sure I can help without enough context, but I can try. Please look at this code:
const results = [];
for (const item of $input.all()) {
const baseUrl = item.json.baseUrl;
let pageCount = 1;
const maxPages = 50; // Safety limit to prevent infinite loops
console.log(`🚀 Starting: ${baseUrl}`);
await $page.goto(baseUrl, { waitUntil: 'networkidle2', timeout: 60000 });
while (pageCount <= maxPages) {
console.log(`📄 Scraping page ${pageCount}`);
try {
// Wait for listings to appear
console.log('⏳ Waiting for listings to appear...');
await $page.waitForSelector('div.result', { timeout: 30000 });
console.log('✅ Listings appeared.');
// Check for "No results"
const heading = await $page.$eval('h1', el => el.textContent.trim()).catch(() => '');
if (heading.toLowerCase().includes('no results found')) {
console.log('✅ No results found in heading. Exiting.');
break;
}
// Check if we have any results on this page
const listingsCount = await $page.$$eval('div.result', items => items.length);
if (listingsCount === 0) {
console.log('✅ No listings found on this page. Exiting.');
break;
}
const tag = await $page.$eval('#location', el => el.value.trim()).catch(() => '');
// Extract listings
const listings = await $page.$$eval('div.result', (items, tag, baseUrl, pageCount) => {
return items.map(item => {
const getText = sel => item.querySelector(sel)?.textContent.trim() || '';
const getHref = sel => item.querySelector(sel)?.getAttribute('href') || '';
const ypPartial = getHref('a.business-name');
const websiteUrl = getHref('div.links a.track-visit-website');
return {
Name: getText('a.business-name span'),
"Yp link": ypPartial ? 'https://www.yellowpages.com' + ypPartial : '',
"Website Url": websiteUrl || '',
Tag: tag,
baseUrl,
page: pageCount
};
});
}, tag, baseUrl, pageCount);
console.log(`✅ Extracted ${listings.length} listings from page ${pageCount}`);
results.push(...listings.map(l => ({ json: l })));
// Look for next page button - multiple approaches
let nextBtn = null;
// Try different selectors for the next button
const nextSelectors = [
'a.next.ajax-page',
'a.next',
'a[data-page]',
'.pagination a.next'
];
for (const selector of nextSelectors) {
nextBtn = await $page.$(selector).catch(() => null);
if (nextBtn) break;
}
if (!nextBtn) {
console.log('✅ No next button found. Reached last page.');
break;
}
// Check if next button is disabled
const isDisabled = await nextBtn.evaluate(el =>
el.classList.contains('disabled') ||
el.getAttribute('disabled') !== null ||
el.style.display === 'none'
).catch(() => false);
if (isDisabled) {
console.log('✅ Next button is disabled. Reached last page.');
break;
}
// Get current URL to compare after navigation
const currentUrl = await $page.url();
// Get the href of next button to construct direct URL
const nextHref = await nextBtn.getAttribute('href').catch(() => null);
if (nextHref) {
// Construct full URL for next page
const nextUrl = nextHref.startsWith('http') ? nextHref : 'https://www.yellowpages.com' + nextHref;
console.log(`➡️ Navigating to: ${nextUrl}`);
// Direct navigation approach (more reliable for AJAX sites)
await $page.goto(nextUrl, { waitUntil: 'networkidle2', timeout: 60000 });
// Verify we actually moved to a new page
const newUrl = await $page.url();
if (newUrl === currentUrl) {
console.warn('⚠️ URL did not change. Ending pagination.');
break;
}
} else {
// Fallback: click approach with proper AJAX handling
console.log('➡️ Clicking Next button...');
// Set up navigation promise before clicking
const navigationPromise = $page.waitForFunction(
(currentListings) => {
const newListings = document.querySelectorAll('div.result');
return newListings.length > 0 &&
JSON.stringify(Array.from(newListings).map(l => l.textContent)) !== currentListings;
},
{ timeout: 30000 },
await $page.$$eval('div.result', items =>
JSON.stringify(items.map(item => item.textContent))
)
);
await nextBtn.click();
try {
await navigationPromise;
console.log('✅ Page content updated after click.');
} catch (e) {
console.warn('⚠️ Timeout waiting for content update. Ending pagination.');
break;
}
}
pageCount++;
// Small delay to be respectful
await new Promise(resolve => setTimeout(resolve, 1000));
} catch (error) {
console.error(`❌ Error on page ${pageCount}:`, error.message);
break;
}
}
if (pageCount > maxPages) {
console.warn(`⚠️ Reached maximum page limit (${maxPages}). Consider using the HighLevel API instead.`);
}
}
console.log(`🎉 Total results collected: ${results.length}`);
return results;
@Bogdan1 That didn't work, brother; it still only returned the first page.
system
Closed
4
This topic was automatically closed 90 days after the last reply. New replies are no longer allowed.