I am having trouble building a workflow that will scrape the DailyMed website. I have gotten the workflow to run but it stops between the Extract Drug Records and Split Baches nodes on run “3”. Any help figuring out why would be greatly appreciated!
Workflow file here: {
"name": "DailyMed 2025 Ingestion (Complete)",
"nodes": [
{
"parameters": {
"content": "## DailyMed Data Ingestion Complete\nProcessed all {{$node[\"Process SPL Data\"].json.metadata.total_pages}} pages of data with {{$node[\"Process SPL Data\"].json.metadata.total_elements}} SPL records.",
"height": 236.5167322834646,
"width": 435.1260236220472
},
"id": "7e597d72-42b6-49b5-a0d6-6c66d3d9d85f",
"name": "Process Complete",
"type": "n8n-nodes-base.stickyNote",
"typeVersion": 1,
"position": [
-64,
-192
]
},
{
"parameters": {
"triggerTimes": {
"item": [
{
"mode": "custom"
}
]
}
},
"id": "21bdae6d-480d-4b28-84bc-6835265687ed",
"name": "Cron Trigger",
"type": "n8n-nodes-base.cron",
"typeVersion": 1,
"position": [
-144,
368
],
"disabled": true
},
{
"parameters": {
"values": {
"string": [
{
"name": "start_date",
"value": "2025-01-01"
},
{
"name": "page_size",
"value": "100"
},
{
"name": "current_page",
"value": "1"
}
]
},
"options": {}
},
"id": "82bda56e-11b7-41d9-9b9b-8376b0eeeb25",
"name": "Set Initial Vars",
"type": "n8n-nodes-base.set",
"typeVersion": 2,
"position": [
112,
576
]
},
{
"parameters": {
"url": "=https://dailymed.nlm.nih.gov/dailymed/services/v2/spls.json?page={{$json.current_page}}&pagesize=100",
"sendHeaders": true,
"specifyHeaders": "json",
"jsonHeaders": "{\n \"Accept\": \"application/json\",\n \"User-Agent\": \"n8n-workflow/1.0\",\n \"Content-Type\": \"application/json\"\n}",
"options": {}
},
"id": "dd9356a3-dde7-4f94-a88c-d446d511cbf5",
"name": "Fetch SPL List",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 3,
"position": [
528,
576
]
},
{
"parameters": {
"batchSize": 50,
"options": {}
},
"id": "c52eee0f-4816-428e-8046-f98c9e98529f",
"name": "Split Batches",
"type": "n8n-nodes-base.splitInBatches",
"typeVersion": 1,
"position": [
1280,
576
]
},
{
"parameters": {
"functionCode": "// This node receives a SINGLE drug record from the batch\n// and constructs its detail URL.\n\ntry {\n // 'items[0].json' is the individual drug record.\n // Use null coalescing to handle cases where items[0] might not exist\n const drugRecord = items?.[0]?.json || {};\n\n // Detailed logging for debugging\n console.log(`Processing record: ${JSON.stringify(drugRecord).substring(0, 200)}...`);\n \n // --- ENHANCED SAFETY CHECK ---\n // Check if we have a valid setid and handle gracefully if not\n if (!drugRecord.setid) {\n console.log(\"Warning: Item missing 'setid'. Creating record with null values.\");\n \n // Instead of stopping, return a placeholder record with null values\n // This allows the workflow to continue to the next record\n return [{\n json: {\n url: null,\n setid: null,\n title: drugRecord.title || null,\n published_date: drugRecord.published_date || null,\n _error: \"Missing setid in source record\"\n }\n }];\n }\n\n // Extract the 'setid' directly from the drug record.\n const setid = drugRecord.setid;\n\n // Construct the correct detail URL.\n const detailUrl = `https://dailymed.nlm.nih.gov/dailymed/drugInfo.cfm?setid=${setid}&audience=consumer`;\n\n // Handle potential missing fields with defaults\n const title = drugRecord.title || null;\n const publishedDate = drugRecord.published_date || null;\n \n // Return a new item that contains the URL and passes through the other data.\n // This will be sent to the 'Fetch Detail' node.\n return [{\n json: {\n url: detailUrl,\n setid: setid,\n title: title,\n published_date: publishedDate\n }\n }];\n \n} catch (error) {\n // Catch any unexpected errors to prevent workflow stoppage\n console.error(`Error in Map Detail URL: ${error.message}`);\n \n // Return a record with error information but don't stop the workflow\n return [{\n json: {\n url: null,\n setid: null,\n title: null,\n published_date: null,\n _error: `Unexpected error: ${error.message}`\n }\n }];\n}"
},
"id": "470e3fcb-91b9-4d29-85f2-12497c45acaa",
"name": "Map Detail URL",
"type": "n8n-nodes-base.function",
"typeVersion": 1,
"position": [
1520,
576
]
},
{
"parameters": {
"url": "={{ $json.url }}",
"sendHeaders": true,
"specifyHeaders": "json",
"jsonHeaders": "{\n \"Accept\": \"text/html\", \n \"User-Agent\": \"n8n-workflow/1.0\"\n}",
"options": {
"response": {},
"timeout": 30000
}
},
"id": "5e905c42-a239-47bc-8c8a-7e204863cc6d",
"name": "Fetch Detail",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 3,
"position": [
1776,
576
]
},
{
"parameters": {
"functionCode": "// This node now checks for the \"dummy\" item from empty pages.\n\nconst currentItem = items[0].json;\n\n// --- THIS IS THE NEW CHECK ---\n// If this is the dummy item, stop right here.\n// This prevents it from being sent to the database but allows the workflow\n// to continue to the 'Check Next Page' node.\nif (currentItem.isEmptyPage) {\n console.log(\"Dummy item from empty page detected. Passing metadata to loop check.\");\n return [{ json: { metadata: currentItem.metadata } }];\n}\n\n// If it's a normal item, proceed with the original logic.\nconst rawHtmlData = currentItem.data;\nconst mapNodeItem = $('Map Detail URL').first().json;\nconst fetchNodeMetadata = $('Fetch SPL List').first().json.metadata;\n\nif (!mapNodeItem || !mapNodeItem.setid || !fetchNodeMetadata) {\n console.log(\"Could not find required data from previous steps. Skipping item.\");\n return [];\n}\n\nconst finalObject = {\n setid: mapNodeItem.setid,\n title: mapNodeItem.title,\n published_date: mapNodeItem.published_date,\n raw_xml_data: rawHtmlData,\n version_number: 1,\n document_type: 'SPL',\n author_organization: 'Unknown',\n metadata: fetchNodeMetadata\n};\n\nreturn [{\n json: finalObject\n}];"
},
"id": "b5e83dd9-300a-4cee-8f62-9fc063d2993d",
"name": "Transform",
"type": "n8n-nodes-base.function",
"typeVersion": 1,
"position": [
2592,
576
]
},
{
"parameters": {
"schema": {
"__rl": true,
"mode": "list",
"value": "public"
},
"table": {
"__rl": true,
"value": "spl_documents",
"mode": "list",
"cachedResultName": "spl_documents"
},
"dataMode": "defineBelow",
"valuesToSend": {
"values": [
{
"column": "setid",
"value": "={{ $json.setid }}"
},
{
"column": "version_number",
"value": "={{ $json.version_number }}"
},
{
"column": "document_type",
"value": "={{ $json.document_type }}"
},
{
"column": "title",
"value": "={{ $json.title }}"
},
{
"column": "author_organization",
"value": "={{ $json.author_organization }}"
},
{
"column": "raw_xml_data",
"value": "={{ $json.raw_xml_data }}"
},
{
"column": "effective_time",
"value": "={{ $json.effective_time }}"
}
]
},
"options": {
"skipOnConflict": true
}
},
"id": "4572d122-746c-4fa4-9bf4-4ca3a8567257",
"name": "Insert SPL",
"type": "n8n-nodes-base.postgres",
"typeVersion": 2,
"position": [
2816,
576
],
"credentials": {
"postgres": {
"id": "6boJEIxDS1zafXqe",
"name": "Postgres account 4"
}
}
},
{
"parameters": {},
"type": "n8n-nodes-base.manualTrigger",
"typeVersion": 1,
"position": [
-112,
752
],
"id": "edc3a630-bd8e-4661-baa2-c0563145d281",
"name": "When clicking 'Execute workflow'"
},
{
"parameters": {
"jsCode": "// Copy the variables from the initial set\nconst currentPage = items[0].json.current_page || 1;\nconst pageSize = items[0].json.page_size || 100;\n\n// Return the variables to be used in the next run\nreturn [\n {\n json: {\n current_page: currentPage,\n page_size: pageSize,\n start_date: items[0].json.start_date || '2025-01-01'\n }\n }\n];"
},
"id": "226870d8-25cb-4f4d-8a38-66cb2defc904",
"name": "Set Page Variables",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
336,
576
]
},
{
"parameters": {
"jsCode": "// This node reliably gets the metadata from the 'Transform' node's output.\n\n// Look at the item that came from the 'Transform' node.\nconst transformOutput = $('Transform').first().json;\n\nif (!transformOutput || !transformOutput.metadata) {\n console.log(\"Could not find metadata from 'Transform' node. Stopping loop.\");\n return [];\n}\n\nconst metadata = transformOutput.metadata;\nconst currentPage = parseInt(metadata.current_page);\nconst totalPages = parseInt(metadata.total_pages);\n\nif (currentPage < totalPages) {\n return [{\n json: {\n current_page: currentPage + 1\n }\n }];\n} else {\n console.log(\"All pages processed. Workflow complete.\");\n return [];\n}"
},
"id": "c088ed02-9877-4015-b77b-9ad08acfbd64",
"name": "Check Next Page",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
3136,
96
]
},
{
"parameters": {
"conditions": {
"boolean": [
{
"value1": "={{ !!$json.current_page && $json.current_page > 1 }}",
"value2": true
}
]
}
},
"id": "edc06bef-c4c1-48fe-9fa3-da8131925d27",
"name": "Should Continue Pagination",
"type": "n8n-nodes-base.if",
"typeVersion": 1,
"position": [
1344,
-208
]
},
{
"parameters": {
"jsCode": "// This node processes response data and handles various error conditions\n\nconst response = items[0].json;\n\n// Check if we have a valid response object\nif (!response) {\n console.log(\"Warning: Response is undefined or null. Returning dummy item.\");\n return [{\n json: {\n isErrorRecord: true,\n errorType: \"null_response\",\n errorMessage: \"API response was null or undefined\",\n metadata: {} // Empty metadata to prevent downstream errors\n }\n }];\n}\n\n// Check for API error responses\nif (response.error || response.statusCode >= 400) {\n console.log(`API error detected: ${response.error || response.statusCode}`);\n return [{\n json: {\n isErrorRecord: true,\n errorType: \"api_error\",\n errorMessage: response.error || `Status code: ${response.statusCode}`,\n metadata: response.metadata || {} // Pass metadata if available\n }\n }];\n}\n\n// Case 1: The page has valid records.\nif (response && Array.isArray(response.data) && response.data.length > 0) {\n console.log(`Found ${response.data.length} records to process.`);\n \n // Process each record with error handling\n const drugRecordItems = response.data.map(record => {\n // If the record is missing crucial data or is malformed\n if (!record || typeof record !== 'object') {\n console.log(\"Warning: Invalid record structure detected. Adding null record.\");\n return {\n json: {\n isNullRecord: true,\n errorType: \"invalid_structure\",\n errorMessage: \"Record is null or not an object\",\n metadata: response.metadata // Keep the metadata for pagination\n }\n };\n }\n \n // Handle records with missing required fields\n if (!record.setid) {\n console.log(\"Warning: Record missing setid. Adding invalid record marker.\");\n return {\n json: {\n isInvalidRecord: true,\n errorType: \"missing_field\",\n errorMessage: \"Record missing required 'setid' field\",\n originalData: record, // Keep original data for debugging\n metadata: response.metadata // Keep the metadata for pagination\n }\n };\n }\n \n // If record passes validation, return it normally\n return { json: record };\n });\n \n return drugRecordItems;\n}\n\n// Case 2: The page is empty or the data is invalid.\nconsole.log(\"Empty page detected. Creating a dummy item to continue the loop.\");\nreturn [{\n json: {\n isEmptyPage: true,\n metadata: response.metadata || {} // Use empty object if metadata is undefined\n }\n}];"
},
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1056,
576
],
"id": "867dc434-1ac1-4e51-ad27-23994603b70a",
"name": "Extract Drug Records"
},
{
"parameters": {
"jsCode": "// This node handles errors and validates the SPL List API response\n// Add this Function node immediately after the \"Fetch SPL List\" node\n\n// Get response from HTTP Request node\nconst response = items[0].json;\nconst requestUrl = items[0].parameters?.url || \"unknown URL\";\n\n// Detailed logging\nconsole.log(`Processing SPL list response from page ${$('Set Page Variables').first().json.current_page}`);\n\n// Check if we have a response at all\nif (!response) {\n console.log(`[ERROR] API request failed with no response for ${requestUrl}`);\n // Return a structured error object but allow workflow to continue\n return [{\n json: {\n data: [],\n metadata: {\n current_page: $('Set Page Variables').first().json.current_page || 1,\n total_pages: 1,\n total_elements: 0,\n error: true,\n error_message: \"No response received from API\"\n }\n }\n }];\n}\n\n// Check for HTTP errors\nif (response.statusCode >= 400) {\n console.log(`[ERROR] API error: ${response.statusCode} for ${requestUrl}`);\n return [{\n json: {\n data: [],\n metadata: {\n current_page: $('Set Page Variables').first().json.current_page || 1,\n total_pages: 1,\n total_elements: 0,\n error: true,\n error_message: `API returned status code ${response.statusCode}`\n }\n }\n }];\n}\n\n// Make sure the response has the expected structure\nif (!response.metadata) {\n console.log(\"[ERROR] API returned response without metadata\");\n return [{\n json: {\n data: Array.isArray(response.data) ? response.data : [],\n metadata: {\n current_page: $('Set Page Variables').first().json.current_page || 1,\n total_pages: 1,\n total_elements: 0,\n error: true,\n error_message: \"API returned response without metadata\"\n }\n }\n }];\n}\n\nif (!Array.isArray(response.data)) {\n console.log(\"[ERROR] API returned non-array data\");\n return [{\n json: {\n data: [],\n metadata: response.metadata || {\n current_page: $('Set Page Variables').first().json.current_page || 1,\n total_pages: 1,\n total_elements: 0\n }\n }\n }];\n}\n\n// Log success metrics\nconsole.log(`[SUCCESS] Retrieved ${response.data.length} SPL records from page ${response.metadata.current_page} of ${response.metadata.total_pages}`);\n\n// If everything looks good, return the response\nreturn [{ json: response }];"
},
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
736,
576
],
"id": "30bd3b95-8c46-4399-897b-274ccd2f9220",
"name": "Process SPL Respnse"
},
{
"parameters": {
"jsCode": "// This node handles errors and validates the Detail API response\n// Add this Function node immediately after the \"Fetch Detail\" node\n\n// Get response from HTTP Request node\nconst response = items[0].json;\nconst requestUrl = items[0].parameters?.url || \"unknown URL\";\nconst setid = $('Map Detail URL').first()?.json?.setid || null;\n\n// Detailed logging\nconsole.log(`Processing detail response for setid: ${setid}`);\n\n// Check if we have a response at all\nif (!response) {\n console.log(`[ERROR] Detail fetch failed for ${requestUrl}`);\n // Return structured data with nulls but keep the workflow moving\n return [{\n json: {\n data: null,\n setid: setid,\n title: $('Map Detail URL').first()?.json?.title || null,\n published_date: $('Map Detail URL').first()?.json?.published_date || null,\n _error: \"No response received\"\n }\n }];\n}\n\n// Check for HTTP errors\nif (response.statusCode >= 400) {\n console.log(`[ERROR] Detail fetch error ${response.statusCode} for ${requestUrl}`);\n return [{\n json: {\n data: null,\n setid: setid,\n title: $('Map Detail URL').first()?.json?.title || null,\n published_date: $('Map Detail URL').first()?.json?.published_date || null,\n _error: `Failed with status ${response.statusCode}`\n }\n }];\n}\n\n// Check if response contains data (content)\nif (!response.data || typeof response.data !== 'string' || response.data.length === 0) {\n console.log(`[ERROR] Empty or invalid content received for ${requestUrl}`);\n return [{\n json: {\n data: null,\n setid: setid,\n title: $('Map Detail URL').first()?.json?.title || null,\n published_date: $('Map Detail URL').first()?.json?.published_date || null,\n _error: \"Empty or invalid content received\"\n }\n }];\n}\n\n// Log success\nconsole.log(`[SUCCESS] Retrieved detail data for ${setid}, content length: ${response.data.length}`);\n\n// If everything looks good, return the response\nreturn [{ json: response }];"
},
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1984,
576
],
"id": "eb832eb0-873b-4886-94cd-ad0f4242de6e",
"name": "Code in JavaScript"
}
],
"pinData": {
"Check Next Page": [
{
"json": {
"current_page": 2
}
}
]
},
"connections": {
"Cron Trigger": {
"main": [
[
{
"node": "Set Initial Vars",
"type": "main",
"index": 0
}
]
]
},
"Set Initial Vars": {
"main": [
[
{
"node": "Set Page Variables",
"type": "main",
"index": 0
}
]
]
},
"Fetch SPL List": {
"main": [
[
{
"node": "Process SPL Respnse",
"type": "main",
"index": 0
}
]
]
},
"Split Batches": {
"main": [
[
{
"node": "Map Detail URL",
"type": "main",
"index": 0
}
]
]
},
"Map Detail URL": {
"main": [
[
{
"node": "Fetch Detail",
"type": "main",
"index": 0
}
]
]
},
"Fetch Detail": {
"main": [
[
{
"node": "Code in JavaScript",
"type": "main",
"index": 0
}
]
]
},
"Transform": {
"main": [
[
{
"node": "Insert SPL",
"type": "main",
"index": 0
}
]
]
},
"When clicking 'Execute workflow'": {
"main": [
[
{
"node": "Set Initial Vars",
"type": "main",
"index": 0
}
]
]
},
"Set Page Variables": {
"main": [
[
{
"node": "Fetch SPL List",
"type": "main",
"index": 0
}
]
]
},
"Check Next Page": {
"main": [
[
{
"node": "Should Continue Pagination",
"type": "main",
"index": 0
}
]
]
},
"Should Continue Pagination": {
"main": [
[
{
"node": "Set Page Variables",
"type": "main",
"index": 0
}
]
]
},
"Insert SPL": {
"main": [
[
{
"node": "Check Next Page",
"type": "main",
"index": 0
}
]
]
},
"Extract Drug Records": {
"main": [
[
{
"node": "Split Batches",
"type": "main",
"index": 0
}
]
]
},
"Process SPL Respnse": {
"main": [
[
{
"node": "Extract Drug Records",
"type": "main",
"index": 0
}
]
]
},
"Code in JavaScript": {
"main": [
[
{
"node": "Transform",
"type": "main",
"index": 0
}
]
]
}
},
"active": false,
"settings": {
"executionOrder": "v1"
},
"versionId": "dd960b47-d2e5-41a6-a22b-8a37f4e8e5f7",
"meta": {
"templateCredsSetupCompleted": true,
"instanceId": "1837fa2740db4dce458057271ad73bf299b16785a1e4610e5d5631ea7309a8b3"
},
"id": "DFzUBddcnsKmtOWs",
"tags": [
{
"createdAt": "2025-09-29T11:15:35.326Z",
"updatedAt": "2025-09-29T11:15:35.326Z",
"id": "HawYC8dturzZqb5v",
"name": "DailyMed"
},
{
"createdAt": "2025-09-29T11:15:35.349Z",
"updatedAt": "2025-09-29T11:15:35.349Z",
"id": "JDNnRTQjzhPnSoS2",
"name": "API"
}
]
}
