I need to pass a PDF that contains images to the AI Agent
Describe the problem/error/question
I need to pass a PDF that contains images (scanned comic pages) into the AI Agent node, but I don't know how to do it.
What is the error message (if any)?
Please share your workflow
(Select the nodes on your canvas and use the keyboard shortcuts CMD+C/CTRL+C and CMD+V/CTRL+V to copy and paste the workflow.)
{
"nodes": [
{
"parameters": {},
"type": "n8n-nodes-base.manualTrigger",
"typeVersion": 1,
"position": [
-288,
-32
],
"id": "1f19da08-ca63-49a0-b893-fd46124c6a39",
"name": "When clicking ‘Execute workflow’"
},
{
"parameters": {
"url": "https://www.zipcomic.com/the-amazing-spider-man-2018",
"options": {}
},
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [
-80,
-32
],
"id": "2c2930b4-6707-4feb-bc1c-d1a27c45bc8d",
"name": "HTTP Request"
},
{
"parameters": {
"jsCode": "/**\n * n8n Code / Function (Run once for all items)\n * Output: satu item per Issue -> { issue, link }\n * - Baca HTML dari json.body / json.data / binary base64\n * - Ekstrak anchor \"Issue #...\"\n * - Bersihkan label (Annual, HU, LR, BEY, dst.)\n * - Pilih link terbaik per issue (hindari annual/HU/LR/BEY kalau ada versi reguler)\n */\nfunction getHtmlFromItem(item) {\n if (item.json?.body) return String(item.json.body);\n if (item.json?.data) return String(item.json.data);\n const bin = item.binary?.data || item.binary?.file || item.binary?.html;\n if (bin?.data) {\n const buff = Buffer.from(bin.data, 'base64');\n return buff.toString('utf-8');\n }\n return \"\";\n}\n\nconst base = \"https://www.zipcomic.com\";\nconst html = getHtmlFromItem(items[0]) || \"\";\nconst norm = html.replace(/\\r/g, \"\");\n\n// Temukan <table> yang memuat \"Issue #\"\nconst tableMatch = norm.match(/<table[^>]*>([\\s\\S]*?Issue\\s*#)[\\s\\S]*?<\\/table>/i);\nif (!tableMatch) {\n return [{\n json: {\n error: \"Daftar issue (tabel) tidak ditemukan.\",\n hint: \"Pastikan input ada di json.body/json.data atau binary.data (base64).\"\n }\n }];\n}\nconst tableHtml = tableMatch[0];\n\nconst aRe = /<a[^>]+href=([\"']?)([^\"'>\\s]+)\\1[^>]*>(.*?)<\\/a>/gi;\n\n// Kumpulkan kandidat per \nconst bucket = {}; // num -> [{ href, text }]\nlet m;\nwhile ((m = aRe.exec(tableHtml)) !== null) {\n const href = m[2];\n const text = m[3].replace(/<[^>]*>/g, \"\").trim();\n if (!/Issue\\s*#/i.test(text)) continue;\n\n const numMatch = text.match(/Issue\\s*#\\D*?(\\d+)/i);\n if (!numMatch) continue;\n const num = numMatch[1];\n\n (bucket[num] ||= []).push({\n href: href.startsWith(\"http\") ? 
href : (base + href),\n text\n });\n}\n\n// Fungsi skor: semakin tinggi semakin diprioritaskan\nfunction scoreCandidate(num, href, text) {\n let s = 0;\n\n // 1) Prefer link yang match tepat '-issue-<num>' (bukan annual)\n const exactIssue = new RegExp(`-issue-${num}(?:-|$)`, \"i\").test(href);\n if (exactIssue) s += 10;\n\n // 2) Penalti untuk label khusus di URL\n if (/annual/i.test(href)) s -= 8;\n if (/(?:^|[^a-z])(hu|lr|bey)(?:[^a-z]|$)/i.test(href)) s -= 5;\n\n // 3) Penalti jika anchor text mengandung label khusus\n if (/annual/i.test(text)) s -= 4;\n if (/(?:^|[^a-z])(hu|lr|bey)(?:[^a-z]|$)/i.test(text)) s -= 3;\n\n // 4) Sedikit bonus jika anchor text terlihat “bersih”\n if (/^Issue\\s*#\\s*\\d+\\s*$/i.test(text.replace(/\\s+/g, \" \"))) s += 2;\n\n // 5) Bonus kecil untuk URL lebih pendek (biasanya versi reguler)\n s += Math.max(0, 2 - Math.min(2, (href.length - 40) / 40));\n\n return s;\n}\n\n// Pilih kandidat terbaik per \nconst results = [];\nfor (const num of Object.keys(bucket).sort((a,b)=>Number(a)-Number(b))) {\n const candidates = bucket[num];\n if (!candidates || candidates.length === 0) continue;\n\n candidates.sort((a, b) => {\n const sa = scoreCandidate(num, a.href, a.text);\n const sb = scoreCandidate(num, b.href, b.text);\n if (sb !== sa) return sb - sa; // skor tinggi dulu\n return a.href.length - b.href.length; // tie-break: URL lebih pendek\n });\n\n const best = candidates[0];\n results.push({\n json: {\n issue: `Issue #${num}`,\n link: best.href\n }\n });\n}\n\nif (results.length === 0) {\n return [{\n json: {\n warning: \"Tidak ditemukan Issue yang valid di dalam tabel.\",\n sample: tableHtml.slice(0, 400)\n }\n }];\n}\n\nreturn results;\n"
},
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
128,
-32
],
"id": "a818b1c8-1358-4b2a-8955-802d2d700f69",
"name": "Code"
},
{
"parameters": {
"command": "=cd /data/comic && python /data/comic/zipcomic_downloader.py \"{{ $json.link }}\"\n"
},
"type": "n8n-nodes-base.executeCommand",
"typeVersion": 1,
"position": [
560,
32
],
"id": "edec39b3-4713-41a8-9a87-9722d4051a41",
"name": "Execute Command"
},
{
"parameters": {
"options": {}
},
"type": "n8n-nodes-base.splitInBatches",
"typeVersion": 3,
"position": [
336,
0
],
"id": "aa389592-57b1-4339-9dcd-3f555c19eda9",
"name": "Loop Over Items",
"executeOnce": true
},
{
"parameters": {
"jsCode": "// n8n Code node\n// Input: items dari Execute Command (tiap item punya .json.stdout)\nreturn items.map(item => {\n const out = (item.json && item.json.stdout) ? String(item.json.stdout) : \"\";\n\n // 1) Ambil URL sumber (baris \"URL: ...\")\n const url = (out.match(/^URL:\\s*(.+)$/m) || [])[1] || item.json.url || item.json.link || null;\n\n // 2) Hitung jumlah gambar\n const imgCountStr = (out.match(/Ditemukan\\s+(\\d+)\\s+gambar/i) || [])[1];\n const images = imgCountStr ? Number(imgCountStr) : null;\n\n // 3) Ambil path PDF dari baris terakhir\n const pdfMatch = out.match(/PDF berhasil dibuat:\\s*(.+\\.pdf)/i);\n const pdfPath = pdfMatch ? pdfMatch[1].trim() : null;\n\n // 4) Pecah jadi nama file & folder\n let pdfName = null, dirPath = null, pagesDir = null;\n if (pdfPath) {\n const parts = pdfPath.split(\"/\").filter(Boolean);\n pdfName = parts.pop(); // e.g. \"The Amazing Spider-Man (2018) - Annual #1.pdf\"\n dirPath = \"/\" + parts.join(\"/\"); // e.g. \"/data/comic/The Amazing Spider-Man (2018) - Annual #1\"\n pagesDir = dirPath + \"/pages\";\n }\n\n // 5) Parse series, year, issue dari nama PDF\n let series = null, year = null, issue = null;\n if (pdfName) {\n const base = pdfName.replace(/\\.pdf$/i, \"\");\n // \"Series (YYYY) - Issue Label\"\n const m = base.match(/^(.*?)(?:\\s*\\((\\d{4})\\))?\\s*-\\s*(.+)$/);\n if (m) {\n series = m[1].trim(); // The Amazing Spider-Man\n year = m[2] || null; // 2018 (opsional)\n issue = m[3].trim(); // \"Annual #1\" atau \"Issue #1\"\n } else {\n series = base;\n }\n }\n\n return {\n json: {\n url,\n images,\n pdfPath,\n pdfName,\n dirPath,\n pagesDir,\n series,\n year,\n issue\n }\n };\n});\n"
},
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
768,
32
],
"id": "680424f4-8a3e-42a9-827c-741e316615d7",
"name": "Code1"
},
{
"parameters": {
"promptType": "define",
"text": "=",
"hasOutputParser": true,
"options": {}
},
"type": "@n8n/n8n-nodes-langchain.agent",
"typeVersion": 2.2,
"position": [
1216,
16
],
"id": "2aac8209-954f-4a73-adda-9562512fb254",
"name": "AI Agent"
},
{
"parameters": {
"options": {}
},
"type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
"typeVersion": 1,
"position": [
1168,
256
],
"id": "f77c1eb9-359d-43dd-84d6-0309371be24e",
"name": "Google Gemini Chat Model",
"credentials": {
"googlePalmApi": {
"id": "BH43xOHNCnAzcwGT",
"name": "Google Gemini(PaLM) Api account"
}
}
},
{
"parameters": {
"fileSelector": "={{ $json.pdfPath }}",
"options": {}
},
"type": "n8n-nodes-base.readWriteFile",
"typeVersion": 1,
"position": [
976,
32
],
"id": "12049390-0486-4777-bd8e-b116c1f62814",
"name": "read pdf"
},
{
"parameters": {},
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"typeVersion": 1.3,
"position": [
1328,
240
],
"id": "3ab11731-5e14-4aa0-b475-982bada0edff",
"name": "Structured Output Parser"
}
],
"connections": {
"When clicking ‘Execute workflow’": {
"main": [
[
{
"node": "HTTP Request",
"type": "main",
"index": 0
}
]
]
},
"HTTP Request": {
"main": [
[
{
"node": "Code",
"type": "main",
"index": 0
}
]
]
},
"Code": {
"main": [
[
{
"node": "Loop Over Items",
"type": "main",
"index": 0
}
]
]
},
"Execute Command": {
"main": [
[
{
"node": "Code1",
"type": "main",
"index": 0
}
]
]
},
"Loop Over Items": {
"main": [
[],
[
{
"node": "Execute Command",
"type": "main",
"index": 0
}
]
]
},
"Code1": {
"main": [
[
{
"node": "read pdf",
"type": "main",
"index": 0
}
]
]
},
"Google Gemini Chat Model": {
"ai_languageModel": [
[
{
"node": "AI Agent",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"read pdf": {
"main": [
[
{
"node": "AI Agent",
"type": "main",
"index": 0
}
]
]
},
"Structured Output Parser": {
"ai_outputParser": [
[
{
"node": "AI Agent",
"type": "ai_outputParser",
"index": 0
}
]
]
}
},
"pinData": {},
"meta": {
"templateCredsSetupCompleted": true,
"instanceId": "e21ea19b3dd924cd44cb97d1b90ad6285488008fc7f249ff9cb5481fda9006c9"
}
}
Share the output returned by the last node
Information on your n8n setup
-
-
n8n version: 1.106.3
-
Database (default: SQLite): postgres
-
n8n EXECUTIONS_PROCESS setting (default: own, main):
-
Running n8n via (Docker, npm, n8n cloud, desktop app): Docker
-
Operating system: Windows
-