Input a PDF that contains images into an AI Agent

Futurebillionaire · August 12, 2025, 7:11am

I need to input a PDF that contains images into an AI Agent.

Describe the problem/error/question

I need to input a PDF that contains images into an AI Agent, but I don't know how.

What is the error message (if any)?

Please share your workflow

(Select the nodes on your canvas and use the keyboard shortcuts CMD+C/CTRL+C and CMD+V/CTRL+V to copy and paste the workflow.)
{
  "nodes": [
    {
      "parameters": {},
      "type": "n8n-nodes-base.manualTrigger",
      "typeVersion": 1,
      "position": [
        -288,
        -32
      ],
      "id": "1f19da08-ca63-49a0-b893-fd46124c6a39",
      "name": "When clicking ‘Execute workflow’"
    },
    {
      "parameters": {
        "url": "https://www.zipcomic.com/the-amazing-spider-man-2018",
        "options": {}
      },
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        -80,
        -32
      ],
      "id": "2c2930b4-6707-4feb-bc1c-d1a27c45bc8d",
      "name": "HTTP Request"
    },
    {
      "parameters": {
        "jsCode": "/**\n * n8n Code / Function (Run once for all items)\n * Output: satu item per Issue -> { issue, link }\n * - Baca HTML dari json.body / json.data / binary base64\n * - Ekstrak anchor \"Issue #...\"\n * - Bersihkan label (Annual, HU, LR, BEY, dst.)\n * - Pilih link terbaik per  issue (hindari annual/HU/LR/BEY kalau ada versi reguler)\n */\nfunction getHtmlFromItem(item) {\n  if (item.json?.body) return String(item.json.body);\n  if (item.json?.data) return String(item.json.data);\n  const bin = item.binary?.data || item.binary?.file || item.binary?.html;\n  if (bin?.data) {\n    const buff = Buffer.from(bin.data, 'base64');\n    return buff.toString('utf-8');\n  }\n  return \"\";\n}\n\nconst base = \"https://www.zipcomic.com\";\nconst html = getHtmlFromItem(items[0]) || \"\";\nconst norm = html.replace(/\\r/g, \"\");\n\n// Temukan <table> yang memuat \"Issue #\"\nconst tableMatch = norm.match(/<table[^>]*>([\\s\\S]*?Issue\\s*#)[\\s\\S]*?<\\/table>/i);\nif (!tableMatch) {\n  return [{\n    json: {\n      error: \"Daftar issue (tabel) tidak ditemukan.\",\n      hint: \"Pastikan input ada di json.body/json.data atau binary.data (base64).\"\n    }\n  }];\n}\nconst tableHtml = tableMatch[0];\n\nconst aRe = /<a[^>]+href=([\"']?)([^\"'>\\s]+)\\1[^>]*>(.*?)<\\/a>/gi;\n\n// Kumpulkan kandidat per \nconst bucket = {}; // num -> [{ href, text }]\nlet m;\nwhile ((m = aRe.exec(tableHtml)) !== null) {\n  const href = m[2];\n  const text = m[3].replace(/<[^>]*>/g, \"\").trim();\n  if (!/Issue\\s*#/i.test(text)) continue;\n\n  const numMatch = text.match(/Issue\\s*#\\D*?(\\d+)/i);\n  if (!numMatch) continue;\n  const num = numMatch[1];\n\n  (bucket[num] ||= []).push({\n    href: href.startsWith(\"http\") ? 
href : (base + href),\n    text\n  });\n}\n\n// Fungsi skor: semakin tinggi semakin diprioritaskan\nfunction scoreCandidate(num, href, text) {\n  let s = 0;\n\n  // 1) Prefer link yang match tepat '-issue-<num>' (bukan annual)\n  const exactIssue = new RegExp(`-issue-${num}(?:-|$)`, \"i\").test(href);\n  if (exactIssue) s += 10;\n\n  // 2) Penalti untuk label khusus di URL\n  if (/annual/i.test(href)) s -= 8;\n  if (/(?:^|[^a-z])(hu|lr|bey)(?:[^a-z]|$)/i.test(href)) s -= 5;\n\n  // 3) Penalti jika anchor text mengandung label khusus\n  if (/annual/i.test(text)) s -= 4;\n  if (/(?:^|[^a-z])(hu|lr|bey)(?:[^a-z]|$)/i.test(text)) s -= 3;\n\n  // 4) Sedikit bonus jika anchor text terlihat “bersih”\n  if (/^Issue\\s*#\\s*\\d+\\s*$/i.test(text.replace(/\\s+/g, \" \"))) s += 2;\n\n  // 5) Bonus kecil untuk URL lebih pendek (biasanya versi reguler)\n  s += Math.max(0, 2 - Math.min(2, (href.length - 40) / 40));\n\n  return s;\n}\n\n// Pilih kandidat terbaik per \nconst results = [];\nfor (const num of Object.keys(bucket).sort((a,b)=>Number(a)-Number(b))) {\n  const candidates = bucket[num];\n  if (!candidates || candidates.length === 0) continue;\n\n  candidates.sort((a, b) => {\n    const sa = scoreCandidate(num, a.href, a.text);\n    const sb = scoreCandidate(num, b.href, b.text);\n    if (sb !== sa) return sb - sa;     // skor tinggi dulu\n    return a.href.length - b.href.length; // tie-break: URL lebih pendek\n  });\n\n  const best = candidates[0];\n  results.push({\n    json: {\n      issue: `Issue #${num}`,\n      link: best.href\n    }\n  });\n}\n\nif (results.length === 0) {\n  return [{\n    json: {\n      warning: \"Tidak ditemukan Issue yang valid di dalam tabel.\",\n      sample: tableHtml.slice(0, 400)\n    }\n  }];\n}\n\nreturn results;\n"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        128,
        -32
      ],
      "id": "a818b1c8-1358-4b2a-8955-802d2d700f69",
      "name": "Code"
    },
    {
      "parameters": {
        "command": "=cd /data/comic && python /data/comic/zipcomic_downloader.py \"{{ $json.link }}\"\n"
      },
      "type": "n8n-nodes-base.executeCommand",
      "typeVersion": 1,
      "position": [
        560,
        32
      ],
      "id": "edec39b3-4713-41a8-9a87-9722d4051a41",
      "name": "Execute Command"
    },
    {
      "parameters": {
        "options": {}
      },
      "type": "n8n-nodes-base.splitInBatches",
      "typeVersion": 3,
      "position": [
        336,
        0
      ],
      "id": "aa389592-57b1-4339-9dcd-3f555c19eda9",
      "name": "Loop Over Items",
      "executeOnce": true
    },
    {
      "parameters": {
        "jsCode": "// n8n Code node\n// Input: items dari Execute Command (tiap item punya .json.stdout)\nreturn items.map(item => {\n  const out = (item.json && item.json.stdout) ? String(item.json.stdout) : \"\";\n\n  // 1) Ambil URL sumber (baris \"URL: ...\")\n  const url = (out.match(/^URL:\\s*(.+)$/m) || [])[1] || item.json.url || item.json.link || null;\n\n  // 2) Hitung jumlah gambar\n  const imgCountStr = (out.match(/Ditemukan\\s+(\\d+)\\s+gambar/i) || [])[1];\n  const images = imgCountStr ? Number(imgCountStr) : null;\n\n  // 3) Ambil path PDF dari baris terakhir\n  const pdfMatch = out.match(/PDF berhasil dibuat:\\s*(.+\\.pdf)/i);\n  const pdfPath = pdfMatch ? pdfMatch[1].trim() : null;\n\n  // 4) Pecah jadi nama file & folder\n  let pdfName = null, dirPath = null, pagesDir = null;\n  if (pdfPath) {\n    const parts = pdfPath.split(\"/\").filter(Boolean);\n    pdfName = parts.pop();                         // e.g. \"The Amazing Spider-Man (2018) - Annual #1.pdf\"\n    dirPath = \"/\" + parts.join(\"/\");               // e.g. \"/data/comic/The Amazing Spider-Man (2018) - Annual #1\"\n    pagesDir = dirPath + \"/pages\";\n  }\n\n  // 5) Parse series, year, issue dari nama PDF\n  let series = null, year = null, issue = null;\n  if (pdfName) {\n    const base = pdfName.replace(/\\.pdf$/i, \"\");\n    // \"Series (YYYY) - Issue Label\"\n    const m = base.match(/^(.*?)(?:\\s*\\((\\d{4})\\))?\\s*-\\s*(.+)$/);\n    if (m) {\n      series = m[1].trim();        // The Amazing Spider-Man\n      year   = m[2] || null;       // 2018 (opsional)\n      issue  = m[3].trim();        // \"Annual #1\" atau \"Issue #1\"\n    } else {\n      series = base;\n    }\n  }\n\n  return {\n    json: {\n      url,\n      images,\n      pdfPath,\n      pdfName,\n      dirPath,\n      pagesDir,\n      series,\n      year,\n      issue\n    }\n  };\n});\n"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        768,
        32
      ],
      "id": "680424f4-8a3e-42a9-827c-741e316615d7",
      "name": "Code1"
    },
    {
      "parameters": {
        "promptType": "define",
        "text": "=",
        "hasOutputParser": true,
        "options": {}
      },
      "type": "@n8n/n8n-nodes-langchain.agent",
      "typeVersion": 2.2,
      "position": [
        1216,
        16
      ],
      "id": "2aac8209-954f-4a73-adda-9562512fb254",
      "name": "AI Agent"
    },
    {
      "parameters": {
        "options": {}
      },
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "typeVersion": 1,
      "position": [
        1168,
        256
      ],
      "id": "f77c1eb9-359d-43dd-84d6-0309371be24e",
      "name": "Google Gemini Chat Model",
      "credentials": {
        "googlePalmApi": {
          "id": "BH43xOHNCnAzcwGT",
          "name": "Google Gemini(PaLM) Api account"
        }
      }
    },
    {
      "parameters": {
        "fileSelector": "={{ $json.pdfPath }}",
        "options": {}
      },
      "type": "n8n-nodes-base.readWriteFile",
      "typeVersion": 1,
      "position": [
        976,
        32
      ],
      "id": "12049390-0486-4777-bd8e-b116c1f62814",
      "name": "read pdf"
    },
    {
      "parameters": {},
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "typeVersion": 1.3,
      "position": [
        1328,
        240
      ],
      "id": "3ab11731-5e14-4aa0-b475-982bada0edff",
      "name": "Structured Output Parser"
    }
  ],
  "connections": {
    "When clicking ‘Execute workflow’": {
      "main": [
        [
          {
            "node": "HTTP Request",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTTP Request": {
      "main": [
        [
          {
            "node": "Code",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Code": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Execute Command": {
      "main": [
        [
          {
            "node": "Code1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Items": {
      "main": [
        [],
        [
          {
            "node": "Execute Command",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Code1": {
      "main": [
        [
          {
            "node": "read pdf",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "AI Agent",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "read pdf": {
      "main": [
        [
          {
            "node": "AI Agent",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Structured Output Parser": {
      "ai_outputParser": [
        [
          {
            "node": "AI Agent",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    }
  },
  "pinData": {},
  "meta": {
    "templateCredsSetupCompleted": true,
    "instanceId": "e21ea19b3dd924cd44cb97d1b90ad6285488008fc7f249ff9cb5481fda9006c9"
  }
}

Share the output returned by the last node

Information on your n8n setup

- n8n version: 1.106.3
- Database (default: SQLite): postgres
- n8n EXECUTIONS_PROCESS setting (default: own, main):
- Running n8n via (Docker, npm, n8n cloud, desktop app): Docker
- Operating system: Windows

jabbson · August 12, 2025, 6:42pm

Try to use this Node instead:

Futurebillionaire · August 13, 2025, 4:55pm

Thanks a lot!

jabbson · August 13, 2025, 4:57pm

You are welcome!

If this helped you to solve your problem, kindly mark the answer as solution. Thank you.

Cheers.

moosa · August 13, 2025, 7:03pm

What you can do is use pdf.co to convert the PDF to images, then pass them into the AI model:

You can also specify which pages of the PDF you want.