Input a PDF that contains images into an AI Agent

I need to input a PDF that contains images into an AI Agent.

Describe the problem/error/question

I need to input a PDF that contains images into an AI Agent, but I don't know how.

What is the error message (if any)?

Please share your workflow

(Select the nodes on your canvas and use the keyboard shortcuts CMD+C/CTRL+C and CMD+V/CTRL+V to copy and paste the workflow.)
{
  "nodes": [
    {
      "parameters": {},
      "type": "n8n-nodes-base.manualTrigger",
      "typeVersion": 1,
      "position": [
        -288,
        -32
      ],
      "id": "1f19da08-ca63-49a0-b893-fd46124c6a39",
      "name": "When clicking ‘Execute workflow’"
    },
    {
      "parameters": {
        "url": "https://www.zipcomic.com/the-amazing-spider-man-2018",
        "options": {}
      },
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [
        -80,
        -32
      ],
      "id": "2c2930b4-6707-4feb-bc1c-d1a27c45bc8d",
      "name": "HTTP Request"
    },
    {
      "parameters": {
        "jsCode": "/**\n * n8n Code / Function (Run once for all items)\n * Output: satu item per Issue -> { issue, link }\n * - Baca HTML dari json.body / json.data / binary base64\n * - Ekstrak anchor \"Issue #...\"\n * - Bersihkan label (Annual, HU, LR, BEY, dst.)\n * - Pilih link terbaik per  issue (hindari annual/HU/LR/BEY kalau ada versi reguler)\n */\nfunction getHtmlFromItem(item) {\n  if (item.json?.body) return String(item.json.body);\n  if (item.json?.data) return String(item.json.data);\n  const bin = item.binary?.data || item.binary?.file || item.binary?.html;\n  if (bin?.data) {\n    const buff = Buffer.from(bin.data, 'base64');\n    return buff.toString('utf-8');\n  }\n  return \"\";\n}\n\nconst base = \"https://www.zipcomic.com\";\nconst html = getHtmlFromItem(items[0]) || \"\";\nconst norm = html.replace(/\\r/g, \"\");\n\n// Temukan <table> yang memuat \"Issue #\"\nconst tableMatch = norm.match(/<table[^>]*>([\\s\\S]*?Issue\\s*#)[\\s\\S]*?<\\/table>/i);\nif (!tableMatch) {\n  return [{\n    json: {\n      error: \"Daftar issue (tabel) tidak ditemukan.\",\n      hint: \"Pastikan input ada di json.body/json.data atau binary.data (base64).\"\n    }\n  }];\n}\nconst tableHtml = tableMatch[0];\n\nconst aRe = /<a[^>]+href=([\"']?)([^\"'>\\s]+)\\1[^>]*>(.*?)<\\/a>/gi;\n\n// Kumpulkan kandidat per \nconst bucket = {}; // num -> [{ href, text }]\nlet m;\nwhile ((m = aRe.exec(tableHtml)) !== null) {\n  const href = m[2];\n  const text = m[3].replace(/<[^>]*>/g, \"\").trim();\n  if (!/Issue\\s*#/i.test(text)) continue;\n\n  const numMatch = text.match(/Issue\\s*#\\D*?(\\d+)/i);\n  if (!numMatch) continue;\n  const num = numMatch[1];\n\n  (bucket[num] ||= []).push({\n    href: href.startsWith(\"http\") ? href : (base + href),\n    text\n  });\n}\n\n// Fungsi skor: semakin tinggi semakin diprioritaskan\nfunction scoreCandidate(num, href, text) {\n  let s = 0;\n\n  // 1) Prefer link yang match tepat '-issue-<num>' (bukan annual)\n  const exactIssue = new RegExp(`-issue-${num}(?:-|$)`, \"i\").test(href);\n  if (exactIssue) s += 10;\n\n  // 2) Penalti untuk label khusus di URL\n  if (/annual/i.test(href)) s -= 8;\n  if (/(?:^|[^a-z])(hu|lr|bey)(?:[^a-z]|$)/i.test(href)) s -= 5;\n\n  // 3) Penalti jika anchor text mengandung label khusus\n  if (/annual/i.test(text)) s -= 4;\n  if (/(?:^|[^a-z])(hu|lr|bey)(?:[^a-z]|$)/i.test(text)) s -= 3;\n\n  // 4) Sedikit bonus jika anchor text terlihat “bersih”\n  if (/^Issue\\s*#\\s*\\d+\\s*$/i.test(text.replace(/\\s+/g, \" \"))) s += 2;\n\n  // 5) Bonus kecil untuk URL lebih pendek (biasanya versi reguler)\n  s += Math.max(0, 2 - Math.min(2, (href.length - 40) / 40));\n\n  return s;\n}\n\n// Pilih kandidat terbaik per \nconst results = [];\nfor (const num of Object.keys(bucket).sort((a,b)=>Number(a)-Number(b))) {\n  const candidates = bucket[num];\n  if (!candidates || candidates.length === 0) continue;\n\n  candidates.sort((a, b) => {\n    const sa = scoreCandidate(num, a.href, a.text);\n    const sb = scoreCandidate(num, b.href, b.text);\n    if (sb !== sa) return sb - sa;     // skor tinggi dulu\n    return a.href.length - b.href.length; // tie-break: URL lebih pendek\n  });\n\n  const best = candidates[0];\n  results.push({\n    json: {\n      issue: `Issue #${num}`,\n      link: best.href\n    }\n  });\n}\n\nif (results.length === 0) {\n  return [{\n    json: {\n      warning: \"Tidak ditemukan Issue yang valid di dalam tabel.\",\n      sample: tableHtml.slice(0, 400)\n    }\n  }];\n}\n\nreturn results;\n"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        128,
        -32
      ],
      "id": "a818b1c8-1358-4b2a-8955-802d2d700f69",
      "name": "Code"
    },
    {
      "parameters": {
        "command": "=cd /data/comic && python /data/comic/zipcomic_downloader.py \"{{ $json.link }}\"\n"
      },
      "type": "n8n-nodes-base.executeCommand",
      "typeVersion": 1,
      "position": [
        560,
        32
      ],
      "id": "edec39b3-4713-41a8-9a87-9722d4051a41",
      "name": "Execute Command"
    },
    {
      "parameters": {
        "options": {}
      },
      "type": "n8n-nodes-base.splitInBatches",
      "typeVersion": 3,
      "position": [
        336,
        0
      ],
      "id": "aa389592-57b1-4339-9dcd-3f555c19eda9",
      "name": "Loop Over Items",
      "executeOnce": true
    },
    {
      "parameters": {
        "jsCode": "// n8n Code node\n// Input: items dari Execute Command (tiap item punya .json.stdout)\nreturn items.map(item => {\n  const out = (item.json && item.json.stdout) ? String(item.json.stdout) : \"\";\n\n  // 1) Ambil URL sumber (baris \"URL: ...\")\n  const url = (out.match(/^URL:\\s*(.+)$/m) || [])[1] || item.json.url || item.json.link || null;\n\n  // 2) Hitung jumlah gambar\n  const imgCountStr = (out.match(/Ditemukan\\s+(\\d+)\\s+gambar/i) || [])[1];\n  const images = imgCountStr ? Number(imgCountStr) : null;\n\n  // 3) Ambil path PDF dari baris terakhir\n  const pdfMatch = out.match(/PDF berhasil dibuat:\\s*(.+\\.pdf)/i);\n  const pdfPath = pdfMatch ? pdfMatch[1].trim() : null;\n\n  // 4) Pecah jadi nama file & folder\n  let pdfName = null, dirPath = null, pagesDir = null;\n  if (pdfPath) {\n    const parts = pdfPath.split(\"/\").filter(Boolean);\n    pdfName = parts.pop();                         // e.g. \"The Amazing Spider-Man (2018) - Annual #1.pdf\"\n    dirPath = \"/\" + parts.join(\"/\");               // e.g. \"/data/comic/The Amazing Spider-Man (2018) - Annual #1\"\n    pagesDir = dirPath + \"/pages\";\n  }\n\n  // 5) Parse series, year, issue dari nama PDF\n  let series = null, year = null, issue = null;\n  if (pdfName) {\n    const base = pdfName.replace(/\\.pdf$/i, \"\");\n    // \"Series (YYYY) - Issue Label\"\n    const m = base.match(/^(.*?)(?:\\s*\\((\\d{4})\\))?\\s*-\\s*(.+)$/);\n    if (m) {\n      series = m[1].trim();        // The Amazing Spider-Man\n      year   = m[2] || null;       // 2018 (opsional)\n      issue  = m[3].trim();        // \"Annual #1\" atau \"Issue #1\"\n    } else {\n      series = base;\n    }\n  }\n\n  return {\n    json: {\n      url,\n      images,\n      pdfPath,\n      pdfName,\n      dirPath,\n      pagesDir,\n      series,\n      year,\n      issue\n    }\n  };\n});\n"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        768,
        32
      ],
      "id": "680424f4-8a3e-42a9-827c-741e316615d7",
      "name": "Code1"
    },
    {
      "parameters": {
        "promptType": "define",
        "text": "=",
        "hasOutputParser": true,
        "options": {}
      },
      "type": "@n8n/n8n-nodes-langchain.agent",
      "typeVersion": 2.2,
      "position": [
        1216,
        16
      ],
      "id": "2aac8209-954f-4a73-adda-9562512fb254",
      "name": "AI Agent"
    },
    {
      "parameters": {
        "options": {}
      },
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "typeVersion": 1,
      "position": [
        1168,
        256
      ],
      "id": "f77c1eb9-359d-43dd-84d6-0309371be24e",
      "name": "Google Gemini Chat Model",
      "credentials": {
        "googlePalmApi": {
          "id": "BH43xOHNCnAzcwGT",
          "name": "Google Gemini(PaLM) Api account"
        }
      }
    },
    {
      "parameters": {
        "fileSelector": "={{ $json.pdfPath }}",
        "options": {}
      },
      "type": "n8n-nodes-base.readWriteFile",
      "typeVersion": 1,
      "position": [
        976,
        32
      ],
      "id": "12049390-0486-4777-bd8e-b116c1f62814",
      "name": "read pdf"
    },
    {
      "parameters": {},
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "typeVersion": 1.3,
      "position": [
        1328,
        240
      ],
      "id": "3ab11731-5e14-4aa0-b475-982bada0edff",
      "name": "Structured Output Parser"
    }
  ],
  "connections": {
    "When clicking ‘Execute workflow’": {
      "main": [
        [
          {
            "node": "HTTP Request",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTTP Request": {
      "main": [
        [
          {
            "node": "Code",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Code": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Execute Command": {
      "main": [
        [
          {
            "node": "Code1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Items": {
      "main": [
        [],
        [
          {
            "node": "Execute Command",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Code1": {
      "main": [
        [
          {
            "node": "read pdf",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "AI Agent",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "read pdf": {
      "main": [
        [
          {
            "node": "AI Agent",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Structured Output Parser": {
      "ai_outputParser": [
        [
          {
            "node": "AI Agent",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    }
  },
  "pinData": {},
  "meta": {
    "templateCredsSetupCompleted": true,
    "instanceId": "e21ea19b3dd924cd44cb97d1b90ad6285488008fc7f249ff9cb5481fda9006c9"
  }
}

Share the output returned by the last node

Information on your n8n setup

    • n8n version: 1.106.3

    • Database (default: SQLite): postgres

    • n8n EXECUTIONS_PROCESS setting (default: own, main):

    • Running n8n via (Docker, npm, n8n cloud, desktop app): Docker

    • Operating system: Windows

Try to use this Node instead:

1 Like

Thanks a lot!

You are welcome!

If this helped you to solve your problem, kindly mark the answer as solution. Thank you.

Cheers.

What you can do is use PDF.co to convert the PDF to images, then pass them into the AI model:

You can also specify which pages of the PDF you want to convert.