In the topic "Binary input not accessible in Code Python (Beta) node - #2 by napped", @moosa suggested moving the binary data into the JSON as base64, and that worked.
After that change, the following code successfully extracts the attached XML invoice from a PDF in n8n cloud version 1.116.2:
import re
import zlib
import base64
output = []
# Get the binary data from the first input item and decode from base64
#pdf_b64 = items[0]["binary"]["data"]["data"]
pdf_b64 = items[0]["json"]["data"]
#print(pdf_b64)
pdf = base64.b64decode(pdf_b64)
#print(pdf)
stream = re.compile(rb'.*?FlateDecode.*?stream(.*?)endstream', re.S)
for s in stream.findall(pdf):
s = s.strip(b'\r\n')
try:
decompressed = zlib.decompress(s)
decoded = decompressed.decode('utf8')
if 'Invoice' in decoded:
output.append({"json": {"isdoc": decoded}})
except:
pass
return output