Describe the problem/error/question
I have a workflow where I read a file from Google Drive, download it as a binary file, and then profile it so I can prepare it to be sent to the AI as a prompt. However, my third node cannot read the binary file correctly; this is the output it produces:
“file_text_preview”:
“~)^�±zo�”,
“extraction_method”:
“direct_utf8”,
“has_readable_text”:
false,
“_debug”:
{
“binary_obj_found”:
true,
“binary_key”:
“data”,
“binary_entry_found”:
true,
“data_field_present”:
true,
“was_gzip”:
false,
“text_length”:
9
What is the error message (if any)?
Please share your workflow
// =============================================================
// EVIDENCE PROFILER – for n8n 2.8.4 (Code node)
// =============================================================
// Uses $('📥 Watch Evidence Inbox') and $('⬇️ Download File Binary')
// Handles gzip decompression and text extraction.
// =============================================================
const trigger = $('📥 Watch Evidence Inbox').first().json;
const binaryObj = $('⬇️ Download File Binary').first().binary;
console.log('binaryObj keys:', Object.keys(binaryObj));
console.log('first key value type:', typeof binaryObj[Object.keys(binaryObj)[0]]);
const binaryKey = binaryObj ? Object.keys(binaryObj)[0] : null;
const binaryEntry = binaryKey ? binaryObj[binaryKey] : null;
const fileName = binaryEntry?.fileName || trigger.name || 'UNNAMED_FILE';
const mimeType = binaryEntry?.mimeType || trigger.mimeType || 'application/octet-stream';
const fileSizeBytes = parseInt(trigger.size || '0', 10);
const fileId = trigger.id || '';
const webLink = trigger.webViewLink || '';
const createdTime = trigger.createdTime || new Date().toISOString();
const owners = trigger.owners || [];
const uploaderEmail = owners[0]?.emailAddress || 'unknown-uploader';
const fileExt = fileName.includes('.') ? fileName.split('.').pop().toLowerCase() : 'bin';
// Integrity hash (FNV‑1a)
let integrityHash = 'unavailable';
try {
if (binaryEntry?.data) {
const buf = Buffer.from(binaryEntry.data, 'base64');
let h = 0x811c9dc5;
const sample = Math.min(buf.length, 8192);
for (let i = 0; i < sample; i++) {
h ^= buf[i];
h = Math.imul(h, 0x01000193) >>> 0;
}
integrityHash = `FNV32-${h.toString(16).toUpperCase().padStart(8, '0')}-SZ${fileSizeBytes}`;
}
} catch (e) {
integrityHash = `hash-error:${e.message.slice(0, 30)}`;
}
// Text extraction with gzip detection
let fileTextPreview = '';
let extractionMethod = 'none';
try {
if (binaryEntry?.data) {
const buf = Buffer.from(binaryEntry.data, 'base64');
const isGzip = buf.length >= 2 && buf[0] === 0x1F && buf[1] === 0x8B;
let decompressed = buf;
if (isGzip) {
const zlib = require('zlib');
decompressed = zlib.gunzipSync(buf);
extractionMethod = 'gzip_decompress_utf8';
} else {
extractionMethod = 'direct_utf8';
}
if (mimeType === 'application/pdf') {
const raw = decompressed.toString('binary');
const runs = [];
let m;
const re = /BT([\s\S]*?)ET/g;
while ((m = re.exec(raw)) !== null && runs.join(' ').length < 2500) {
const chunk = m[1]
.replace(/\/[A-Za-z0-9]+\s+\d+\s+Tf/g, '')
.replace(/\d+\.?\d*\s+\d+\.?\d*\s+Td/g, '')
.replace(/[()]/g, ' ')
.replace(/[^\x20-\x7E\n]/g, ' ')
.replace(/\s+/g, ' ')
.trim();
if (chunk.length > 5) runs.push(chunk);
}
fileTextPreview = runs.join(' ').trim().substring(0, 2500);
extractionMethod = isGzip ? 'gzip+pdf-stream' : 'pdf-stream';
} else if (
mimeType.startsWith('text/') ||
mimeType === 'application/json' ||
mimeType === 'application/xml'
) {
fileTextPreview = decompressed.toString('utf-8').substring(0, 2500);
} else if (
mimeType.startsWith('application/vnd.openxmlformats') ||
mimeType.startsWith('application/msword')
) {
const xml = decompressed.toString('utf-8', 0, 150000);
fileTextPreview = xml
.replace(/<[^>]+>/g, ' ')
.replace(/&[a-z]+;/gi, ' ')
.replace(/\s+/g, ' ')
.trim()
.substring(0, 2500);
extractionMethod = isGzip ? 'gzip+office-xml' : 'office-xml';
} else if (mimeType.startsWith('image/')) {
fileTextPreview = '[IMAGE — classification from filename and metadata only]';
extractionMethod = 'image-meta-only';
} else {
const latin = decompressed.toString('binary', 0, 40000);
fileTextPreview = latin
.replace(/[^\x20-\x7E\n\r\t]/g, ' ')
.replace(/\s+/g, ' ')
.trim()
.substring(0, 2000);
extractionMethod = isGzip ? 'gzip+ascii-fallback' : 'ascii-fallback';
}
} else if (!binaryObj) {
fileTextPreview = '[No binary — Node 2 may not have run successfully]';
extractionMethod = 'no_binary';
} else {
fileTextPreview = '[Binary entry missing data field]';
extractionMethod = 'missing_data';
}
} catch (e) {
fileTextPreview = `[Extraction error: ${e.message}]`;
extractionMethod = 'extraction-error';
}
fileTextPreview = fileTextPreview.replace(/\s{4,}/g, ' ').trim();
const hasReadableText = fileTextPreview.length > 30 && !fileTextPreview.startsWith('[');
// Evidence reference
const now = new Date();
const ymd = now.toISOString().slice(0, 10).replace(/-/g, '');
const idSufx = fileId.slice(-6).toUpperCase() || 'XXXXXX';
const evdRef = `EVD-${ymd}-${idSufx}`;
// Human size
const sizeDisplay = fileSizeBytes > 1048576
? `${(fileSizeBytes / 1048576).toFixed(1)} MB`
: fileSizeBytes > 1024
? `${Math.round(fileSizeBytes / 1024)} KB`
: `${fileSizeBytes} B`;
return {
json: {
evidence_ref: evdRef,
file_id: fileId,
file_name: fileName,
file_ext: fileExt,
mime_type: mimeType,
web_link: webLink,
file_size_bytes: fileSizeBytes,
file_size_display: sizeDisplay,
integrity_hash: integrityHash,
uploader_email: uploaderEmail,
created_time: createdTime,
upload_date: now.toISOString().slice(0, 10),
date_prefix: ymd,
upload_timestamp: now.toISOString(),
file_text_preview: fileTextPreview,
extraction_method: extractionMethod,
has_readable_text: hasReadableText,
_debug: {
binary_obj_found: !!binaryObj,
binary_key: binaryKey,
binary_entry_found: !!binaryEntry,
data_field_present: !!(binaryEntry?.data),
was_gzip: !!(binaryEntry?.data && Buffer.from(binaryEntry.data, 'base64')[0] === 0x1F),
text_length: fileTextPreview.length,
}
}
};
Share the output returned by the last node
Information on your n8n setup
- n8n version: 2.8.4
- Running n8n via (Docker, npm, n8n cloud, desktop app): npm
- Operating system: windows