Why can't my node load the binary data?

Describe the problem/error/question

I have a workflow where I read a file from Google Drive, download it as a binary file, and then profile it so I can prepare it to send to the AI as a prompt, but my third node cannot read the binary file correctly:
“file_text_preview”:
“~)^�±zo�”,
“extraction_method”:
“direct_utf8”,
“has_readable_text”:
false,
“_debug”:
{
“binary_obj_found”:
true,
“binary_key”:
“data”,
“binary_entry_found”:
true,
“data_field_present”:
true,
“was_gzip”:
false,
“text_length”:
9

What is the error message (if any)?

Please share your workflow

// =============================================================
// EVIDENCE PROFILER – for n8n 2.8.4 (Code node)
// =============================================================
// Uses $('📥 Watch Evidence Inbox') and $('⬇️ Download File Binary')
// Handles gzip decompression and text extraction.
// =============================================================

const trigger = $('📥 Watch Evidence Inbox').first().json;
const binaryObj = $('⬇️ Download File Binary').first().binary;
console.log('binaryObj keys:', Object.keys(binaryObj));
console.log('first key value type:', typeof binaryObj[Object.keys(binaryObj)[0]]);
const binaryKey = binaryObj ? Object.keys(binaryObj)[0] : null;
const binaryEntry = binaryKey ? binaryObj[binaryKey] : null;

const fileName = binaryEntry?.fileName || trigger.name || 'UNNAMED_FILE';
const mimeType = binaryEntry?.mimeType || trigger.mimeType || 'application/octet-stream';
const fileSizeBytes = parseInt(trigger.size || '0', 10);
const fileId = trigger.id || '';
const webLink = trigger.webViewLink || '';
const createdTime = trigger.createdTime || new Date().toISOString();
const owners = trigger.owners || [];
const uploaderEmail = owners[0]?.emailAddress || 'unknown-uploader';
const fileExt = fileName.includes('.') ? fileName.split('.').pop().toLowerCase() : 'bin';

// Integrity hash (FNV‑1a)
let integrityHash = 'unavailable';
try {
  if (binaryEntry?.data) {
    const buf = Buffer.from(binaryEntry.data, 'base64');
    let h = 0x811c9dc5;
    const sample = Math.min(buf.length, 8192);
    for (let i = 0; i < sample; i++) {
      h ^= buf[i];
      h = Math.imul(h, 0x01000193) >>> 0;
    }
    integrityHash = `FNV32-${h.toString(16).toUpperCase().padStart(8, '0')}-SZ${fileSizeBytes}`;
  }
} catch (e) {
  integrityHash = `hash-error:${e.message.slice(0, 30)}`;
}

// Text extraction with gzip detection
let fileTextPreview = '';
let extractionMethod = 'none';

try {
  if (binaryEntry?.data) {
    const buf = Buffer.from(binaryEntry.data, 'base64');
    const isGzip = buf.length >= 2 && buf[0] === 0x1F && buf[1] === 0x8B;

    let decompressed = buf;
    if (isGzip) {
      const zlib = require('zlib');
      decompressed = zlib.gunzipSync(buf);
      extractionMethod = 'gzip_decompress_utf8';
    } else {
      extractionMethod = 'direct_utf8';
    }

    if (mimeType === 'application/pdf') {
      const raw = decompressed.toString('binary');
      const runs = [];
      let m;
      const re = /BT([\s\S]*?)ET/g;
      while ((m = re.exec(raw)) !== null && runs.join(' ').length < 2500) {
        const chunk = m[1]
          .replace(/\/[A-Za-z0-9]+\s+\d+\s+Tf/g, '')
          .replace(/\d+\.?\d*\s+\d+\.?\d*\s+Td/g, '')
          .replace(/[()]/g, ' ')
          .replace(/[^\x20-\x7E\n]/g, ' ')
          .replace(/\s+/g, ' ')
          .trim();
        if (chunk.length > 5) runs.push(chunk);
      }
      fileTextPreview = runs.join(' ').trim().substring(0, 2500);
      extractionMethod = isGzip ? 'gzip+pdf-stream' : 'pdf-stream';
    } else if (
      mimeType.startsWith('text/') ||
      mimeType === 'application/json' ||
      mimeType === 'application/xml'
    ) {
      fileTextPreview = decompressed.toString('utf-8').substring(0, 2500);
    } else if (
      mimeType.startsWith('application/vnd.openxmlformats') ||
      mimeType.startsWith('application/msword')
    ) {
      const xml = decompressed.toString('utf-8', 0, 150000);
      fileTextPreview = xml
        .replace(/<[^>]+>/g, ' ')
        .replace(/&[a-z]+;/gi, ' ')
        .replace(/\s+/g, ' ')
        .trim()
        .substring(0, 2500);
      extractionMethod = isGzip ? 'gzip+office-xml' : 'office-xml';
    } else if (mimeType.startsWith('image/')) {
      fileTextPreview = '[IMAGE — classification from filename and metadata only]';
      extractionMethod = 'image-meta-only';
    } else {
      const latin = decompressed.toString('binary', 0, 40000);
      fileTextPreview = latin
        .replace(/[^\x20-\x7E\n\r\t]/g, ' ')
        .replace(/\s+/g, ' ')
        .trim()
        .substring(0, 2000);
      extractionMethod = isGzip ? 'gzip+ascii-fallback' : 'ascii-fallback';
    }
  } else if (!binaryObj) {
    fileTextPreview = '[No binary — Node 2 may not have run successfully]';
    extractionMethod = 'no_binary';
  } else {
    fileTextPreview = '[Binary entry missing data field]';
    extractionMethod = 'missing_data';
  }
} catch (e) {
  fileTextPreview = `[Extraction error: ${e.message}]`;
  extractionMethod = 'extraction-error';
}

fileTextPreview = fileTextPreview.replace(/\s{4,}/g, '   ').trim();
const hasReadableText = fileTextPreview.length > 30 && !fileTextPreview.startsWith('[');

// Evidence reference
const now = new Date();
const ymd = now.toISOString().slice(0, 10).replace(/-/g, '');
const idSufx = fileId.slice(-6).toUpperCase() || 'XXXXXX';
const evdRef = `EVD-${ymd}-${idSufx}`;

// Human size
const sizeDisplay = fileSizeBytes > 1048576
  ? `${(fileSizeBytes / 1048576).toFixed(1)} MB`
  : fileSizeBytes > 1024
    ? `${Math.round(fileSizeBytes / 1024)} KB`
    : `${fileSizeBytes} B`;

return {
  json: {
    evidence_ref: evdRef,
    file_id: fileId,
    file_name: fileName,
    file_ext: fileExt,
    mime_type: mimeType,
    web_link: webLink,
    file_size_bytes: fileSizeBytes,
    file_size_display: sizeDisplay,
    integrity_hash: integrityHash,
    uploader_email: uploaderEmail,
    created_time: createdTime,
    upload_date: now.toISOString().slice(0, 10),
    date_prefix: ymd,
    upload_timestamp: now.toISOString(),
    file_text_preview: fileTextPreview,
    extraction_method: extractionMethod,
    has_readable_text: hasReadableText,
    _debug: {
      binary_obj_found: !!binaryObj,
      binary_key: binaryKey,
      binary_entry_found: !!binaryEntry,
      data_field_present: !!(binaryEntry?.data),
      was_gzip: !!(binaryEntry?.data && Buffer.from(binaryEntry.data, 'base64')[0] === 0x1F),
      text_length: fileTextPreview.length,
    }
  }
};

Share the output returned by the last node

Information on your n8n setup

  • n8n version: 2.8.4
  • Running n8n via (Docker, npm, n8n cloud, desktop app): npm
  • Operating system: windows

hi @hitham
welcome to the n8n community!
Your node is loading the binary data correctly, but the file you downloaded is not plain UTF-8 text, so converting the raw binary directly to text returns unreadable characters. Use a parser specific to the real file type (for example PDF extract, DOCX extract, CSV/Excel parser, or OCR for images) instead of `toString('utf-8')`.