HTML Extract: suppress certain tags when extracting

When using the HTML Extract node, is there a way to suppress, remove, or ignore specific CSS classes?

Say I'm pulling HTML and, at the end, there is a section that I want to remove. Can it be done?

<p>xxxxxxxxx</p> <div style="text-align:center"> <figure class="image" style="display:inline-block"><img alt="xxxxxxxxxxxxxxxxxxx" height="687" src="xxxxxxxxxxxx" width="800"><figcaption><em>xxxxxxx</em></figcaption></figure></div> 

<h2>Axxxxxxxxxxxxxx</h2> 
<p>xxxxxxxxxx</p> 
<p>

and then there is this section of HTML at the bottom that I want to cut off

Related Articles:



<a href="xxxxxxxxxxx" class="xxxxxx">xxxxxxxxxx</a>

I do not think this is possible with the HTML extract node. But, you probably can do a string replace to achieve this.

1 Like

I'll give it a go

Yeah, not sure I have the ability to do this alone, lol. Think I'll have to come back to it.

Can you provide an example of the html that you are working with, and what you want to remove?

I think this might be of some use.

yeah sure i’ll give an example

So i have a flow like this

It basically scrapes a website looking for articles

{
  "nodes": [
{
  "parameters": {
    "url": "https://www.bleepingcomputer.com/news/security/",
    "responseFormat": "string",
    "options": {}
  },
  "name": "HTTP Request",
  "type": "n8n-nodes-base.httpRequest",
  "typeVersion": 1,
  "position": [
    -110,
    140
  ]
},
{
  "parameters": {
    "extractionValues": {
      "values": [
        {
          "key": "url",
          "cssSelector": "a",
          "returnValue": "attribute",
          "attribute": "href",
          "returnArray": true
        }
      ]
    },
    "options": {}
  },
  "name": "HTML Extract",
  "type": "n8n-nodes-base.htmlExtract",
  "typeVersion": 1,
  "position": [
    90,
    140
  ]
},
{
  "parameters": {
    "functionCode": "return items[0].json.url.map(item => {\n  return {\n    json: {\n      data:item\n    },\n  }\n});\n\n"
  },
  "name": "Function",
  "type": "n8n-nodes-base.function",
  "position": [
    280,
    140
  ],
  "typeVersion": 1
},
{
  "parameters": {
    "conditions": {
      "string": [
        {
          "value1": "={{$json[\"data\"]}}",
          "operation": "contains",
          "value2": "/security/"
        },
        {
          "value1": "={{$json[\"data\"]}}",
          "operation": "notContains",
          "value2": "#"
        }
      ],
      "boolean": []
    }
  },
  "name": "IF",
  "type": "n8n-nodes-base.if",
  "typeVersion": 1,
  "position": [
    500,
    140
  ]
},
{
  "parameters": {
    "functionCode": "const data = [];\n\nfor (const item of items) {\n  if (data.includes(item.json.data)) continue\n  data.push(item.json.data)\n}\n\nreturn data.map((url) => ({ json:{ data: url } }))\n"
  },
  "name": "Function1",
  "type": "n8n-nodes-base.function",
  "position": [
    680,
    130
  ],
  "typeVersion": 1
},
{
  "parameters": {
    "url": "={{$json[\"data\"]}}",
    "responseFormat": "string",
    "options": {}
  },
  "name": "HTTP Request1",
  "type": "n8n-nodes-base.httpRequest",
  "typeVersion": 1,
  "position": [
    880,
    130
  ]
},
{
  "parameters": {
    "extractionValues": {
      "values": [
        {
          "key": "Title",
          "cssSelector": "h1"
        },
        {
          "key": "Article",
          "cssSelector": ".articleBody"
        }
      ]
    },
    "options": {}
  },
  "name": "H1 Extract",
  "type": "n8n-nodes-base.htmlExtract",
  "typeVersion": 1,
  "position": [
    1160,
    130
  ],
  "executeOnce": false
}
  ],
  "connections": {
"HTTP Request": {
  "main": [
    [
      {
        "node": "HTML Extract",
        "type": "main",
        "index": 0
      }
    ]
  ]
},
"HTML Extract": {
  "main": [
    [
      {
        "node": "Function",
        "type": "main",
        "index": 0
      }
    ]
  ]
},
"Function": {
  "main": [
    [
      {
        "node": "IF",
        "type": "main",
        "index": 0
      }
    ]
  ]
},
"IF": {
  "main": [
    [
      {
        "node": "Function1",
        "type": "main",
        "index": 0
      }
    ]
  ]
},
"Function1": {
  "main": [
    [
      {
        "node": "HTTP Request1",
        "type": "main",
        "index": 0
      }
    ]
  ]
},
"HTTP Request1": {
  "main": [
    [
      {
        "node": "H1 Extract",
        "type": "main",
        "index": 0
      }
    ]
  ]
}
  }
}

then trying to strip out this

anything in this div section

<div class="cz-related-article-wrapp">

an example of one article (bear in mind the div classes are the same but the content changes)

<p style="text-align:center"><img alt="Ransomware gang uses PrintNightmare to breach Windows servers" src="https://www.bleepstatic.com/content/hl-images/2021/05/17/Windows.jpg"></p> <p>Ransomware operators have added PrintNightmare exploits to their arsenal and are targeting Windows servers to deploy Magniber ransomware payloads.</p> <p><a href="https://www.bleepingcomputer.com/tag/printnightmare/" target="_blank">PrintNightmare</a> is a class of security vulnerabilities (tracked as <a href="https://msrc.microsoft.com/update-guide/vulnerability/CVE-2021-1675" target="_blank" rel="nofollow noopener">CVE-2021-1675</a>, <a href="https://msrc.microsoft.com/update-guide/vulnerability/CVE-2021-34527" rel="nofollow noopener">CVE-2021-34527</a>, and <a href="https://www.bleepingcomputer.com/news/microsoft/microsoft-confirms-another-windows-print-spooler-zero-day-bug/" rel="nofollow noopener" target="_blank">CVE-2021-36958</a>) impacting the Windows Print Spooler service, Windows print drivers, and the Windows Point and Print feature.</p> <p>Microsoft has released security updates to address CVE-2021-1675 and CVE-2021-34527 in <a href="https://www.bleepingcomputer.com/news/microsoft/microsoft-june-2021-patch-tuesday-fixes-6-exploited-zero-days-50-flaws/" target="_blank">June</a>, <a href="https://www.bleepingcomputer.com/news/security/microsoft-pushes-emergency-update-for-windows-printnightmare-zero-day/" target="_blank">July</a>, and <a href="https://www.bleepingcomputer.com/news/microsoft/microsoft-fixes-windows-print-spooler-printnightmare-vulnerability/" target="_blank">August</a>.</p> <p>The company has also published a security advisory on Wednesday providing a <a href="https://www.bleepingcomputer.com/news/microsoft/microsoft-confirms-another-windows-print-spooler-zero-day-bug/" target="_blank">workaround for CVE-2021-36958</a> (a zero-day bug allowing privilege escalation, with no patch available).</p> <p>Threat actors can use these security flaws in local privilege 
escalation (LPE) or distribute malware as Windows domain admins via remote code execution (RCE) with SYSTEM privileges.</p> <h2>Ransomware now using PrintNightmare exploits</h2> <p>And, as Crowdstrike researchers discovered last month, the Magniber ransomware gang is now using PrintNightmare exploits for these exact purposes in attacks against South Korean victims.</p> <p>"On July 13, CrowdStrike successfully detected and prevented attempts at exploiting the PrintNightmare vulnerability, protecting customers before any encryption takes place," <a href="https://www.crowdstrike.com/blog/magniber-ransomware-caught-using-printnightmare-vulnerability/" target="_blank" rel="nofollow noopener">said Liviu Arsene</a>, Crowdstrike's Director of Threat Research and Reporting.</p> <p>After compromising servers unpatched against PrintNightmare, Magniber drops an obfuscated DLL loader, which gets first injected into a process and later unpacked to perform local file traversal and encrypt files on the compromised device.</p> <p>In early February 2021, Crowdstrike&nbsp;observed Magniber being delivered via Magnitude EK onto South Korean devices running Internet Explorer unpatched against the <a href="https://msrc.microsoft.com/update-guide/vulnerability/CVE-2020-0968" target="_blank" rel="nofollow noopener">CVE-2020-0968</a> vulnerability.</p> <p>Magniber ransomware has been <a href="https://www.bleepingcomputer.com/news/security/goodbye-cerber-hello-magniber-ransomware/" target="_blank">active since October 2017</a>, when it was being deployed through malvertising using the Magnitude Exploit Kit (EK) as the successor of <a href="https://www.bleepingcomputer.com/news/security/the-cerber-ransomware-not-only-encrypts-your-data-but-also-speaks-to-you/" target="_blank">Cerber ransomware</a>.</p> <p>While it initially focused on South Korean victims, the Magniber gang soon <a 
href="https://www.bleepingcomputer.com/news/security/magniber-ransomware-expands-from-south-korea-to-target-other-asian-countries/" target="_blank">expanded its operations worldwide</a>, switching targets to other countries, including China, Taiwan, Hong Kong, Singapore, Malaysia, and more.</p> <p>Magniber&nbsp;has been surprisingly active during the last 30 days, with almost 600 submissions on the&nbsp;ID Ransomware platform.</p> <div style="text-align:center"> <figure class="image" style="display:inline-block"><img alt="Magniber&nbsp;ransomware" height="282" src="https://www.bleepstatic.com/images/news/u/1109292/2021/Magniber%C2%A0ransomware.png" width="935"><figcaption><em>Magniber ransomware submissions (ID Ransomware)</em></figcaption></figure></div> <h2>More threat groups expected to add PrintNightmare to their arsenals</h2> <p>At the moment we only have evidence that the Magniber&nbsp;ransomware gang is using PrintNightmware exploits in the wild to target potential victims.</p> <p>However, other attackers (including ransomware groups) will likely join in (if they haven't already), seeing that there are other reports of in-the-wild PrintNightmare exploitation&nbsp;[<a href="https://twitter.com/BushidoToken/status/1422492498241392647" target="_blank" rel="nofollow noopener">1</a>, <a href="https://twitter.com/securitydoggo/status/1422241229392203777" target="_blank" rel="nofollow noopener">2</a>, <a href="https://twitter.com/John_Fokker/status/1425749521569624065" target="_blank" rel="nofollow noopener">3</a>] have surfaced since the vulnerability was reported and <a href="https://www.bleepingcomputer.com/news/security/public-windows-printnightmare-0-day-exploit-allows-domain-takeover/" target="_blank">proof-of-concept exploits were leaked</a>.</p> <p>"CrowdStrike estimates that the PrintNightmare vulnerability coupled with the deployment of ransomware will likely continue to be exploited by other threat actors," Arsene concluded.</p> <p>To defend against 
attacks that might target your network, you are advised to apply any available patches as soon as possible and implement workarounds provided by Microsoft to remove the attack vector if a security update is not yet available.</p> <p>On July 13, CISA issued an emergency directive <a href="https://www.bleepingcomputer.com/news/security/cisa-orders-federal-agencies-to-patch-windows-printnightmare-bug/" target="_blank">ordering federal agencies</a> to mitigate the actively exploited <a href="https://www.bleepingcomputer.com/news/security/microsoft-printnightmare-security-updates-work-start-patching/" target="_blank">PrintNightmare</a> vulnerability on their networks.</p> <p>The cybersecurity agency also published a <a href="https://www.bleepingcomputer.com/news/security/cisa-disable-windows-print-spooler-on-servers-not-used-for-printing/" target="_blank">PrintNightmare alert</a> on July 1st, encouraging security professionals to disable the Windows Print Spooler service on all systems not used for printing.</p> <div class="cz-related-article-wrapp"> <h3>Related Articles:</h3> <p><a href="https://www.bleepingcomputer.com/news/security/vice-society-ransomware-joins-ongoing-printnightmare-attacks/" class="ras">Vice Society ransomware joins ongoing PrintNightmare attacks</a></p><p><a href="https://www.bleepingcomputer.com/news/security/the-week-in-ransomware-august-13th-2021-the-rise-of-lockbit/" class="ras">The Week in Ransomware - August 13th 2021 - The rise of LockBit</a></p><p><a href="https://www.bleepingcomputer.com/news/security/accenture-confirms-hack-after-lockbit-ransomware-data-leak-threats/" class="ras">Accenture confirms hack after LockBit ransomware data leak threats</a></p><p><a href="https://www.bleepingcomputer.com/news/security/synack-ransomware-releases-decryption-keys-after-el-cometa-rebrand/" class="ras">SynAck ransomware releases decryption keys after El_Cometa rebrand</a></p><p><a 
href="https://www.bleepingcomputer.com/news/microsoft/microsoft-confirms-another-windows-print-spooler-zero-day-bug/" class="ras">Microsoft confirms another Windows print spooler zero-day bug</a></p> </div>

and remove anything after this part

<div class="cz-related-article-wrapp"> <h3>Related Articles:</h3> <p><a href="https://www.bleepingcomputer.com/news/security/vice-society-ransomware-joins-ongoing-printnightmare-attacks/" class="ras">Vice Society ransomware joins ongoing PrintNightmare attacks</a></p><p><a href="https://www.bleepingcomputer.com/news/security/the-week-in-ransomware-august-13th-2021-the-rise-of-lockbit/" class="ras">The Week in Ransomware - August 13th 2021 - The rise of LockBit</a></p><p><a href="https://www.bleepingcomputer.com/news/security/accenture-confirms-hack-after-lockbit-ransomware-data-leak-threats/" class="ras">Accenture confirms hack after LockBit ransomware data leak threats</a></p><p><a href="https://www.bleepingcomputer.com/news/security/synack-ransomware-releases-decryption-keys-after-el-cometa-rebrand/" class="ras">SynAck ransomware releases decryption keys after El_Cometa rebrand</a></p><p><a href="https://www.bleepingcomputer.com/news/microsoft/microsoft-confirms-another-windows-print-spooler-zero-day-bug/" class="ras">Microsoft confirms another Windows print spooler zero-day bug</a></p> </div>

You need a function node similar to this:

const html = items[0].json.html

const regex = /(<div class="cz-related-article-wrapp">).*?(<\/div>)/gmi;

const matched = html.match(regex);

return [
  {
    json: {
      html: html.replace(matched[0], '')
    }
  }
]

Example Workflow
{
  "nodes": [
    {
      "parameters": {},
      "name": "Start",
      "type": "n8n-nodes-base.start",
      "typeVersion": 1,
      "position": [
        -4470,
        -430
      ]
    },
    {
      "parameters": {
        "values": {
          "string": [
            {
              "name": "html",
              "value": "<p style=\"text-align:center\"><img alt=\"Ransomware gang uses PrintNightmare to breach Windows servers\" src=\"https://www.bleepstatic.com/content/hl-images/2021/05/17/Windows.jpg\"></p> <p>Ransomware operators have added PrintNightmare exploits to their arsenal and are targeting Windows servers to deploy Magniber ransomware payloads.</p> <p><a href=\"https://www.bleepingcomputer.com/tag/printnightmare/\" target=\"_blank\">PrintNightmare</a> is a class of security vulnerabilities (tracked as <a href=\"https://msrc.microsoft.com/update-guide/vulnerability/CVE-2021-1675\" target=\"_blank\" rel=\"nofollow noopener\">CVE-2021-1675</a>, <a href=\"https://msrc.microsoft.com/update-guide/vulnerability/CVE-2021-34527\" rel=\"nofollow noopener\">CVE-2021-34527</a>, and <a href=\"https://www.bleepingcomputer.com/news/microsoft/microsoft-confirms-another-windows-print-spooler-zero-day-bug/\" rel=\"nofollow noopener\" target=\"_blank\">CVE-2021-36958</a>) impacting the Windows Print Spooler service, Windows print drivers, and the Windows Point and Print feature.</p> <p>Microsoft has released security updates to address CVE-2021-1675 and CVE-2021-34527 in <a href=\"https://www.bleepingcomputer.com/news/microsoft/microsoft-june-2021-patch-tuesday-fixes-6-exploited-zero-days-50-flaws/\" target=\"_blank\">June</a>, <a href=\"https://www.bleepingcomputer.com/news/security/microsoft-pushes-emergency-update-for-windows-printnightmare-zero-day/\" target=\"_blank\">July</a>, and <a href=\"https://www.bleepingcomputer.com/news/microsoft/microsoft-fixes-windows-print-spooler-printnightmare-vulnerability/\" target=\"_blank\">August</a>.</p> <p>The company has also published a security advisory on Wednesday providing a <a href=\"https://www.bleepingcomputer.com/news/microsoft/microsoft-confirms-another-windows-print-spooler-zero-day-bug/\" target=\"_blank\">workaround for CVE-2021-36958</a> (a zero-day bug allowing privilege escalation, with no patch 
available).</p> <p>Threat actors can use these security flaws in local privilege escalation (LPE) or distribute malware as Windows domain admins via remote code execution (RCE) with SYSTEM privileges.</p> <h2>Ransomware now using PrintNightmare exploits</h2> <p>And, as Crowdstrike researchers discovered last month, the Magniber ransomware gang is now using PrintNightmare exploits for these exact purposes in attacks against South Korean victims.</p> <p>\"On July 13, CrowdStrike successfully detected and prevented attempts at exploiting the PrintNightmare vulnerability, protecting customers before any encryption takes place,\" <a href=\"https://www.crowdstrike.com/blog/magniber-ransomware-caught-using-printnightmare-vulnerability/\" target=\"_blank\" rel=\"nofollow noopener\">said Liviu Arsene</a>, Crowdstrike's Director of Threat Research and Reporting.</p> <p>After compromising servers unpatched against PrintNightmare, Magniber drops an obfuscated DLL loader, which gets first injected into a process and later unpacked to perform local file traversal and encrypt files on the compromised device.</p> <p>In early February 2021, Crowdstrike&nbsp;observed Magniber being delivered via Magnitude EK onto South Korean devices running Internet Explorer unpatched against the <a href=\"https://msrc.microsoft.com/update-guide/vulnerability/CVE-2020-0968\" target=\"_blank\" rel=\"nofollow noopener\">CVE-2020-0968</a> vulnerability.</p> <p>Magniber ransomware has been <a href=\"https://www.bleepingcomputer.com/news/security/goodbye-cerber-hello-magniber-ransomware/\" target=\"_blank\">active since October 2017</a>, when it was being deployed through malvertising using the Magnitude Exploit Kit (EK) as the successor of <a href=\"https://www.bleepingcomputer.com/news/security/the-cerber-ransomware-not-only-encrypts-your-data-but-also-speaks-to-you/\" target=\"_blank\">Cerber ransomware</a>.</p> <p>While it initially focused on South Korean victims, the Magniber gang soon <a 
href=\"https://www.bleepingcomputer.com/news/security/magniber-ransomware-expands-from-south-korea-to-target-other-asian-countries/\" target=\"_blank\">expanded its operations worldwide</a>, switching targets to other countries, including China, Taiwan, Hong Kong, Singapore, Malaysia, and more.</p> <p>Magniber&nbsp;has been surprisingly active during the last 30 days, with almost 600 submissions on the&nbsp;ID Ransomware platform.</p> <div style=\"text-align:center\"> <figure class=\"image\" style=\"display:inline-block\"><img alt=\"Magniber&nbsp;ransomware\" height=\"282\" src=\"https://www.bleepstatic.com/images/news/u/1109292/2021/Magniber%C2%A0ransomware.png\" width=\"935\"><figcaption><em>Magniber ransomware submissions (ID Ransomware)</em></figcaption></figure></div> <h2>More threat groups expected to add PrintNightmare to their arsenals</h2> <p>At the moment we only have evidence that the Magniber&nbsp;ransomware gang is using PrintNightmware exploits in the wild to target potential victims.</p> <p>However, other attackers (including ransomware groups) will likely join in (if they haven't already), seeing that there are other reports of in-the-wild PrintNightmare exploitation&nbsp;[<a href=\"https://twitter.com/BushidoToken/status/1422492498241392647\" target=\"_blank\" rel=\"nofollow noopener\">1</a>, <a href=\"https://twitter.com/securitydoggo/status/1422241229392203777\" target=\"_blank\" rel=\"nofollow noopener\">2</a>, <a href=\"https://twitter.com/John_Fokker/status/1425749521569624065\" target=\"_blank\" rel=\"nofollow noopener\">3</a>] have surfaced since the vulnerability was reported and <a href=\"https://www.bleepingcomputer.com/news/security/public-windows-printnightmare-0-day-exploit-allows-domain-takeover/\" target=\"_blank\">proof-of-concept exploits were leaked</a>.</p> <p>\"CrowdStrike estimates that the PrintNightmare vulnerability coupled with the deployment of ransomware will likely continue to be exploited by other threat actors,\" 
Arsene concluded.</p> <p>To defend against attacks that might target your network, you are advised to apply any available patches as soon as possible and implement workarounds provided by Microsoft to remove the attack vector if a security update is not yet available.</p> <p>On July 13, CISA issued an emergency directive <a href=\"https://www.bleepingcomputer.com/news/security/cisa-orders-federal-agencies-to-patch-windows-printnightmare-bug/\" target=\"_blank\">ordering federal agencies</a> to mitigate the actively exploited <a href=\"https://www.bleepingcomputer.com/news/security/microsoft-printnightmare-security-updates-work-start-patching/\" target=\"_blank\">PrintNightmare</a> vulnerability on their networks.</p> <p>The cybersecurity agency also published a <a href=\"https://www.bleepingcomputer.com/news/security/cisa-disable-windows-print-spooler-on-servers-not-used-for-printing/\" target=\"_blank\">PrintNightmare alert</a> on July 1st, encouraging security professionals to disable the Windows Print Spooler service on all systems not used for printing.</p> <div class=\"cz-related-article-wrapp\"> <h3>Related Articles:</h3> <p><a href=\"https://www.bleepingcomputer.com/news/security/vice-society-ransomware-joins-ongoing-printnightmare-attacks/\" class=\"ras\">Vice Society ransomware joins ongoing PrintNightmare attacks</a></p><p><a href=\"https://www.bleepingcomputer.com/news/security/the-week-in-ransomware-august-13th-2021-the-rise-of-lockbit/\" class=\"ras\">The Week in Ransomware - August 13th 2021 - The rise of LockBit</a></p><p><a href=\"https://www.bleepingcomputer.com/news/security/accenture-confirms-hack-after-lockbit-ransomware-data-leak-threats/\" class=\"ras\">Accenture confirms hack after LockBit ransomware data leak threats</a></p><p><a href=\"https://www.bleepingcomputer.com/news/security/synack-ransomware-releases-decryption-keys-after-el-cometa-rebrand/\" class=\"ras\">SynAck ransomware releases decryption keys after El_Cometa 
rebrand</a></p><p><a href=\"https://www.bleepingcomputer.com/news/microsoft/microsoft-confirms-another-windows-print-spooler-zero-day-bug/\" class=\"ras\">Microsoft confirms another Windows print spooler zero-day bug</a></p> </div>"
            }
          ]
        },
        "options": {}
      },
      "name": "Set",
      "type": "n8n-nodes-base.set",
      "typeVersion": 1,
      "position": [
        -4200,
        -420
      ]
    },
    {
      "parameters": {
        "functionCode": "const html = items[0].json.html\n\nconst regex = /(<div class=\"cz-related-article-wrapp\">).*?(<\\/div>)/gmi;\n\nconst matched = html.match(regex);\n\nreturn [\n  {\n    json: {\n      html: html.replace(matched[0], '')\n    }\n  }\n]\n\n\n"
      },
      "name": "Function",
      "type": "n8n-nodes-base.function",
      "typeVersion": 1,
      "position": [
        -3990,
        -420
      ]
    }
  ],
  "connections": {
    "Start": {
      "main": [
        [
          {
            "node": "Set",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set": {
      "main": [
        [
          {
            "node": "Function",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}

thanks, i’ll have a play and get back to you

And that works when I use your Set and Function nodes, but when I use {{$json["Article"]}} from the previous node as my Set value, I get an error:

ERROR: Cannot read property '0' of null

Can you share the input of the function I shared in your workflow?

I copied your reply from above and it worked
image

mine didn’t

Here is the JSON export of the workflow

{
  "name": "html extract",
  "nodes": [
    {
      "parameters": {},
      "name": "Start",
      "type": "n8n-nodes-base.start",
      "typeVersion": 1,
      "position": [
        -290,
        130
      ]
    },
    {
      "parameters": {
        "url": "https://www.bleepingcomputer.com/news/security/",
        "responseFormat": "string",
        "options": {}
      },
      "name": "HTTP Request",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 1,
      "position": [
        -110,
        140
      ]
    },
    {
      "parameters": {
        "extractionValues": {
          "values": [
            {
              "key": "url",
              "cssSelector": "a",
              "returnValue": "attribute",
              "attribute": "href",
              "returnArray": true
            }
          ]
        },
        "options": {}
      },
      "name": "HTML Extract",
      "type": "n8n-nodes-base.htmlExtract",
      "typeVersion": 1,
      "position": [
        90,
        140
      ]
    },
    {
      "parameters": {
        "functionCode": "return items[0].json.url.map(item => {\n  return {\n    json: {\n      data:item\n    },\n  }\n});\n\n"
      },
      "name": "Function",
      "type": "n8n-nodes-base.function",
      "position": [
        280,
        140
      ],
      "typeVersion": 1
    },
    {
      "parameters": {
        "conditions": {
          "string": [
            {
              "value1": "={{$json[\"data\"]}}",
              "operation": "contains",
              "value2": "/security/"
            },
            {
              "value1": "={{$json[\"data\"]}}",
              "operation": "notContains",
              "value2": "#"
            }
          ],
          "boolean": []
        }
      },
      "name": "IF",
      "type": "n8n-nodes-base.if",
      "typeVersion": 1,
      "position": [
        500,
        140
      ]
    },
    {
      "parameters": {
        "functionCode": "const data = [];\n\nfor (const item of items) {\n  if (data.includes(item.json.data)) continue\n  data.push(item.json.data)\n}\n\nreturn data.map((url) => ({ json:{ data: url } }))\n"
      },
      "name": "Function1",
      "type": "n8n-nodes-base.function",
      "position": [
        680,
        130
      ],
      "typeVersion": 1
    },
    {
      "parameters": {
        "url": "={{$json[\"data\"]}}",
        "responseFormat": "string",
        "options": {}
      },
      "name": "HTTP Request1",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 1,
      "position": [
        880,
        130
      ]
    },
    {
      "parameters": {
        "extractionValues": {
          "values": [
            {
              "key": "Title",
              "cssSelector": "h1"
            },
            {
              "key": "Article",
              "cssSelector": ".articleBody",
              "returnValue": "html"
            }
          ]
        },
        "options": {}
      },
      "name": "H1 Extract",
      "type": "n8n-nodes-base.htmlExtract",
      "typeVersion": 1,
      "position": [
        1080,
        140
      ],
      "executeOnce": false
    },
    {
      "parameters": {
        "keepOnlySet": true,
        "values": {
          "string": [
            {
              "name": "title",
              "value": "={{$json[\"Title\"]}}"
            },
            {
              "name": "html",
              "value": "={{$json[\"Article\"]}}"
            }
          ]
        },
        "options": {}
      },
      "name": "Set",
      "type": "n8n-nodes-base.set",
      "typeVersion": 1,
      "position": [
        1270,
        140
      ]
    },
    {
      "parameters": {
        "functionCode": "const html = items[0].json.html\n\nconst regex = /(<div class=\"cz-related-article-wrapp\">).*?(<\\/div>)/gmi;\n\nconst matched = html.match(regex);\n\nreturn [\n  {\n    json: {\n      html: html.replace(matched[0], '')\n    }\n  }\n]\n\n\n"
      },
      "name": "Function3",
      "type": "n8n-nodes-base.function",
      "typeVersion": 1,
      "position": [
        1460,
        140
      ]
    }
  ],
  "connections": {
    "HTTP Request": {
      "main": [
        [
          {
            "node": "HTML Extract",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTML Extract": {
      "main": [
        [
          {
            "node": "Function",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Function": {
      "main": [
        [
          {
            "node": "IF",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "IF": {
      "main": [
        [
          {
            "node": "Function1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Function1": {
      "main": [
        [
          {
            "node": "HTTP Request1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTTP Request1": {
      "main": [
        [
          {
            "node": "H1 Extract",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "H1 Extract": {
      "main": [
        [
          {
            "node": "Set",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set": {
      "main": [
        [
          {
            "node": "Function3",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "active": false,
  "settings": {},
  "id": 27
}

Ahh, yes, because I assumed it was just one item. You have 19 items there. Use the code below in the function node.

const regex = /(<div class="cz-related-article-wrapp">).*?(<\/div>)/gmi;

const response = [];

for (const item of items) {
  const html = item.json.html;
  const matched = html.match(regex);
  response.push({
    json: {
      html: html.replace(matched[0], '')
    }
  })

}

return response;

Ah sorry, my bad. However, I get the same error.

Do all the HTMLs have the class="cz-related-article-wrapp" ?

yes, but i think at the end, there are one or two articles that have no result for some reason. hmmm strange, i thought my IF filter got rid of those…

The condition in the IF node is probably not correct.

Anyway, I modified the function node so that items without a match are passed through unchanged instead of causing an error.

const regex = /(<div class="cz-related-article-wrapp">).*?(<\/div>)/gmi;

const response = [];

for (const item of items) {
  const html = item.json.html;
  const matched = html.match(regex);
  response.push({
    json: {
      html: Array.isArray(matched) ? html.replace(matched[0], '') : html
    }
  })
}

return response;