Here is the JSON export of the workflow:
{
"name": "html extract",
"nodes": [
{
"parameters": {},
"name": "Start",
"type": "n8n-nodes-base.start",
"typeVersion": 1,
"position": [
-290,
130
]
},
{
"parameters": {
"url": "https://www.bleepingcomputer.com/news/security/",
"responseFormat": "string",
"options": {}
},
"name": "HTTP Request",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 1,
"position": [
-110,
140
]
},
{
"parameters": {
"extractionValues": {
"values": [
{
"key": "url",
"cssSelector": "a",
"returnValue": "attribute",
"attribute": "href",
"returnArray": true
}
]
},
"options": {}
},
"name": "HTML Extract",
"type": "n8n-nodes-base.htmlExtract",
"typeVersion": 1,
"position": [
90,
140
]
},
{
"parameters": {
"functionCode": "return items[0].json.url.map(item => {\n return {\n json: {\n data:item\n },\n }\n});\n\n"
},
"name": "Function",
"type": "n8n-nodes-base.function",
"position": [
280,
140
],
"typeVersion": 1
},
{
"parameters": {
"conditions": {
"string": [
{
"value1": "={{$json[\"data\"]}}",
"operation": "contains",
"value2": "/security/"
},
{
"value1": "={{$json[\"data\"]}}",
"operation": "notContains",
"value2": "#"
}
],
"boolean": []
}
},
"name": "IF",
"type": "n8n-nodes-base.if",
"typeVersion": 1,
"position": [
500,
140
]
},
{
"parameters": {
"functionCode": "const data = [];\n\nfor (const item of items) {\n if (data.includes(item.json.data)) continue\n data.push(item.json.data)\n}\n\nreturn data.map((url) => ({ json:{ data: url } }))\n"
},
"name": "Function1",
"type": "n8n-nodes-base.function",
"position": [
680,
130
],
"typeVersion": 1
},
{
"parameters": {
"url": "={{$json[\"data\"]}}",
"responseFormat": "string",
"options": {}
},
"name": "HTTP Request1",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 1,
"position": [
880,
130
]
},
{
"parameters": {
"extractionValues": {
"values": [
{
"key": "Title",
"cssSelector": "h1"
},
{
"key": "Article",
"cssSelector": ".articleBody",
"returnValue": "html"
}
]
},
"options": {}
},
"name": "H1 Extract",
"type": "n8n-nodes-base.htmlExtract",
"typeVersion": 1,
"position": [
1080,
140
],
"executeOnce": false
},
{
"parameters": {
"keepOnlySet": true,
"values": {
"string": [
{
"name": "title",
"value": "={{$json[\"Title\"]}}"
},
{
"name": "html",
"value": "={{$json[\"Article\"]}}"
}
]
},
"options": {}
},
"name": "Set",
"type": "n8n-nodes-base.set",
"typeVersion": 1,
"position": [
1270,
140
]
},
{
"parameters": {
"functionCode": "const html = items[0].json.html\n\nconst regex = /(<div class=\"cz-related-article-wrapp\">).*?(<\\/div>)/gmi;\n\nconst matched = html.match(regex);\n\nreturn [\n {\n json: {\n html: html.replace(matched[0], '')\n }\n }\n]\n\n\n"
},
"name": "Function3",
"type": "n8n-nodes-base.function",
"typeVersion": 1,
"position": [
1460,
140
]
}
],
"connections": {
"HTTP Request": {
"main": [
[
{
"node": "HTML Extract",
"type": "main",
"index": 0
}
]
]
},
"HTML Extract": {
"main": [
[
{
"node": "Function",
"type": "main",
"index": 0
}
]
]
},
"Function": {
"main": [
[
{
"node": "IF",
"type": "main",
"index": 0
}
]
]
},
"IF": {
"main": [
[
{
"node": "Function1",
"type": "main",
"index": 0
}
]
]
},
"Function1": {
"main": [
[
{
"node": "HTTP Request1",
"type": "main",
"index": 0
}
]
]
},
"HTTP Request1": {
"main": [
[
{
"node": "H1 Extract",
"type": "main",
"index": 0
}
]
]
},
"H1 Extract": {
"main": [
[
{
"node": "Set",
"type": "main",
"index": 0
}
]
]
},
"Set": {
"main": [
[
{
"node": "Function3",
"type": "main",
"index": 0
}
]
]
}
},
"active": false,
"settings": {},
"id": 27
}