Workflow: Splitout Code Create

Workflow Details

Download Workflow
{
    "meta": {
        "instanceId": "4a11afdb3c52fd098e3eae9fad4b39fdf1bbcde142f596adda46c795e366b326"
    },
    "nodes": [
        {
            "id": "f1b36f4b-6558-4e83-a999-e6f2d24e196c",
            "name": "OpenRouter Chat Model",
            "type": "@n8n\/n8n-nodes-langchain.lmChatOpenRouter",
            "position": [
                620,
                240
            ],
            "parameters": {
                "model": "openai\/gpt-4.1",
                "options": []
            },
            "typeVersion": 1
        },
        {
            "id": "89ca0a07-286f-4e68-9e85-0327a4859cc0",
            "name": "Structured Output Parser",
            "type": "@n8n\/n8n-nodes-langchain.outputParserStructured",
            "position": [
                900,
                240
            ],
            "parameters": {
                "schemaType": "manual",
                "inputSchema": "{\n  \"type\": \"array\",\n  \"items\": {\n    \"type\": \"object\",\n    \"properties\": {\n      \"name\": { \"type\": \"string\" },\n      \"description\": { \"type\": \"string\" },\n      \"rating\": { \"type\": \"number\" },\n      \"reviews\": { \"type\": \"integer\" },\n      \"price\": { \"type\": \"string\" }\n    },\n    \"required\": [\"name\", \"description\", \"rating\", \"reviews\", \"price\"]\n  }\n}"
            },
            "typeVersion": 1.1999999999999999555910790149937383830547332763671875
        },
        {
            "id": "e4800c1d-c0d8-4093-81ec-fc19ad0034cd",
            "name": "scrap url",
            "type": "n8n-nodes-base.httpRequest",
            "position": [
                240,
                60
            ],
            "parameters": {
                "url": "https:\/\/api.brightdata.com\/request",
                "method": "POST",
                "options": [],
                "sendBody": true,
                "sendHeaders": true,
                "bodyParameters": {
                    "parameters": [
                        {
                            "name": "zone",
                            "value": "web_unlocker1"
                        },
                        {
                            "name": "url",
                            "value": "={{ $json.url }}"
                        },
                        {
                            "name": "format",
                            "value": "raw"
                        }
                    ]
                },
                "headerParameters": {
                    "parameters": [
                        {
                            "name": "Authorization",
                            "value": "{{BRIGHTDATA_TOKEN}}"
                        }
                    ]
                }
            },
            "typeVersion": 4.20000000000000017763568394002504646778106689453125
        },
        {
            "id": "1a1f768f-615d-4035-81b0-63b860f8e6ac",
            "name": "Sticky Note1",
            "type": "n8n-nodes-base.stickyNote",
            "position": [
                160,
                -140
            ],
            "parameters": {
                "content": "## Web Scraper API\n\n[Inscription - Free Trial](https:\/\/get.brightdata.com\/website-scraper)"
            },
            "typeVersion": 1
        },
        {
            "id": "2f260d96-4fff-4a4f-af29-1e43f465d54c",
            "name": "When clicking \u2018Test workflow\u2019",
            "type": "n8n-nodes-base.manualTrigger",
            "position": [
                -440,
                200
            ],
            "parameters": [],
            "typeVersion": 1
        },
        {
            "id": "4be9033f-0b9f-466d-916e-88fbb2a80417",
            "name": "url",
            "type": "n8n-nodes-base.splitInBatches",
            "position": [
                20,
                200
            ],
            "parameters": {
                "options": []
            },
            "typeVersion": 3
        },
        {
            "id": "21b6d21c-b977-4175-9068-e0e2e19fa472",
            "name": "get urls to scrape",
            "type": "n8n-nodes-base.googleSheets",
            "position": [
                -200,
                200
            ],
            "parameters": {
                "options": [],
                "sheetName": "{{TRACK_SHEET_GID}}",
                "documentId": "{{WEB_SHEET_ID}}"
            },
            "credentials": {
                "googleSheetsOAuth2Api": {
                    "id": "KsXWRZTrfCUFrrHD",
                    "name": "Google Sheets"
                }
            },
            "typeVersion": 4.5
        },
        {
            "id": "25ef76ec-cf0d-422e-b060-68c49192a008",
            "name": "clean html",
            "type": "n8n-nodes-base.code",
            "position": [
                460,
                60
            ],
            "parameters": {
                "jsCode": "\/\/ CleanHtmlFunction.js\n\/\/ Purpose: n8n Function node to clean HTML: remove doctype, scripts, styles, head, comments, classes, extra blank lines, and non-whitelisted tags\n\nreturn items.map(item => {\n  const rawHtml = item.json.data;\n\n  \/\/ 1) remove doctype, scripts, styles, comments and head section, and strip class attributes\n  let cleaned = rawHtml\n    .replace(\/<!doctype html>\/gi, '')\n    .replace(\/<script[\\s\\S]*?<\\\/script>\/gi, '')\n    .replace(\/<style[\\s\\S]*?<\\\/style>\/gi, '')\n    .replace(\/<!--[\\s\\S]*?-->\/g, '')\n    .replace(\/<head[\\s\\S]*?<\\\/head>\/gi, '')\n    .replace(\/\\sclass=\"[^\"]*\"\/gi, '');\n\n  \/\/ 2) define whitelist of tags to keep\n  const allowedTags = [\n    'h1','h2','h3','h4','h5','h6',\n    'p','ul','ol','li',\n    'strong','em','a','blockquote',\n    'code','pre'\n  ];\n\n  \/\/ 3) strip out all tags not in the whitelist, reconstruct allowed tags cleanly\n  cleaned = cleaned.replace(\n    \/<\\\/?([a-z][a-z0-9]*)\\b[^>]*>\/gi,\n    (match, tagName) => {\n      const name = tagName.toLowerCase();\n      if (allowedTags.includes(name)) {\n        return match.startsWith('<\/') ? `<\/${name}>` : `<${name}>`;\n      }\n      return '';\n    }\n  );\n\n  \/\/ 4) collapse multiple blank or whitespace-only lines into a single newline\n  cleaned = cleaned.replace(\/(\\s*\\r?\\n\\s*){2,}\/g, '\\n');\n\n  \/\/ 5) trim leading\/trailing whitespace\n  cleaned = cleaned.trim();\n\n  return {\n    json: { cleanedHtml: cleaned }\n  };\n});"
            },
            "typeVersion": 2
        },
        {
            "id": "f72660d5-8427-4655-acbe-10365273c27b",
            "name": "extract data",
            "type": "@n8n\/n8n-nodes-langchain.chainLlm",
            "position": [
                680,
                60
            ],
            "parameters": {
                "text": "={{ $json.cleanedHtml }}",
                "messages": {
                    "messageValues": [
                        {
                            "message": "=You are an expert in web page scraping. Provide a structured response in JSON format. Only the response, without commentary.\n\nExtract the product information for {{ $(\u2018url\u2019).item.json.url.split(\u2019\/s?k=\u2019)[1].split(\u2019&\u2019)[0] }} present on the page.\n\nname\ndescription\nrating\nreviews\nprice"
                        }
                    ]
                },
                "promptType": "define",
                "hasOutputParser": true
            },
            "typeVersion": 1.600000000000000088817841970012523233890533447265625
        },
        {
            "id": "8b4af1bb-d7f8-456e-b630-ecd9b6e4bcdc",
            "name": "add results",
            "type": "n8n-nodes-base.googleSheets",
            "position": [
                1280,
                200
            ],
            "parameters": {
                "columns": {
                    "value": {
                        "name": "={{ $json.output.name }}",
                        "price": "={{ $json.output.price }}",
                        "rating": "={{ $json.output.rating }}",
                        "reviews": "={{ $json.output.reviews }}",
                        "description": "={{ $json.output.description }}"
                    },
                    "schema": [
                        {
                            "id": "name",
                            "type": "string"
                        },
                        {
                            "id": "description",
                            "type": "string"
                        },
                        {
                            "id": "rating",
                            "type": "string"
                        },
                        {
                            "id": "reviews",
                            "type": "string"
                        },
                        {
                            "id": "price",
                            "type": "string"
                        }
                    ],
                    "mappingMode": "defineBelow"
                },
                "options": [],
                "operation": "append",
                "sheetName": "{{RESULTS_SHEET_GID}}",
                "documentId": "{{WEB_SHEET_ID}}"
            },
            "credentials": {
                "googleSheetsOAuth2Api": {
                    "id": "KsXWRZTrfCUFrrHD",
                    "name": "Google Sheets"
                }
            },
            "typeVersion": 4.5
        },
        {
            "id": "7a5ba438-2ede-4d6c-b8fa-9a958ba1ef3e",
            "name": "Split items",
            "type": "n8n-nodes-base.splitOut",
            "position": [
                1060,
                60
            ],
            "parameters": {
                "include": "allOtherFields",
                "options": [],
                "fieldToSplitOut": "output"
            },
            "typeVersion": 1
        }
    ],
    "pinData": [],
    "connections": {
        "url": {
            "main": [
                [],
                [
                    {
                        "node": "scrap url",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "scrap url": {
            "main": [
                [
                    {
                        "node": "clean html",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "clean html": {
            "main": [
                [
                    {
                        "node": "extract data",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Split items": {
            "main": [
                [
                    {
                        "node": "add results",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "add results": {
            "main": [
                [
                    {
                        "node": "url",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "extract data": {
            "main": [
                [
                    {
                        "node": "Split items",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "get urls to scrape": {
            "main": [
                [
                    {
                        "node": "url",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "OpenRouter Chat Model": {
            "ai_languageModel": [
                [
                    {
                        "node": "extract data",
                        "type": "ai_languageModel",
                        "index": 0
                    }
                ]
            ]
        },
        "Structured Output Parser": {
            "ai_outputParser": [
                [
                    {
                        "node": "extract data",
                        "type": "ai_outputParser",
                        "index": 0
                    }
                ]
            ]
        },
        "When clicking \u2018Test workflow\u2019": {
            "main": [
                [
                    {
                        "node": "get urls to scrape",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        }
    }
}
Back to Workflows

Related Workflows

Jiratool Schedule Automate Scheduled
View
Scrape Trustpilot Reviews to Google Sheets
View
Wait Splitout Send Webhook
View
Manual Postgres Automate Triggered
View
N8N Español - Ejemplos
View
Receive messages for an ActiveMQ queue via AMQP Trigger
View
Schedule Slack Create Scheduled
View
Dashboard
View
Telegram-bot AI Da Nang
View
Wise Airtable Automate Triggered
View