Workflow: Splitout Filter Automation

Workflow Details

Download Workflow
{
    "id": "7fdJOvYNILCr24fH",
    "meta": {
        "instanceId": "568298fde06d3db80a2eea77fe5bf45f0c7bb898dea20b769944e9ac7c6c5a80"
    },
    "name": "Read sitemap and filter URLs",
    "tags": [],
    "nodes": [
        {
            "id": "38910330-5286-4f3f-b62e-9216acccd503",
            "name": "\u2018Test workflow\u2019 trigger",
            "type": "n8n-nodes-base.manualTrigger",
            "position": [
                -460,
                -60
            ],
            "parameters": [],
            "typeVersion": 1
        },
        {
            "id": "d4e5991b-62d9-45ca-962f-c1077f3bce19",
            "name": "Set sitemap URL",
            "type": "n8n-nodes-base.set",
            "position": [
                -280,
                -60
            ],
            "parameters": {
                "options": [],
                "assignments": {
                    "assignments": [
                        {
                            "id": "d6c5ac86-6d67-42fb-96ec-9826caf452e2",
                            "name": "sitemapUrl",
                            "type": "string",
                            "value": "https:\/\/duckduckgo.com\/sitemap.xml"
                        }
                    ]
                }
            },
            "typeVersion": 3.399999999999999911182158029987476766109466552734375
        },
        {
            "id": "0d957deb-5830-4077-97e4-437dc7c0e527",
            "name": "Split Out",
            "type": "n8n-nodes-base.splitOut",
            "position": [
                260,
                -60
            ],
            "parameters": {
                "options": [],
                "fieldToSplitOut": "urlset.url"
            },
            "typeVersion": 1
        },
        {
            "id": "7021088c-dfa1-4aae-b2e7-15b0ca10a750",
            "name": "Get Sitemap",
            "type": "n8n-nodes-base.httpRequest",
            "position": [
                -100,
                -60
            ],
            "parameters": {
                "url": "={{ $json.sitemapUrl }}",
                "options": []
            },
            "typeVersion": 4.20000000000000017763568394002504646778106689453125
        },
        {
            "id": "d3b86577-01fc-40f8-ab65-93ba420187b8",
            "name": "Convert Sitemap to JSON",
            "type": "n8n-nodes-base.xml",
            "position": [
                80,
                -60
            ],
            "parameters": {
                "options": {
                    "trim": true,
                    "normalize": true,
                    "mergeAttrs": true,
                    "ignoreAttrs": true,
                    "normalizeTags": true
                }
            },
            "typeVersion": 1
        },
        {
            "id": "bc0758ae-06eb-4a29-a91e-414407ec8ade",
            "name": "Filter URLs",
            "type": "n8n-nodes-base.filter",
            "position": [
                440,
                -60
            ],
            "parameters": {
                "options": [],
                "conditions": {
                    "options": {
                        "version": 2,
                        "leftValue": "",
                        "caseSensitive": true,
                        "typeValidation": "strict"
                    },
                    "combinator": "and",
                    "conditions": [
                        {
                            "id": "0bf8e98c-b6c5-4129-852c-0d3e63f32f9f",
                            "operator": {
                                "type": "string",
                                "operation": "endsWith"
                            },
                            "leftValue": "={{ $json.loc }}",
                            "rightValue": ".pdf"
                        }
                    ]
                }
            },
            "typeVersion": 2.20000000000000017763568394002504646778106689453125
        },
        {
            "id": "1d3fed97-1e72-426c-a48d-1a9683f40c4c",
            "name": "Sticky Note1",
            "type": "n8n-nodes-base.stickyNote",
            "position": [
                -300,
                -140
            ],
            "parameters": {
                "color": 6,
                "width": 150,
                "height": 240,
                "content": "**Set your sitemap.xml\nurl here.**"
            },
            "typeVersion": 1
        },
        {
            "id": "521ec74d-6707-47fd-992d-eecebed415ab",
            "name": "Sticky Note2",
            "type": "n8n-nodes-base.stickyNote",
            "position": [
                420,
                -140
            ],
            "parameters": {
                "color": 6,
                "width": 150,
                "height": 240,
                "content": "**Create your filter here.**"
            },
            "typeVersion": 1
        },
        {
            "id": "07e6c3de-cc72-490d-b614-67034ce04bfb",
            "name": "Sticky Note3",
            "type": "n8n-nodes-base.stickyNote",
            "position": [
                -140,
                -180
            ],
            "parameters": {
                "color": 7,
                "width": 540,
                "height": 300,
                "content": "## Fetch and process the sitemap.xml file\nThis part fetches and process the sitemap.xml file from XML data to JSON that we can work with."
            },
            "typeVersion": 1
        },
        {
            "id": "abf5f02d-d2a0-43f1-9a1f-386cc4f9861b",
            "name": "Sticky Note",
            "type": "n8n-nodes-base.stickyNote",
            "position": [
                -780,
                -220
            ],
            "parameters": {
                "width": 280,
                "height": 420,
                "content": "## Sitemap.xml reader\nThis workflow reads an sitemap.xml and filters out the entries you want.\n\nBy default only PDF documents are returned at the end of the workflow.\n\n**SETUP**\n- Edit the **Set sitemap URL** block and add the url to the sitemap you want to read.\n\n- Edit the **Filter URLs** to your needs."
            },
            "typeVersion": 1
        }
    ],
    "active": false,
    "pinData": [],
    "settings": {
        "executionOrder": "v1"
    },
    "versionId": "74793599-4c7d-4532-bbd5-a2ce4761fbc8",
    "connections": {
        "Split Out": {
            "main": [
                [
                    {
                        "node": "Filter URLs",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Get Sitemap": {
            "main": [
                [
                    {
                        "node": "Convert Sitemap to JSON",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Set sitemap URL": {
            "main": [
                [
                    {
                        "node": "Get Sitemap",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Convert Sitemap to JSON": {
            "main": [
                [
                    {
                        "node": "Split Out",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "\u2018Test workflow\u2019 trigger": {
            "main": [
                [
                    {
                        "node": "Set sitemap URL",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        }
    }
}
Back to Workflows

Related Workflows

Selenium Ultimate Scraper Workflow
View
Get Product Feedback
View
Get today's date and day using the Function node
View
💥🛠️Automate Blog Content Creation with GPT-4, Perplexity & WordPress
View
Splitout Filter Create Webhook
View
Parse DMARC reports
View
Manual HTTP Update Webhook
View
Manual Automate Triggered
View
Automate Content Generator for WordPress with DeepSeek R1
View
Insert data into a new row for a table in Coda
View