Workflow: Splitout Filter Automation

Workflow Details

Download Workflow
{
    "id": "7fdJOvYNILCr24fH",
    "meta": {
        "instanceId": "568298fde06d3db80a2eea77fe5bf45f0c7bb898dea20b769944e9ac7c6c5a80"
    },
    "name": "Read sitemap and filter URLs",
    "tags": [],
    "nodes": [
        {
            "id": "38910330-5286-4f3f-b62e-9216acccd503",
            "name": "\u2018Test workflow\u2019 trigger",
            "type": "n8n-nodes-base.manualTrigger",
            "position": [
                -460,
                -60
            ],
            "parameters": [],
            "typeVersion": 1
        },
        {
            "id": "d4e5991b-62d9-45ca-962f-c1077f3bce19",
            "name": "Set sitemap URL",
            "type": "n8n-nodes-base.set",
            "position": [
                -280,
                -60
            ],
            "parameters": {
                "options": [],
                "assignments": {
                    "assignments": [
                        {
                            "id": "d6c5ac86-6d67-42fb-96ec-9826caf452e2",
                            "name": "sitemapUrl",
                            "type": "string",
                            "value": "https:\/\/duckduckgo.com\/sitemap.xml"
                        }
                    ]
                }
            },
            "typeVersion": 3.399999999999999911182158029987476766109466552734375
        },
        {
            "id": "0d957deb-5830-4077-97e4-437dc7c0e527",
            "name": "Split Out",
            "type": "n8n-nodes-base.splitOut",
            "position": [
                260,
                -60
            ],
            "parameters": {
                "options": [],
                "fieldToSplitOut": "urlset.url"
            },
            "typeVersion": 1
        },
        {
            "id": "7021088c-dfa1-4aae-b2e7-15b0ca10a750",
            "name": "Get Sitemap",
            "type": "n8n-nodes-base.httpRequest",
            "position": [
                -100,
                -60
            ],
            "parameters": {
                "url": "={{ $json.sitemapUrl }}",
                "options": []
            },
            "typeVersion": 4.20000000000000017763568394002504646778106689453125
        },
        {
            "id": "d3b86577-01fc-40f8-ab65-93ba420187b8",
            "name": "Convert Sitemap to JSON",
            "type": "n8n-nodes-base.xml",
            "position": [
                80,
                -60
            ],
            "parameters": {
                "options": {
                    "trim": true,
                    "normalize": true,
                    "mergeAttrs": true,
                    "ignoreAttrs": true,
                    "normalizeTags": true
                }
            },
            "typeVersion": 1
        },
        {
            "id": "bc0758ae-06eb-4a29-a91e-414407ec8ade",
            "name": "Filter URLs",
            "type": "n8n-nodes-base.filter",
            "position": [
                440,
                -60
            ],
            "parameters": {
                "options": [],
                "conditions": {
                    "options": {
                        "version": 2,
                        "leftValue": "",
                        "caseSensitive": true,
                        "typeValidation": "strict"
                    },
                    "combinator": "and",
                    "conditions": [
                        {
                            "id": "0bf8e98c-b6c5-4129-852c-0d3e63f32f9f",
                            "operator": {
                                "type": "string",
                                "operation": "endsWith"
                            },
                            "leftValue": "={{ $json.loc }}",
                            "rightValue": ".pdf"
                        }
                    ]
                }
            },
            "typeVersion": 2.20000000000000017763568394002504646778106689453125
        },
        {
            "id": "1d3fed97-1e72-426c-a48d-1a9683f40c4c",
            "name": "Sticky Note1",
            "type": "n8n-nodes-base.stickyNote",
            "position": [
                -300,
                -140
            ],
            "parameters": {
                "color": 6,
                "width": 150,
                "height": 240,
                "content": "**Set your sitemap.xml\nurl here.**"
            },
            "typeVersion": 1
        },
        {
            "id": "521ec74d-6707-47fd-992d-eecebed415ab",
            "name": "Sticky Note2",
            "type": "n8n-nodes-base.stickyNote",
            "position": [
                420,
                -140
            ],
            "parameters": {
                "color": 6,
                "width": 150,
                "height": 240,
                "content": "**Create your filter here.**"
            },
            "typeVersion": 1
        },
        {
            "id": "07e6c3de-cc72-490d-b614-67034ce04bfb",
            "name": "Sticky Note3",
            "type": "n8n-nodes-base.stickyNote",
            "position": [
                -140,
                -180
            ],
            "parameters": {
                "color": 7,
                "width": 540,
                "height": 300,
                "content": "## Fetch and process the sitemap.xml file\nThis part fetches and process the sitemap.xml file from XML data to JSON that we can work with."
            },
            "typeVersion": 1
        },
        {
            "id": "abf5f02d-d2a0-43f1-9a1f-386cc4f9861b",
            "name": "Sticky Note",
            "type": "n8n-nodes-base.stickyNote",
            "position": [
                -780,
                -220
            ],
            "parameters": {
                "width": 280,
                "height": 420,
                "content": "## Sitemap.xml reader\nThis workflow reads an sitemap.xml and filters out the entries you want.\n\nBy default only PDF documents are returned at the end of the workflow.\n\n**SETUP**\n- Edit the **Set sitemap URL** block and add the url to the sitemap you want to read.\n\n- Edit the **Filter URLs** to your needs."
            },
            "typeVersion": 1
        }
    ],
    "active": false,
    "pinData": [],
    "settings": {
        "executionOrder": "v1"
    },
    "versionId": "74793599-4c7d-4532-bbd5-a2ce4761fbc8",
    "connections": {
        "Split Out": {
            "main": [
                [
                    {
                        "node": "Filter URLs",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Get Sitemap": {
            "main": [
                [
                    {
                        "node": "Convert Sitemap to JSON",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Set sitemap URL": {
            "main": [
                [
                    {
                        "node": "Get Sitemap",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Convert Sitemap to JSON": {
            "main": [
                [
                    {
                        "node": "Split Out",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "\u2018Test workflow\u2019 trigger": {
            "main": [
                [
                    {
                        "node": "Set sitemap URL",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        }
    }
}
Back to Workflows

Related Workflows

Exponential Backoff for Google APIs
View
Get new time entries from Toggl
View
Get Airtable data in Obsidian Notes
View
Awss3 GoogleDrive Import Triggered
View
Forward Filtered Gmail Notifications to Telegram Chat
View
GitHub Manual Create Scheduled
View
HTTP Form Automation Webhook
View
Microsoftoutlook Schedule Automation Scheduled
View
Building RAG Chatbot for Movie Recommendations with Qdrant and Open AI
View
HubSpot Splitout Create Webhook
View