{
"id": "7fdJOvYNILCr24fH",
"meta": {
"instanceId": "568298fde06d3db80a2eea77fe5bf45f0c7bb898dea20b769944e9ac7c6c5a80"
},
"name": "Read sitemap and filter URLs",
"tags": [],
"nodes": [
{
"id": "38910330-5286-4f3f-b62e-9216acccd503",
"name": "\u2018Test workflow\u2019 trigger",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-460,
-60
],
"parameters": [],
"typeVersion": 1
},
{
"id": "d4e5991b-62d9-45ca-962f-c1077f3bce19",
"name": "Set sitemap URL",
"type": "n8n-nodes-base.set",
"position": [
-280,
-60
],
"parameters": {
"options": [],
"assignments": {
"assignments": [
{
"id": "d6c5ac86-6d67-42fb-96ec-9826caf452e2",
"name": "sitemapUrl",
"type": "string",
"value": "https:\/\/duckduckgo.com\/sitemap.xml"
}
]
}
},
"typeVersion": 3.399999999999999911182158029987476766109466552734375
},
{
"id": "0d957deb-5830-4077-97e4-437dc7c0e527",
"name": "Split Out",
"type": "n8n-nodes-base.splitOut",
"position": [
260,
-60
],
"parameters": {
"options": [],
"fieldToSplitOut": "urlset.url"
},
"typeVersion": 1
},
{
"id": "7021088c-dfa1-4aae-b2e7-15b0ca10a750",
"name": "Get Sitemap",
"type": "n8n-nodes-base.httpRequest",
"position": [
-100,
-60
],
"parameters": {
"url": "={{ $json.sitemapUrl }}",
"options": []
},
"typeVersion": 4.20000000000000017763568394002504646778106689453125
},
{
"id": "d3b86577-01fc-40f8-ab65-93ba420187b8",
"name": "Convert Sitemap to JSON",
"type": "n8n-nodes-base.xml",
"position": [
80,
-60
],
"parameters": {
"options": {
"trim": true,
"normalize": true,
"mergeAttrs": true,
"ignoreAttrs": true,
"normalizeTags": true
}
},
"typeVersion": 1
},
{
"id": "bc0758ae-06eb-4a29-a91e-414407ec8ade",
"name": "Filter URLs",
"type": "n8n-nodes-base.filter",
"position": [
440,
-60
],
"parameters": {
"options": [],
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "0bf8e98c-b6c5-4129-852c-0d3e63f32f9f",
"operator": {
"type": "string",
"operation": "endsWith"
},
"leftValue": "={{ $json.loc }}",
"rightValue": ".pdf"
}
]
}
},
"typeVersion": 2.20000000000000017763568394002504646778106689453125
},
{
"id": "1d3fed97-1e72-426c-a48d-1a9683f40c4c",
"name": "Sticky Note1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-300,
-140
],
"parameters": {
"color": 6,
"width": 150,
"height": 240,
"content": "**Set your sitemap.xml\nurl here.**"
},
"typeVersion": 1
},
{
"id": "521ec74d-6707-47fd-992d-eecebed415ab",
"name": "Sticky Note2",
"type": "n8n-nodes-base.stickyNote",
"position": [
420,
-140
],
"parameters": {
"color": 6,
"width": 150,
"height": 240,
"content": "**Create your filter here.**"
},
"typeVersion": 1
},
{
"id": "07e6c3de-cc72-490d-b614-67034ce04bfb",
"name": "Sticky Note3",
"type": "n8n-nodes-base.stickyNote",
"position": [
-140,
-180
],
"parameters": {
"color": 7,
"width": 540,
"height": 300,
"content": "## Fetch and process the sitemap.xml file\nThis part fetches and process the sitemap.xml file from XML data to JSON that we can work with."
},
"typeVersion": 1
},
{
"id": "abf5f02d-d2a0-43f1-9a1f-386cc4f9861b",
"name": "Sticky Note",
"type": "n8n-nodes-base.stickyNote",
"position": [
-780,
-220
],
"parameters": {
"width": 280,
"height": 420,
"content": "## Sitemap.xml reader\nThis workflow reads an sitemap.xml and filters out the entries you want.\n\nBy default only PDF documents are returned at the end of the workflow.\n\n**SETUP**\n- Edit the **Set sitemap URL** block and add the url to the sitemap you want to read.\n\n- Edit the **Filter URLs** to your needs."
},
"typeVersion": 1
}
],
"active": false,
"pinData": [],
"settings": {
"executionOrder": "v1"
},
"versionId": "74793599-4c7d-4532-bbd5-a2ce4761fbc8",
"connections": {
"Split Out": {
"main": [
[
{
"node": "Filter URLs",
"type": "main",
"index": 0
}
]
]
},
"Get Sitemap": {
"main": [
[
{
"node": "Convert Sitemap to JSON",
"type": "main",
"index": 0
}
]
]
},
"Set sitemap URL": {
"main": [
[
{
"node": "Get Sitemap",
"type": "main",
"index": 0
}
]
]
},
"Convert Sitemap to JSON": {
"main": [
[
{
"node": "Split Out",
"type": "main",
"index": 0
}
]
]
},
"\u2018Test workflow\u2019 trigger": {
"main": [
[
{
"node": "Set sitemap URL",
"type": "main",
"index": 0
}
]
]
}
}
}