{
"id": "xEij0kj2I1DHbL3I",
"meta": {
"instanceId": "31e69f7f4a77bf465b805824e303232f0227212ae922d12133a0f96ffeab4fef",
"templateCredsSetupCompleted": true
},
"name": "\ud83d\udca1\ud83c\udf10 Essential Multipage Website Scraper with Jina.ai",
"tags": [],
"nodes": [
{
"id": "3a503859-ef0a-492d-81c6-37e4f0c4c25e",
"name": "Sticky Note",
"type": "n8n-nodes-base.stickyNote",
"position": [
-840,
0
],
"parameters": {
"color": 3,
"width": 340,
"height": 320,
"content": "## Jina.ai Web Scraper\n### No API Key Required\n"
},
"typeVersion": 1
},
{
"id": "c5217a1a-f074-409b-8340-72afdc5fc8b5",
"name": "When clicking \u2018Test workflow\u2019",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-1500,
-300
],
"parameters": {},
"typeVersion": 1
},
{
"id": "72af3b00-2632-4877-a0b6-7477e2f468f7",
"name": "Loop Over Items",
"type": "n8n-nodes-base.splitInBatches",
"position": [
-1080,
20
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "11f0fa02-51f8-41cc-b789-5c452b6899aa",
"name": "Wait",
"type": "n8n-nodes-base.wait",
"position": [
80,
220
],
"webhookId": "081ce124-0cbf-4a21-a1e7-2c465f460448",
"parameters": {},
"typeVersion": 1.1
},
{
"id": "cf3b5887-8ff2-46e0-ab33-384ab0987cbb",
"name": "Limit",
"type": "n8n-nodes-base.limit",
"position": [
80,
-300
],
"parameters": {
"maxItems": 20
},
"typeVersion": 1
},
{
"id": "c4f04d82-aa33-46cf-a8e2-0b4e717e754a",
"name": "Get List of Website URLs",
"type": "n8n-nodes-base.httpRequest",
"position": [
-780,
-300
],
"parameters": {
"url": "={{ $json.sitemap_url }}",
"options": {}
},
"typeVersion": 4.2
},
{
"id": "7f507c38-1e9e-4c46-8dea-bd6daf65dc55",
"name": "Convert to JSON",
"type": "n8n-nodes-base.xml",
"position": [
-560,
-300
],
"parameters": {
"options": {}
},
"typeVersion": 1
},
{
"id": "e21b55c2-8b0d-4c7c-ba91-a2d563a4c966",
"name": "Create List of Website URLs",
"type": "n8n-nodes-base.splitOut",
"position": [
-340,
-300
],
"parameters": {
"options": {},
"fieldToSplitOut": "urlset.url"
},
"typeVersion": 1
},
{
"id": "61555239-8a16-424e-8a60-700f6ebaa270",
"name": "Filter By Topics or Pages",
"type": "n8n-nodes-base.filter",
"position": [
-120,
-300
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "or",
"conditions": [
{
"id": "d66c304d-879a-4dc4-908f-ab0665093672",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.loc }}",
"rightValue": "=https:\/\/ai.pydantic.dev\/"
},
{
"id": "3c930950-bee4-442b-82e6-4437fd39a933",
"operator": {
"type": "string",
"operation": "contains"
},
"leftValue": "={{ $json.loc.toLowerCase() }}",
"rightValue": "agent"
},
{
"id": "aaeaf34e-ad5a-4673-b3bd-8bddf3500988",
"operator": {
"type": "string",
"operation": "contains"
},
"leftValue": "={{ $json.loc.toLowerCase() }}",
"rightValue": "tool"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "dd25fb57-64a3-4c47-be04-6eb66d16520a",
"name": "Set Website URL",
"type": "n8n-nodes-base.set",
"position": [
-1080,
-300
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "1601dc3e-8024-4e19-b592-93a4e4f77641",
"name": "sitemap_url",
"type": "string",
"value": "https:\/\/ai.pydantic.dev\/sitemap.xml"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "14ac1c87-29fe-44c8-9c1e-f247a292dde5",
"name": "Jina.ai Web Scraper",
"type": "n8n-nodes-base.httpRequest",
"position": [
-720,
120
],
"parameters": {
"url": "=https:\/\/r.jina.ai\/{{ $json.loc }}",
"options": {}
},
"typeVersion": 4.2
},
{
"id": "be253ec2-f088-4895-8ef2-61a3720cf68b",
"name": "Save Webpage Contents to Google Drive",
"type": "n8n-nodes-base.googleDrive",
"position": [
-120,
120
],
"parameters": {
"name": "={{ $('Loop Over Items').item.json.loc }} - {{ $json.title }}",
"content": "={{ $json.markdown }}",
"driveId": {
"__rl": true,
"mode": "list",
"value": "My Drive"
},
"options": {},
"folderId": {
"__rl": true,
"mode": "list",
"value": "root",
"cachedResultName": "\/ (Root folder)"
},
"operation": "createFromText"
},
"credentials": {
"googleDriveOAuth2Api": {
"id": "UhdXGYLTAJbsa0xX",
"name": "Google Drive account"
}
},
"typeVersion": 3
},
{
"id": "95d808c7-a3ca-4f59-a385-cc77bdff322e",
"name": "Extract Title & Markdown Content",
"type": "n8n-nodes-base.code",
"position": [
-380,
120
],
"parameters": {
"jsCode": "\/\/ Get the text output from the previous node\nconst data = $input.first().json.data;\n\n\/\/ Fail with a clear message instead of an opaque TypeError from data.match()\nif (typeof data !== 'string') {\n  throw new Error('Expected the scraped page text in json.data but received ' + typeof data);\n}\n\n\/\/ Regular expression to capture the title line\nconst titleRegex = \/^Title:\\s*(.+)$\/m;\n\/\/ Regular expression to capture everything after \"Markdown Content:\" (LF or CRLF line ending)\nconst markdownRegex = \/Markdown Content:\\r?\\n([\\s\\S]+)\/;\n\n\/\/ Extract the title using the first capture group\nconst titleMatch = data.match(titleRegex);\nconst title = titleMatch ? titleMatch[1].trim() : '';\n\n\/\/ Extract the markdown content using the first capture group\nconst markdownMatch = data.match(markdownRegex);\nconst markdown = markdownMatch ? markdownMatch[1].trim() : '';\n\n\/\/ Return one item in n8n's standard structure: an array of { json } objects\nreturn [{ json: { title, markdown } }];"
},
"typeVersion": 2
},
{
"id": "2fb86c81-c144-4450-908c-559855deadef",
"name": "Sticky Note1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1240,
-580
],
"parameters": {
"color": 7,
"width": 1540,
"height": 1080,
"content": "# \ud83d\udca1\ud83c\udf10 Essential Multipage Website Scraper with Jina.ai\n## Scrape entire websites with this workflow\n**Use responsibly and follow local rules and regulations**"
},
"typeVersion": 1
},
{
"id": "b470b294-95d0-4e51-a9cc-2fe17316a771",
"name": "Sticky Note2",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1580,
-400
],
"parameters": {
"color": 4,
"width": 280,
"height": 300,
"content": "## \ud83d\udc4dTry Me!"
},
"typeVersion": 1
},
{
"id": "fafd0623-a423-4e73-9609-cee8e81f5c13",
"name": "Sticky Note3",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1180,
-400
],
"parameters": {
"width": 300,
"height": 300,
"content": "## \ud83d\udc47Add Website Sitemap URL"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "2e815787-d83b-4ab7-a959-2f33006a37a5",
"connections": {
"Wait": {
"main": [
[
{
"node": "Loop Over Items",
"type": "main",
"index": 0
}
]
]
},
"Limit": {
"main": [
[
{
"node": "Loop Over Items",
"type": "main",
"index": 0
}
]
]
},
"Convert to JSON": {
"main": [
[
{
"node": "Create List of Website URLs",
"type": "main",
"index": 0
}
]
]
},
"Loop Over Items": {
"main": [
[],
[
{
"node": "Jina.ai Web Scraper",
"type": "main",
"index": 0
}
]
]
},
"Set Website URL": {
"main": [
[
{
"node": "Get List of Website URLs",
"type": "main",
"index": 0
}
]
]
},
"Jina.ai Web Scraper": {
"main": [
[
{
"node": "Extract Title & Markdown Content",
"type": "main",
"index": 0
}
]
]
},
"Get List of Website URLs": {
"main": [
[
{
"node": "Convert to JSON",
"type": "main",
"index": 0
}
]
]
},
"Filter By Topics or Pages": {
"main": [
[
{
"node": "Limit",
"type": "main",
"index": 0
}
]
]
},
"Create List of Website URLs": {
"main": [
[
{
"node": "Filter By Topics or Pages",
"type": "main",
"index": 0
}
]
]
},
"Extract Title & Markdown Content": {
"main": [
[
{
"node": "Save Webpage Contents to Google Drive",
"type": "main",
"index": 0
}
]
]
},
"When clicking \u2018Test workflow\u2019": {
"main": [
[
{
"node": "Set Website URL",
"type": "main",
"index": 0
}
]
]
},
"Save Webpage Contents to Google Drive": {
"main": [
[
{
"node": "Wait",
"type": "main",
"index": 0
}
]
]
}
}
}