Initial commit: funeral provider discovery pipeline
Python crawlers for VIC Register, Funerals Australia, and NFDA; n8n workflows for scheduled discovery and enrichment; SQLite schema and seeded dev database (1,463 providers); end-to-end process documentation in n8n/PROCESS.md.
This commit is contained in:
100
n8n/workflows/2_daily_website_discovery.json
Normal file
100
n8n/workflows/2_daily_website_discovery.json
Normal file
@@ -0,0 +1,100 @@
|
||||
{
|
||||
"name": "2. Daily Website Discovery",
|
||||
"nodes": [
|
||||
{
|
||||
"parameters": {
|
||||
"rule": {
|
||||
"interval": [{ "field": "days", "daysInterval": 1, "triggerAtHour": 4 }]
|
||||
}
|
||||
},
|
||||
"id": "schedule",
|
||||
"name": "Daily Schedule",
|
||||
"type": "n8n-nodes-base.scheduleTrigger",
|
||||
"typeVersion": 1.2,
|
||||
"position": [200, 300]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"command": "cd /opt/crawlers && python3 -c \"from base import get_db; db=get_db(); n=db.execute('SELECT COUNT(*) as n FROM funeral_brand WHERE website IS NULL AND verified=0').fetchone()['n']; print(n)\" 2>&1"
|
||||
},
|
||||
"id": "check_queue",
|
||||
"name": "Check Queue Size",
|
||||
"type": "n8n-nodes-base.executeCommand",
|
||||
"typeVersion": 1,
|
||||
"position": [450, 300]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"conditions": {
|
||||
"conditions": [
|
||||
{
|
||||
"id": "has_work",
|
||||
"leftValue": "={{ parseInt($json.stdout.trim()) }}",
|
||||
"rightValue": 0,
|
||||
"operator": { "type": "number", "operation": "gt" }
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"id": "has_work",
|
||||
"name": "Providers Need Websites?",
|
||||
"type": "n8n-nodes-base.if",
|
||||
"typeVersion": 2.2,
|
||||
"position": [700, 300]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"command": "cd /opt/crawlers && python3 lookup_abn.py --limit=100 2>&1"
|
||||
},
|
||||
"id": "abn_lookup",
|
||||
"name": "ABN Lookup (batch 100)",
|
||||
"type": "n8n-nodes-base.executeCommand",
|
||||
"typeVersion": 1,
|
||||
"position": [950, 200]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"command": "cd /opt/crawlers && python3 discover_websites.py --limit=100 2>&1"
|
||||
},
|
||||
"id": "discover",
|
||||
"name": "Discover Websites (batch 100)",
|
||||
"type": "n8n-nodes-base.executeCommand",
|
||||
"typeVersion": 1,
|
||||
"position": [1250, 200]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"jsCode": "const output = $input.first().json.stdout || '';\nconst foundMatch = output.match(/(\\d+) websites found/);\nconst found = foundMatch ? parseInt(foundMatch[1]) : 0;\nreturn [{ json: { message: `Website discovery batch complete. ${found} websites found.`, output } }];"
|
||||
},
|
||||
"id": "summary",
|
||||
"name": "Build Summary",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [1500, 200]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"jsCode": "return [{ json: { message: 'No providers need website discovery.' } }];"
|
||||
},
|
||||
"id": "skip",
|
||||
"name": "Skip",
|
||||
"type": "n8n-nodes-base.code",
|
||||
"typeVersion": 2,
|
||||
"position": [950, 420]
|
||||
}
|
||||
],
|
||||
"connections": {
|
||||
"Daily Schedule": { "main": [[ { "node": "Check Queue Size", "type": "main", "index": 0 } ]] },
|
||||
"Check Queue Size": { "main": [[ { "node": "Providers Need Websites?", "type": "main", "index": 0 } ]] },
|
||||
"Providers Need Websites?": {
|
||||
"main": [
|
||||
[{ "node": "ABN Lookup (batch 100)", "type": "main", "index": 0 }],
|
||||
[{ "node": "Skip", "type": "main", "index": 0 }]
|
||||
]
|
||||
},
|
||||
"ABN Lookup (batch 100)": { "main": [[ { "node": "Discover Websites (batch 100)", "type": "main", "index": 0 } ]] },
|
||||
"Discover Websites (batch 100)": { "main": [[ { "node": "Build Summary", "type": "main", "index": 0 } ]] }
|
||||
},
|
||||
"settings": { "executionOrder": "v1" },
|
||||
"tags": [{ "name": "funeral-arranger" }]
|
||||
}
|
||||
Reference in New Issue
Block a user