{
  "name": "3. Daily Website Enrichment",
  "nodes": [
    {
      "parameters": {
        "rule": {
          "interval": [
            { "field": "days", "daysInterval": 1, "triggerAtHour": 6 }
          ]
        }
      },
      "id": "schedule",
      "name": "Daily Schedule",
      "type": "n8n-nodes-base.scheduleTrigger",
      "typeVersion": 1.2,
      "position": [200, 300]
    },
    {
      "parameters": {
        "command": "cd /opt/crawlers && python3 enrich_websites.py --limit=50 2>&1"
      },
      "id": "enrich",
      "name": "Crawl & Extract (batch 50)",
      "type": "n8n-nodes-base.executeCommand",
      "typeVersion": 1,
      "position": [450, 300],
      "executeOnce": true
    },
    {
      "parameters": {
        "command": "cd /opt/crawlers && python3 -c \"\nimport json, sqlite3\ndb = sqlite3.connect('/opt/database/providers.db')\ndb.row_factory = sqlite3.Row\nrows = db.execute('''\n    SELECT sr.id, sr.source_url, sr.matched_brand_id,\n           json_extract(sr.raw_data, '$.pricing_text') AS pricing_text,\n           json_extract(sr.raw_data, '$.has_pricing') AS has_pricing\n    FROM source_record sr\n    WHERE sr.source_name = 'website_crawl'\n      AND sr.processed_at IS NULL\n      AND json_extract(sr.raw_data, '$.has_pricing') = 1\n    LIMIT 20\n''').fetchall()\nresult = [{'id': r['id'], 'brand_id': r['matched_brand_id'], 'url': r['source_url'], 'text_length': len(r['pricing_text'] or '')} for r in rows]\nprint(json.dumps(result))\n\" 2>&1"
      },
      "id": "get_queue",
      "name": "Get Pricing Pages Queue",
      "type": "n8n-nodes-base.executeCommand",
      "typeVersion": 1,
      "position": [700, 300]
    },
    {
      "parameters": {
        "jsCode": "const output = ($input.first().json.stdout || '').trim();\n\nlet rows;\ntry {\n  rows = JSON.parse(output);\n} catch (e) {\n  // Surface the failure but still emit one item so listing tiers are recomputed.\n  return [{ json: { error: 'Could not parse pricing queue output', raw: output, text_length: 0 } }];\n}\n\nif (!Array.isArray(rows) || rows.length === 0) {\n  // An empty queue would otherwise emit no items and stop the workflow\n  // before the tier recompute; text_length 0 routes this to the false branch.\n  return [{ json: { error: 'No pricing pages to process', raw: output, text_length: 0 } }];\n}\n\nreturn rows.map(row => ({ json: row }));"
      },
      "id": "parse_queue",
      "name": "Parse Queue Items",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [950, 300]
    },
    {
      "parameters": {
        "conditions": {
          "options": {
            "caseSensitive": true,
            "leftValue": "",
            "typeValidation": "strict",
            "version": 2
          },
          "conditions": [
            {
              "id": "has_text",
              "leftValue": "={{ $json.text_length }}",
              "rightValue": 100,
              "operator": { "type": "number", "operation": "gt" }
            }
          ],
          "combinator": "and"
        },
        "options": {}
      },
      "id": "has_text",
      "name": "Has Pricing Text?",
      "type": "n8n-nodes-base.if",
      "typeVersion": 2.2,
      "position": [1200, 300]
    },
    {
      "parameters": {
        "command": "=cd /opt/crawlers && python3 -c \"\nimport sqlite3, sys\ndb = sqlite3.connect('/opt/database/providers.db')\nrow = db.execute(\n    'SELECT json_extract(raw_data, ?) FROM source_record WHERE id = ?',\n    ('$.pricing_text', int(sys.argv[1]))\n).fetchone()\nprint(row[0][:6000] if row and row[0] else '')\n\" '{{ parseInt($json.id, 10) }}'"
      },
      "id": "get_text",
      "name": "Get Pricing Text",
      "type": "n8n-nodes-base.executeCommand",
      "typeVersion": 1,
      "position": [1450, 240]
    },
    {
      "parameters": {
        "url": "https://api.anthropic.com/v1/messages",
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            { "name": "x-api-key", "value": "={{ $env.ANTHROPIC_API_KEY }}" },
            { "name": "anthropic-version", "value": "2023-06-01" },
            { "name": "content-type", "value": "application/json" }
          ]
        },
        "sendBody": true,
        "specifyBody": "json",
        "jsonBody": "={{ JSON.stringify({ model: 'claude-haiku-4-5-20251001', max_tokens: 2048, messages: [{ role: 'user', content: 'Extract funeral packages and pricing from this funeral director\\'s pricing page. Return ONLY valid JSON matching this schema:\\n\\n{\\n  \"packages\": [\\n    {\\n      \"name\": \"Package name\",\\n      \"funeralType\": \"one of: Service & Cremation, Service & Burial, Cremation Only, Graveside Burial\",\\n      \"price\": 0,\\n      \"inclusions\": [\\n        {\"item\": \"Inclusion name\", \"price\": 0, \"optional\": false, \"complimentary\": false}\\n      ]\\n    }\\n  ]\\n}\\n\\nUse these inclusion type names where possible: Professional Service Fee, Transportation Service Fee, Professional Mortuary Care, Death Registration Certificate, Cremation Certificate/Permit, Government Levy, Accommodation, Viewing Fee, Coffin, Cremation Fee, Saturday Service Fee, Dressing Fee, Embalming, Digital Recording, Webstreaming, After Hours Transfer Surcharge.\\n\\nIf a price cannot be determined, use null. If no packages/pricing found, return {\"packages\": []}.\\n\\nPricing page text:\\n' + ($json.stdout || '').substring(0, 5000) }] }) }}"
      },
      "id": "ai_extract",
      "name": "AI Extract (Claude Haiku)",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
      "position": [1700, 240]
    },
    {
      "parameters": {
        "jsCode": "// This node runs once for all items, so walk every API response\n// instead of reading only the first one.\nconst results = [];\nconst responses = $input.all();\n\nfor (let i = 0; i < responses.length; i++) {\n  const response = responses[i].json;\n  // Trace this response back to the queue row that produced it.\n  const queueItem = $('Parse Queue Items').itemMatching(i).json;\n  const sourceId = queueItem.id;\n  const brandId = queueItem.brand_id;\n\n  let packages = [];\n  let parseError = null;\n  try {\n    const content = response.content[0].text;\n    // The model may wrap the JSON in markdown, so grab the outermost braces.\n    const jsonMatch = content.match(/\\{[\\s\\S]*\\}/);\n    if (jsonMatch) {\n      const parsed = JSON.parse(jsonMatch[0]);\n      packages = Array.isArray(parsed.packages) ? parsed.packages : [];\n    } else {\n      parseError = 'No JSON object found in AI response';\n    }\n  } catch (e) {\n    // Continue with zero packages, but record why extraction failed.\n    parseError = e.message;\n  }\n\n  // Base64 keeps AI-generated text out of the shell command line built by the save step.\n  const payloadB64 = Buffer.from(JSON.stringify({ sourceId, brandId, packages }), 'utf8').toString('base64');\n\n  results.push({\n    json: { sourceId, brandId, packages, packageCount: packages.length, parseError, payloadB64 },\n    pairedItem: i\n  });\n}\n\nreturn results;"
      },
      "id": "parse_ai",
      "name": "Parse AI Response",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [1950, 240]
    },
    {
      "parameters": {
        "command": "=cd /opt/crawlers && python3 -c \"\nimport base64, json, sqlite3, sys\n\npayload = json.loads(base64.b64decode(sys.argv[1]))\npackages = payload['packages']\nbrand_id = payload['brandId']\nsource_id = payload['sourceId']\n\ndb = sqlite3.connect('/opt/database/providers.db')\nsaved = 0\nfor pkg in packages:\n    if not pkg.get('price'):\n        continue\n    cur = db.execute(\n        'INSERT INTO package (title, funeral_type, brand_id, source_url, extraction_confidence) VALUES (?, ?, ?, ?, ?)',\n        (pkg['name'], pkg.get('funeralType'), brand_id, 'ai_extraction', 0.7)\n    )\n    pkg_id = cur.lastrowid\n    saved += 1\n    for inc in pkg.get('inclusions') or []:\n        if inc.get('price') is not None:\n            db.execute(\n                'INSERT INTO package_inclusion (price, optional, complimentary, inclusion_type_title, package_id) VALUES (?, ?, ?, ?, ?)',\n                (inc['price'], 1 if inc.get('optional') else 0, 1 if inc.get('complimentary') else 0, inc['item'], pkg_id)\n            )\n\ndb.execute('UPDATE source_record SET processed_at = CURRENT_TIMESTAMP WHERE id = ?', (source_id,))\ndb.execute('UPDATE funeral_brand SET enrichment_status = ?, last_enriched_at = CURRENT_TIMESTAMP WHERE id = ?', ('complete', brand_id))\ndb.commit()\nprint(str(saved) + ' packages saved for brand ' + str(brand_id))\n\" '{{ $json.payloadB64 }}' 2>&1"
      },
      "id": "save_packages",
      "name": "Save Packages to DB",
      "type": "n8n-nodes-base.executeCommand",
      "typeVersion": 1,
      "position": [2200, 240]
    },
    {
      "parameters": {
        "command": "cd /opt/crawlers && python3 compute_tiers.py 2>&1"
      },
      "id": "recompute_tiers",
      "name": "Recompute Listing Tiers",
      "type": "n8n-nodes-base.executeCommand",
      "typeVersion": 1,
      "position": [2450, 300],
      "executeOnce": true
    }
  ],
  "connections": {
    "Daily Schedule": {
      "main": [
        [{ "node": "Crawl & Extract (batch 50)", "type": "main", "index": 0 }]
      ]
    },
    "Crawl & Extract (batch 50)": {
      "main": [
        [{ "node": "Get Pricing Pages Queue", "type": "main", "index": 0 }]
      ]
    },
    "Get Pricing Pages Queue": {
      "main": [
        [{ "node": "Parse Queue Items", "type": "main", "index": 0 }]
      ]
    },
    "Parse Queue Items": {
      "main": [
        [{ "node": "Has Pricing Text?", "type": "main", "index": 0 }]
      ]
    },
    "Has Pricing Text?": {
      "main": [
        [{ "node": "Get Pricing Text", "type": "main", "index": 0 }],
        [{ "node": "Recompute Listing Tiers", "type": "main", "index": 0 }]
      ]
    },
    "Get Pricing Text": {
      "main": [
        [{ "node": "AI Extract (Claude Haiku)", "type": "main", "index": 0 }]
      ]
    },
    "AI Extract (Claude Haiku)": {
      "main": [
        [{ "node": "Parse AI Response", "type": "main", "index": 0 }]
      ]
    },
    "Parse AI Response": {
      "main": [
        [{ "node": "Save Packages to DB", "type": "main", "index": 0 }]
      ]
    },
    "Save Packages to DB": {
      "main": [
        [{ "node": "Recompute Listing Tiers", "type": "main", "index": 0 }]
      ]
    }
  },
  "settings": {
    "executionOrder": "v1"
  },
  "tags": [
    { "name": "funeral-arranger" }
  ]
}