Initial commit: funeral provider discovery pipeline
Python crawlers for VIC Register, Funerals Australia, NFDA n8n workflows for scheduled discovery and enrichment SQLite schema and seeded dev database (1,463 providers) End-to-end process documentation in n8n/PROCESS.md
This commit is contained in:
221
database/schema_sqlite.sql
Normal file
221
database/schema_sqlite.sql
Normal file
@@ -0,0 +1,221 @@
|
||||
-- Provider Discovery Pipeline - SQLite Schema (for local dev/testing)
|
||||
-- Production uses Postgres (see schema.sql)
|
||||
|
||||
-- ============================================================
|
||||
-- FUNERAL HOME
|
||||
-- ============================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS funeral_home (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
title TEXT NOT NULL,
|
||||
website TEXT,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
|
||||
-- ============================================================
|
||||
-- FUNERAL BRAND
|
||||
-- ============================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS funeral_brand (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
title TEXT NOT NULL,
|
||||
description TEXT,
|
||||
modal_description TEXT,
|
||||
email TEXT,
|
||||
phone TEXT,
|
||||
website TEXT,
|
||||
abn TEXT,
|
||||
code TEXT UNIQUE,
|
||||
sort INTEGER DEFAULT 0,
|
||||
hidden INTEGER NOT NULL DEFAULT 1,
|
||||
|
||||
business_address TEXT,
|
||||
business_suburb TEXT,
|
||||
business_state TEXT,
|
||||
business_postcode TEXT,
|
||||
|
||||
background_colour TEXT,
|
||||
foreground_colour TEXT,
|
||||
|
||||
funeral_home_id INTEGER REFERENCES funeral_home(id) ON DELETE SET NULL,
|
||||
|
||||
verified INTEGER NOT NULL DEFAULT 0,
|
||||
source_key TEXT UNIQUE,
|
||||
source_url TEXT,
|
||||
last_enriched_at TEXT,
|
||||
enrichment_status TEXT NOT NULL DEFAULT 'pending' CHECK(enrichment_status IN ('pending','partial','complete','failed')),
|
||||
|
||||
-- Listing tier: verified | priced | estimated | listed
|
||||
listing_tier TEXT NOT NULL DEFAULT 'listed'
|
||||
CHECK(listing_tier IN ('verified','priced','estimated','listed')),
|
||||
|
||||
available_funeral_types TEXT,
|
||||
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_brand_abn ON funeral_brand(abn);
|
||||
CREATE INDEX IF NOT EXISTS idx_brand_source_key ON funeral_brand(source_key);
|
||||
CREATE INDEX IF NOT EXISTS idx_brand_listing_tier ON funeral_brand(listing_tier);
|
||||
CREATE INDEX IF NOT EXISTS idx_brand_name_postcode ON funeral_brand(title, business_postcode);
|
||||
CREATE INDEX IF NOT EXISTS idx_brand_verified ON funeral_brand(verified);
|
||||
CREATE INDEX IF NOT EXISTS idx_brand_hidden ON funeral_brand(hidden);
|
||||
|
||||
-- ============================================================
|
||||
-- LOCATION
|
||||
-- ============================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS location (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
title TEXT NOT NULL,
|
||||
address TEXT,
|
||||
suburb TEXT,
|
||||
state TEXT,
|
||||
postcode TEXT,
|
||||
country TEXT DEFAULT 'Australia',
|
||||
lat REAL,
|
||||
lng REAL,
|
||||
rating REAL,
|
||||
rating_num INTEGER,
|
||||
google_place_key TEXT,
|
||||
|
||||
brand_id INTEGER NOT NULL REFERENCES funeral_brand(id) ON DELETE CASCADE,
|
||||
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_location_brand ON location(brand_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_location_postcode ON location(postcode);
|
||||
|
||||
-- ============================================================
|
||||
-- FUNERAL AREA
|
||||
-- ============================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS funeral_area (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
title TEXT NOT NULL,
|
||||
code TEXT,
|
||||
description TEXT,
|
||||
postcodes TEXT,
|
||||
sort INTEGER DEFAULT 0,
|
||||
hidden INTEGER DEFAULT 0,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS brand_funeral_area (
|
||||
brand_id INTEGER NOT NULL REFERENCES funeral_brand(id) ON DELETE CASCADE,
|
||||
funeral_area_id INTEGER NOT NULL REFERENCES funeral_area(id) ON DELETE CASCADE,
|
||||
PRIMARY KEY (brand_id, funeral_area_id)
|
||||
);
|
||||
|
||||
-- ============================================================
|
||||
-- PACKAGE
|
||||
-- ============================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS package (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
title TEXT NOT NULL,
|
||||
description TEXT,
|
||||
sort INTEGER DEFAULT 0,
|
||||
hidden INTEGER DEFAULT 0,
|
||||
for_whom TEXT,
|
||||
religion TEXT,
|
||||
funeral_type TEXT CHECK(funeral_type IN (
|
||||
'Service & Cremation','Service & Burial','Cremation Only',
|
||||
'Graveside Burial','Water Cremation'
|
||||
)),
|
||||
|
||||
brand_id INTEGER NOT NULL REFERENCES funeral_brand(id) ON DELETE CASCADE,
|
||||
|
||||
source_url TEXT,
|
||||
extraction_confidence REAL,
|
||||
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_package_brand ON package(brand_id);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS package_funeral_area (
|
||||
package_id INTEGER NOT NULL REFERENCES package(id) ON DELETE CASCADE,
|
||||
funeral_area_id INTEGER NOT NULL REFERENCES funeral_area(id) ON DELETE CASCADE,
|
||||
PRIMARY KEY (package_id, funeral_area_id)
|
||||
);
|
||||
|
||||
-- ============================================================
|
||||
-- PACKAGE INCLUSION
|
||||
-- ============================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS package_inclusion (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
price REAL NOT NULL,
|
||||
optional INTEGER NOT NULL DEFAULT 0,
|
||||
complimentary INTEGER NOT NULL DEFAULT 0,
|
||||
display INTEGER NOT NULL DEFAULT 1,
|
||||
description TEXT,
|
||||
sort INTEGER DEFAULT 0,
|
||||
inclusion_type_title TEXT NOT NULL,
|
||||
|
||||
package_id INTEGER NOT NULL REFERENCES package(id) ON DELETE CASCADE,
|
||||
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_inclusion_package ON package_inclusion(package_id);
|
||||
|
||||
-- ============================================================
|
||||
-- KNOWN FOR
|
||||
-- ============================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS known_for (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
title TEXT NOT NULL,
|
||||
brand_id INTEGER NOT NULL REFERENCES funeral_brand(id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_known_for_brand ON known_for(brand_id);
|
||||
|
||||
-- ============================================================
|
||||
-- SOURCE LOG
|
||||
-- ============================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS source_log (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
source_name TEXT NOT NULL,
|
||||
run_started_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
run_finished_at TEXT,
|
||||
records_found INTEGER DEFAULT 0,
|
||||
records_new INTEGER DEFAULT 0,
|
||||
records_updated INTEGER DEFAULT 0,
|
||||
records_skipped INTEGER DEFAULT 0,
|
||||
status TEXT DEFAULT 'running',
|
||||
error_message TEXT,
|
||||
metadata TEXT -- JSON string
|
||||
);
|
||||
|
||||
-- ============================================================
|
||||
-- SOURCE RECORD
|
||||
-- ============================================================
|
||||
|
||||
CREATE TABLE IF NOT EXISTS source_record (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
source_name TEXT NOT NULL,
|
||||
source_id TEXT NOT NULL,
|
||||
source_url TEXT,
|
||||
raw_data TEXT NOT NULL, -- JSON string
|
||||
normalized_data TEXT, -- JSON string
|
||||
matched_brand_id INTEGER REFERENCES funeral_brand(id) ON DELETE SET NULL,
|
||||
match_type TEXT,
|
||||
processed_at TEXT,
|
||||
log_id INTEGER REFERENCES source_log(id) ON DELETE SET NULL,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
|
||||
UNIQUE(source_name, source_id)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_source_record_source ON source_record(source_name, source_id);
|
||||
Reference in New Issue
Block a user