Python crawlers for VIC Register, Funerals Australia, NFDA n8n workflows for scheduled discovery and enrichment SQLite schema and seeded dev database (1,463 providers) End-to-end process documentation in n8n/PROCESS.md
222 lines
8.0 KiB
SQL
222 lines
8.0 KiB
SQL
-- Provider Discovery Pipeline - SQLite Schema (for local dev/testing)
|
|
-- Production uses Postgres (see schema.sql)
|
|
|
|
-- ============================================================
|
|
-- FUNERAL HOME
|
|
-- ============================================================
|
|
|
|
CREATE TABLE IF NOT EXISTS funeral_home (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
title TEXT NOT NULL,
|
|
website TEXT,
|
|
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
);
|
|
|
|
-- ============================================================
|
|
-- FUNERAL BRAND
|
|
-- ============================================================
|
|
|
|
CREATE TABLE IF NOT EXISTS funeral_brand (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
title TEXT NOT NULL,
|
|
description TEXT,
|
|
modal_description TEXT,
|
|
email TEXT,
|
|
phone TEXT,
|
|
website TEXT,
|
|
abn TEXT,
|
|
code TEXT UNIQUE,
|
|
sort INTEGER DEFAULT 0,
|
|
hidden INTEGER NOT NULL DEFAULT 1,
|
|
|
|
business_address TEXT,
|
|
business_suburb TEXT,
|
|
business_state TEXT,
|
|
business_postcode TEXT,
|
|
|
|
background_colour TEXT,
|
|
foreground_colour TEXT,
|
|
|
|
funeral_home_id INTEGER REFERENCES funeral_home(id) ON DELETE SET NULL,
|
|
|
|
verified INTEGER NOT NULL DEFAULT 0,
|
|
source_key TEXT UNIQUE,
|
|
source_url TEXT,
|
|
last_enriched_at TEXT,
|
|
enrichment_status TEXT NOT NULL DEFAULT 'pending' CHECK(enrichment_status IN ('pending','partial','complete','failed')),
|
|
|
|
-- Listing tier: verified | priced | estimated | listed
|
|
listing_tier TEXT NOT NULL DEFAULT 'listed'
|
|
CHECK(listing_tier IN ('verified','priced','estimated','listed')),
|
|
|
|
available_funeral_types TEXT,
|
|
|
|
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_brand_abn ON funeral_brand(abn);
|
|
CREATE INDEX IF NOT EXISTS idx_brand_source_key ON funeral_brand(source_key);
|
|
CREATE INDEX IF NOT EXISTS idx_brand_listing_tier ON funeral_brand(listing_tier);
|
|
CREATE INDEX IF NOT EXISTS idx_brand_name_postcode ON funeral_brand(title, business_postcode);
|
|
CREATE INDEX IF NOT EXISTS idx_brand_verified ON funeral_brand(verified);
|
|
CREATE INDEX IF NOT EXISTS idx_brand_hidden ON funeral_brand(hidden);
|
|
|
|
-- ============================================================
|
|
-- LOCATION
|
|
-- ============================================================
|
|
|
|
CREATE TABLE IF NOT EXISTS location (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
title TEXT NOT NULL,
|
|
address TEXT,
|
|
suburb TEXT,
|
|
state TEXT,
|
|
postcode TEXT,
|
|
country TEXT DEFAULT 'Australia',
|
|
lat REAL,
|
|
lng REAL,
|
|
rating REAL,
|
|
rating_num INTEGER,
|
|
google_place_key TEXT,
|
|
|
|
brand_id INTEGER NOT NULL REFERENCES funeral_brand(id) ON DELETE CASCADE,
|
|
|
|
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_location_brand ON location(brand_id);
|
|
CREATE INDEX IF NOT EXISTS idx_location_postcode ON location(postcode);
|
|
|
|
-- ============================================================
|
|
-- FUNERAL AREA
|
|
-- ============================================================
|
|
|
|
CREATE TABLE IF NOT EXISTS funeral_area (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
title TEXT NOT NULL,
|
|
code TEXT,
|
|
description TEXT,
|
|
postcodes TEXT,
|
|
sort INTEGER DEFAULT 0,
|
|
hidden INTEGER DEFAULT 0,
|
|
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS brand_funeral_area (
|
|
brand_id INTEGER NOT NULL REFERENCES funeral_brand(id) ON DELETE CASCADE,
|
|
funeral_area_id INTEGER NOT NULL REFERENCES funeral_area(id) ON DELETE CASCADE,
|
|
PRIMARY KEY (brand_id, funeral_area_id)
|
|
);
|
|
|
|
-- ============================================================
|
|
-- PACKAGE
|
|
-- ============================================================
|
|
|
|
CREATE TABLE IF NOT EXISTS package (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
title TEXT NOT NULL,
|
|
description TEXT,
|
|
sort INTEGER DEFAULT 0,
|
|
hidden INTEGER DEFAULT 0,
|
|
for_whom TEXT,
|
|
religion TEXT,
|
|
funeral_type TEXT CHECK(funeral_type IN (
|
|
'Service & Cremation','Service & Burial','Cremation Only',
|
|
'Graveside Burial','Water Cremation'
|
|
)),
|
|
|
|
brand_id INTEGER NOT NULL REFERENCES funeral_brand(id) ON DELETE CASCADE,
|
|
|
|
source_url TEXT,
|
|
extraction_confidence REAL,
|
|
|
|
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_package_brand ON package(brand_id);
|
|
|
|
CREATE TABLE IF NOT EXISTS package_funeral_area (
|
|
package_id INTEGER NOT NULL REFERENCES package(id) ON DELETE CASCADE,
|
|
funeral_area_id INTEGER NOT NULL REFERENCES funeral_area(id) ON DELETE CASCADE,
|
|
PRIMARY KEY (package_id, funeral_area_id)
|
|
);
|
|
|
|
-- ============================================================
|
|
-- PACKAGE INCLUSION
|
|
-- ============================================================
|
|
|
|
CREATE TABLE IF NOT EXISTS package_inclusion (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
price REAL NOT NULL,
|
|
optional INTEGER NOT NULL DEFAULT 0,
|
|
complimentary INTEGER NOT NULL DEFAULT 0,
|
|
display INTEGER NOT NULL DEFAULT 1,
|
|
description TEXT,
|
|
sort INTEGER DEFAULT 0,
|
|
inclusion_type_title TEXT NOT NULL,
|
|
|
|
package_id INTEGER NOT NULL REFERENCES package(id) ON DELETE CASCADE,
|
|
|
|
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_inclusion_package ON package_inclusion(package_id);
|
|
|
|
-- ============================================================
|
|
-- KNOWN FOR
|
|
-- ============================================================
|
|
|
|
CREATE TABLE IF NOT EXISTS known_for (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
title TEXT NOT NULL,
|
|
brand_id INTEGER NOT NULL REFERENCES funeral_brand(id) ON DELETE CASCADE
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_known_for_brand ON known_for(brand_id);
|
|
|
|
-- ============================================================
|
|
-- SOURCE LOG
|
|
-- ============================================================
|
|
|
|
CREATE TABLE IF NOT EXISTS source_log (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
source_name TEXT NOT NULL,
|
|
run_started_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
run_finished_at TEXT,
|
|
records_found INTEGER DEFAULT 0,
|
|
records_new INTEGER DEFAULT 0,
|
|
records_updated INTEGER DEFAULT 0,
|
|
records_skipped INTEGER DEFAULT 0,
|
|
status TEXT DEFAULT 'running',
|
|
error_message TEXT,
|
|
metadata TEXT -- JSON string
|
|
);
|
|
|
|
-- ============================================================
|
|
-- SOURCE RECORD
|
|
-- ============================================================
|
|
|
|
CREATE TABLE IF NOT EXISTS source_record (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
source_name TEXT NOT NULL,
|
|
source_id TEXT NOT NULL,
|
|
source_url TEXT,
|
|
raw_data TEXT NOT NULL, -- JSON string
|
|
normalized_data TEXT, -- JSON string
|
|
matched_brand_id INTEGER REFERENCES funeral_brand(id) ON DELETE SET NULL,
|
|
match_type TEXT,
|
|
processed_at TEXT,
|
|
log_id INTEGER REFERENCES source_log(id) ON DELETE SET NULL,
|
|
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
|
|
UNIQUE(source_name, source_id)
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_source_record_source ON source_record(source_name, source_id);
|