Initial commit: funeral provider discovery pipeline
- Python crawlers for VIC Register, Funerals Australia, NFDA
- n8n workflows for scheduled discovery and enrichment
- SQLite schema and seeded dev database (1,463 providers)
- End-to-end process documentation in n8n/PROCESS.md
This commit is contained in:
285
database/schema.sql
Normal file
285
database/schema.sql
Normal file
@@ -0,0 +1,285 @@
|
||||
-- Provider Discovery Pipeline - Database Schema
|
||||
-- Designed for Postgres. Compatible with SilverStripe CMS adaptation.
|
||||
--
|
||||
-- This schema covers the provider-facing tables needed for both
|
||||
-- verified (signed-up) and unverified (auto-discovered) providers.
|
||||
-- Product catalog tables (coffins, venues, etc.) are NOT included here —
|
||||
-- those only apply to verified providers and live in the main CMS.
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- ============================================================
|
||||
-- ENUMS
|
||||
-- ============================================================
|
||||
|
||||
-- Allowed values for funeral_brand.enrichment_status.
CREATE TYPE enrichment_status AS ENUM (
    'pending',
    'partial',
    'complete',
    'failed'
);

-- How a provider appears on the platform (funeral_brand.listing_tier).
-- The tier is computed from data quality: verified status + packages + inclusions.
CREATE TYPE listing_tier AS ENUM (
    'verified',   -- Tier 1: Signed up, full branding, arrangements enabled
    'priced',     -- Tier 2: Unverified, 2+ packages with itemized inclusion prices
    'estimated',  -- Tier 3: Unverified, at least one total package price
    'listed'      -- Tier 4: Unverified, contact info only, no pricing
);

-- Allowed values for package.funeral_type.
CREATE TYPE funeral_type_enum AS ENUM (
    'Service & Cremation',
    'Service & Burial',
    'Cremation Only',
    'Graveside Burial',
    'Water Cremation'
);
|
||||
|
||||
-- ============================================================
|
||||
-- 1. FUNERAL HOME (parent organisation)
|
||||
-- ============================================================
|
||||
|
||||
-- Parent organisation; funeral_brand rows point at it via funeral_home_id.
CREATE TABLE funeral_home (
    id          SERIAL,
    title       TEXT        NOT NULL,
    website     TEXT,
    created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at  TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    PRIMARY KEY (id)
);
|
||||
|
||||
-- ============================================================
|
||||
-- 2. FUNERAL BRAND (customer-facing provider)
|
||||
-- ============================================================
|
||||
|
||||
-- Customer-facing provider record. Holds both verified (signed-up) and
-- unverified (auto-discovered) providers; the "verified" flag tells them apart.
CREATE TABLE funeral_brand (
    id                      SERIAL,
    title                   TEXT              NOT NULL,
    description             TEXT,
    modal_description       TEXT,
    email                   TEXT,
    phone                   TEXT,
    website                 TEXT,
    abn                     TEXT,
    code                    TEXT,                               -- URL slug (e.g. "hparsons"); unique, see below
    sort                    INTEGER           DEFAULT 0,
    hidden                  BOOLEAN           NOT NULL DEFAULT TRUE,  -- unverified start hidden

    -- Business address
    business_address        TEXT,
    business_suburb         TEXT,
    business_state          TEXT,
    business_postcode       TEXT,

    -- Branding (optional — unverified providers have no images)
    background_colour       TEXT,
    foreground_colour       TEXT,

    -- Owning organisation (optional)
    funeral_home_id         INTEGER,

    -- Verified vs auto-discovered
    verified                BOOLEAN           NOT NULL DEFAULT FALSE,

    -- Provenance tracking
    source_key              TEXT,                               -- "{source}:{externalId}" for dedup; unique, see below
    source_url              TEXT,                               -- where this record was found
    last_enriched_at        TIMESTAMPTZ,
    enrichment_status       enrichment_status NOT NULL DEFAULT 'pending',

    -- Listing tier (computed from data quality)
    listing_tier            listing_tier      NOT NULL DEFAULT 'listed',

    -- Funeral types offered (comma-separated IDs, same as existing CMS)
    available_funeral_types TEXT,

    created_at              TIMESTAMPTZ       NOT NULL DEFAULT NOW(),
    updated_at              TIMESTAMPTZ       NOT NULL DEFAULT NOW(),

    PRIMARY KEY (id),
    UNIQUE (code),
    UNIQUE (source_key),
    -- Deleting the parent organisation detaches the brand instead of deleting it.
    FOREIGN KEY (funeral_home_id) REFERENCES funeral_home (id) ON DELETE SET NULL
);
|
||||
|
||||
-- Deduplication and lookup indexes for funeral_brand.
--
-- source_key is intentionally not indexed here: its UNIQUE constraint on
-- funeral_brand already creates a unique index that serves source_key
-- lookups, so a second (partial) index would only add write overhead.
CREATE INDEX idx_brand_abn ON funeral_brand(abn) WHERE abn IS NOT NULL;
CREATE INDEX idx_brand_listing_tier ON funeral_brand(listing_tier);
CREATE INDEX idx_brand_name_postcode ON funeral_brand(title, business_postcode);
CREATE INDEX idx_brand_verified ON funeral_brand(verified);
CREATE INDEX idx_brand_hidden ON funeral_brand(hidden);
-- Partial index: only unverified brands are included.
CREATE INDEX idx_brand_enrichment ON funeral_brand(enrichment_status) WHERE verified = FALSE;
|
||||
|
||||
-- ============================================================
|
||||
-- 3. LOCATION (physical office/chapel)
|
||||
-- ============================================================
|
||||
|
||||
-- Physical office/chapel belonging to a funeral_brand. Rows are deleted
-- together with their brand (ON DELETE CASCADE).
CREATE TABLE location (
    id SERIAL PRIMARY KEY,
    title TEXT NOT NULL, -- display name (e.g. "Kingaroy, QLD")
    address TEXT,
    suburb TEXT,
    state TEXT,
    postcode TEXT,
    country TEXT DEFAULT 'Australia',

    -- Coordinates in decimal degrees; the CHECKs reject values outside the
    -- valid latitude/longitude ranges (NULL is still allowed).
    lat DOUBLE PRECISION
        CONSTRAINT location_lat_check CHECK (lat BETWEEN -90 AND 90),
    lng DOUBLE PRECISION
        CONSTRAINT location_lng_check CHECK (lng BETWEEN -180 AND 180),

    -- Google rating 0-5; enforced so a bad scrape cannot store e.g. 47.
    rating REAL
        CONSTRAINT location_rating_check CHECK (rating BETWEEN 0 AND 5),
    -- number of Google reviews; a count can never be negative
    rating_num INTEGER
        CONSTRAINT location_rating_num_check CHECK (rating_num >= 0),
    google_place_key TEXT, -- Google Places ID

    brand_id INTEGER NOT NULL REFERENCES funeral_brand(id) ON DELETE CASCADE,

    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_location_brand ON location(brand_id);
CREATE INDEX idx_location_state ON location(state);
CREATE INDEX idx_location_postcode ON location(postcode);
CREATE INDEX idx_location_coords ON location(lat, lng);
CREATE INDEX idx_location_google ON location(google_place_key) WHERE google_place_key IS NOT NULL;
|
||||
|
||||
-- ============================================================
|
||||
-- 4. FUNERAL AREA (service region)
|
||||
-- ============================================================
|
||||
|
||||
-- Service region. Linked to brands and packages through the
-- brand_funeral_area and package_funeral_area junction tables.
CREATE TABLE funeral_area (
    id SERIAL PRIMARY KEY,
    title TEXT NOT NULL,
    code TEXT,
    description TEXT,
    postcodes TEXT, -- comma-separated postcode list
    sort INTEGER DEFAULT 0,
    -- NOT NULL keeps the flag strictly two-valued, matching funeral_brand.hidden;
    -- a NULL here would match neither "hidden" nor "NOT hidden" filters.
    hidden BOOLEAN NOT NULL DEFAULT FALSE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
|
||||
|
||||
-- Junction: brand <-> funeral_area (many-to-many).
-- A link row is removed automatically when either side is deleted.
CREATE TABLE brand_funeral_area (
    brand_id INTEGER NOT NULL REFERENCES funeral_brand(id) ON DELETE CASCADE,
    funeral_area_id INTEGER NOT NULL REFERENCES funeral_area(id) ON DELETE CASCADE,
    PRIMARY KEY (brand_id, funeral_area_id)
);

-- The primary key index leads with brand_id, so it cannot serve lookups by
-- area alone. This index covers "which brands serve this area" queries and
-- the cascade triggered by deleting a funeral_area row.
CREATE INDEX idx_brand_funeral_area_area ON brand_funeral_area(funeral_area_id);
|
||||
|
||||
-- ============================================================
|
||||
-- 5. PACKAGE (funeral plan bundle)
|
||||
-- ============================================================
|
||||
|
||||
-- Funeral plan bundle offered by a brand. Rows are deleted together with
-- their brand (ON DELETE CASCADE).
CREATE TABLE package (
    id SERIAL PRIMARY KEY,
    title TEXT NOT NULL,
    description TEXT,
    sort INTEGER DEFAULT 0,
    -- NOT NULL keeps the flag strictly two-valued, matching funeral_brand.hidden;
    -- a NULL here would match neither "hidden" nor "NOT hidden" filters.
    hidden BOOLEAN NOT NULL DEFAULT FALSE,
    for_whom TEXT, -- 'myself' / 'someone' / null (both)
    religion TEXT, -- comma-separated supported religions
    funeral_type funeral_type_enum,

    brand_id INTEGER NOT NULL REFERENCES funeral_brand(id) ON DELETE CASCADE,

    -- Provenance (for AI-extracted packages)
    source_url TEXT, -- page this was extracted from
    -- 0-1 confidence score from AI; the CHECK enforces the documented range
    -- (NULL is still allowed for packages without a score).
    extraction_confidence REAL
        CONSTRAINT package_extraction_confidence_check
        CHECK (extraction_confidence BETWEEN 0 AND 1),

    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_package_brand ON package(brand_id);
CREATE INDEX idx_package_type ON package(funeral_type);
|
||||
|
||||
-- Junction: package <-> funeral_area (many-to-many).
-- A link row is removed automatically when either side is deleted.
CREATE TABLE package_funeral_area (
    package_id INTEGER NOT NULL REFERENCES package(id) ON DELETE CASCADE,
    funeral_area_id INTEGER NOT NULL REFERENCES funeral_area(id) ON DELETE CASCADE,
    PRIMARY KEY (package_id, funeral_area_id)
);

-- The primary key index leads with package_id, so it cannot serve lookups by
-- area alone. This index covers "which packages are offered in this area"
-- queries and the cascade triggered by deleting a funeral_area row.
CREATE INDEX idx_package_funeral_area_area ON package_funeral_area(funeral_area_id);
|
||||
|
||||
-- ============================================================
|
||||
-- 6. PACKAGE INCLUSION (fee line item within a package)
|
||||
-- ============================================================
|
||||
|
||||
-- Fee line item within a package. Rows are deleted together with their
-- package (ON DELETE CASCADE).
CREATE TABLE package_inclusion (
    id                    SERIAL,
    price                 NUMERIC(10,2) NOT NULL,
    optional              BOOLEAN       NOT NULL DEFAULT FALSE,
    complimentary         BOOLEAN       NOT NULL DEFAULT FALSE,
    display               BOOLEAN       NOT NULL DEFAULT TRUE,
    description           TEXT,
    sort                  INTEGER       DEFAULT 0,
    inclusion_type_title  TEXT          NOT NULL,  -- category label (e.g. "Professional Service Fee")

    package_id            INTEGER       NOT NULL,

    created_at            TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    updated_at            TIMESTAMPTZ   NOT NULL DEFAULT NOW(),

    PRIMARY KEY (id),
    FOREIGN KEY (package_id) REFERENCES package (id) ON DELETE CASCADE
);

-- Lookup of all line items for one package.
CREATE INDEX idx_inclusion_package
    ON package_inclusion (package_id);
|
||||
|
||||
-- ============================================================
|
||||
-- 7. KNOWN FOR (feature badges on provider cards)
|
||||
-- ============================================================
|
||||
|
||||
-- Feature badge shown on a provider card. Rows are deleted together with
-- their brand (ON DELETE CASCADE).
CREATE TABLE known_for (
    id        SERIAL,
    title     TEXT    NOT NULL,
    brand_id  INTEGER NOT NULL,

    PRIMARY KEY (id),
    FOREIGN KEY (brand_id) REFERENCES funeral_brand (id) ON DELETE CASCADE
);

-- Lookup of all badges for one brand.
CREATE INDEX idx_known_for_brand
    ON known_for (brand_id);
|
||||
|
||||
-- ============================================================
|
||||
-- 8. SOURCE LOG (audit trail of scrape runs)
|
||||
-- ============================================================
|
||||
|
||||
-- One row per scrape run: when it ran, what it counted, and how it ended.
CREATE TABLE source_log (
    id               SERIAL,
    source_name      TEXT        NOT NULL,               -- 'vic_register', 'gathered_here', 'nfda', 'funerals_australia'
    run_started_at   TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    run_finished_at  TIMESTAMPTZ,                        -- no default, so NULL unless a writer sets it

    -- Per-run record counters
    records_found    INTEGER     DEFAULT 0,
    records_new      INTEGER     DEFAULT 0,
    records_updated  INTEGER     DEFAULT 0,
    records_skipped  INTEGER     DEFAULT 0,

    status           TEXT        DEFAULT 'running',      -- 'running', 'completed', 'failed'
    error_message    TEXT,
    metadata         JSONB,                              -- any extra run info

    PRIMARY KEY (id)
);
|
||||
|
||||
-- ============================================================
|
||||
-- 9. SOURCE RECORD (raw scraped data, kept for audit)
|
||||
-- ============================================================
|
||||
|
||||
-- Raw scraped data, kept for audit. Each (source_name, source_id) pair is
-- stored at most once. Links to the matched brand and to the scrape run are
-- cleared (SET NULL) rather than cascading, so the raw record is kept when
-- either is deleted.
CREATE TABLE source_record (
    id SERIAL PRIMARY KEY,
    source_name TEXT NOT NULL,
    source_id TEXT NOT NULL, -- external ID from the source
    source_url TEXT,
    raw_data JSONB NOT NULL, -- original scraped data
    normalized_data JSONB, -- mapped to intermediate format
    matched_brand_id INTEGER REFERENCES funeral_brand(id) ON DELETE SET NULL,
    match_type TEXT, -- 'source_key', 'abn', 'name_postcode', 'fuzzy', 'new'
    processed_at TIMESTAMPTZ,
    log_id INTEGER REFERENCES source_log(id) ON DELETE SET NULL,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    UNIQUE(source_name, source_id)
);

-- No separate index on (source_name, source_id): the UNIQUE constraint above
-- already creates a unique index on exactly those columns, so a second one
-- would only add write and storage overhead.
CREATE INDEX idx_source_record_brand ON source_record(matched_brand_id) WHERE matched_brand_id IS NOT NULL;
|
||||
|
||||
-- ============================================================
|
||||
-- UPDATED_AT TRIGGER
|
||||
-- ============================================================
|
||||
|
||||
-- Row-level trigger function: overwrites updated_at on the row being written
-- with NOW(), then lets the write proceed with the modified row.
CREATE OR REPLACE FUNCTION update_updated_at()
RETURNS trigger
LANGUAGE plpgsql
AS $body$
BEGIN
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$body$;
|
||||
|
||||
-- Attach update_updated_at() to every table that has an updated_at column,
-- so the column is refreshed on each row UPDATE.
CREATE TRIGGER trg_funeral_home_updated
    BEFORE UPDATE ON funeral_home
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();

CREATE TRIGGER trg_funeral_brand_updated
    BEFORE UPDATE ON funeral_brand
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();

CREATE TRIGGER trg_location_updated
    BEFORE UPDATE ON location
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();

CREATE TRIGGER trg_funeral_area_updated
    BEFORE UPDATE ON funeral_area
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();

CREATE TRIGGER trg_package_updated
    BEFORE UPDATE ON package
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();

CREATE TRIGGER trg_package_inclusion_updated
    BEFORE UPDATE ON package_inclusion
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();
|
||||
|
||||
COMMIT;
|
||||
Reference in New Issue
Block a user