Files
Provider-Crawl/crawlers/compute_tiers.py
Richie cc91427789 Initial commit: funeral provider discovery pipeline
Python crawlers for VIC Register, Funerals Australia, NFDA
n8n workflows for scheduled discovery and enrichment
SQLite schema and seeded dev database (1,463 providers)
End-to-end process documentation in n8n/PROCESS.md
2026-04-24 10:27:08 +10:00

103 lines
3.1 KiB
Python

"""Compute listing_tier for all providers based on their data quality.
Tier logic:
verified — brand.verified = true (signed up to platform)
priced — has 2+ packages with at least one inclusion that has a price > 0
estimated — has at least one package with a total price > 0
listed — everything else (contact info only)
Run this after enrichment to update tiers across the board.
"""
from base import get_db
def compute_tier(db, brand_id: int, verified: bool) -> str:
"""Compute the listing tier for a single brand."""
if verified:
return "verified"
# Check packages
packages = db.execute(
"SELECT id, title, funeral_type FROM package WHERE brand_id = ?",
(brand_id,)
).fetchall()
if not packages:
return "listed"
# Count packages that have a meaningful total price
# A package's price = sum of non-optional, non-complimentary inclusions
packages_with_price = 0
packages_with_itemized = 0
for pkg in packages:
inclusions = db.execute(
"""SELECT price, optional, complimentary
FROM package_inclusion
WHERE package_id = ?""",
(pkg["id"],)
).fetchall()
if inclusions:
# Has itemized inclusions with prices
priced_inclusions = [
i for i in inclusions
if i["price"] and float(i["price"]) > 0
]
if len(priced_inclusions) >= 2:
packages_with_itemized += 1
packages_with_price += 1
elif len(priced_inclusions) >= 1:
packages_with_price += 1
else:
# Package exists but no inclusions — check if we stored a total
# price in the package description or via source data
# For now, a package with a funeral_type means we at least know
# what kind of service it is, even without breakdown
packages_with_price += 1
# Tier 2 (priced): 2+ packages with itemized breakdowns
if packages_with_itemized >= 2:
return "priced"
# Tier 3 (estimated): at least one package with some price
if packages_with_price >= 1:
return "estimated"
return "listed"
def run():
"""Recompute listing_tier for all brands."""
db = get_db()
brands = db.execute(
"SELECT id, verified FROM funeral_brand"
).fetchall()
counts = {"verified": 0, "priced": 0, "estimated": 0, "listed": 0}
for brand in brands:
tier = compute_tier(db, brand["id"], brand["verified"])
db.execute(
"UPDATE funeral_brand SET listing_tier = ? WHERE id = ?",
(tier, brand["id"])
)
counts[tier] += 1
db.commit()
print("Listing Tier Distribution:")
print(f" verified: {counts['verified']:>6d} (signed-up partners)")
print(f" priced: {counts['priced']:>6d} (full package breakdowns)")
print(f" estimated: {counts['estimated']:>6d} (some pricing info)")
print(f" listed: {counts['listed']:>6d} (contact info only)")
print(f" TOTAL: {sum(counts.values()):>6d}")
db.close()
if __name__ == "__main__":
run()