Initial commit: funeral provider discovery pipeline
Python crawlers for VIC Register, Funerals Australia, NFDA n8n workflows for scheduled discovery and enrichment SQLite schema and seeded dev database (1,463 providers) End-to-end process documentation in n8n/PROCESS.md
This commit is contained in:
102
crawlers/compute_tiers.py
Normal file
102
crawlers/compute_tiers.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""Compute listing_tier for all providers based on their data quality.
|
||||
|
||||
Tier logic:
|
||||
verified — brand.verified = true (signed up to platform)
|
||||
priced — has 2+ packages with at least one inclusion that has a price > 0
|
||||
estimated — has at least one package with a total price > 0
|
||||
listed — everything else (contact info only)
|
||||
|
||||
Run this after enrichment to update tiers across the board.
|
||||
"""
|
||||
|
||||
from base import get_db
|
||||
|
||||
|
||||
def compute_tier(db, brand_id: int, verified: bool) -> str:
|
||||
"""Compute the listing tier for a single brand."""
|
||||
if verified:
|
||||
return "verified"
|
||||
|
||||
# Check packages
|
||||
packages = db.execute(
|
||||
"SELECT id, title, funeral_type FROM package WHERE brand_id = ?",
|
||||
(brand_id,)
|
||||
).fetchall()
|
||||
|
||||
if not packages:
|
||||
return "listed"
|
||||
|
||||
# Count packages that have a meaningful total price
|
||||
# A package's price = sum of non-optional, non-complimentary inclusions
|
||||
packages_with_price = 0
|
||||
packages_with_itemized = 0
|
||||
|
||||
for pkg in packages:
|
||||
inclusions = db.execute(
|
||||
"""SELECT price, optional, complimentary
|
||||
FROM package_inclusion
|
||||
WHERE package_id = ?""",
|
||||
(pkg["id"],)
|
||||
).fetchall()
|
||||
|
||||
if inclusions:
|
||||
# Has itemized inclusions with prices
|
||||
priced_inclusions = [
|
||||
i for i in inclusions
|
||||
if i["price"] and float(i["price"]) > 0
|
||||
]
|
||||
if len(priced_inclusions) >= 2:
|
||||
packages_with_itemized += 1
|
||||
packages_with_price += 1
|
||||
elif len(priced_inclusions) >= 1:
|
||||
packages_with_price += 1
|
||||
else:
|
||||
# Package exists but no inclusions — check if we stored a total
|
||||
# price in the package description or via source data
|
||||
# For now, a package with a funeral_type means we at least know
|
||||
# what kind of service it is, even without breakdown
|
||||
packages_with_price += 1
|
||||
|
||||
# Tier 2 (priced): 2+ packages with itemized breakdowns
|
||||
if packages_with_itemized >= 2:
|
||||
return "priced"
|
||||
|
||||
# Tier 3 (estimated): at least one package with some price
|
||||
if packages_with_price >= 1:
|
||||
return "estimated"
|
||||
|
||||
return "listed"
|
||||
|
||||
|
||||
def run():
|
||||
"""Recompute listing_tier for all brands."""
|
||||
db = get_db()
|
||||
|
||||
brands = db.execute(
|
||||
"SELECT id, verified FROM funeral_brand"
|
||||
).fetchall()
|
||||
|
||||
counts = {"verified": 0, "priced": 0, "estimated": 0, "listed": 0}
|
||||
|
||||
for brand in brands:
|
||||
tier = compute_tier(db, brand["id"], brand["verified"])
|
||||
db.execute(
|
||||
"UPDATE funeral_brand SET listing_tier = ? WHERE id = ?",
|
||||
(tier, brand["id"])
|
||||
)
|
||||
counts[tier] += 1
|
||||
|
||||
db.commit()
|
||||
|
||||
print("Listing Tier Distribution:")
|
||||
print(f" verified: {counts['verified']:>6d} (signed-up partners)")
|
||||
print(f" priced: {counts['priced']:>6d} (full package breakdowns)")
|
||||
print(f" estimated: {counts['estimated']:>6d} (some pricing info)")
|
||||
print(f" listed: {counts['listed']:>6d} (contact info only)")
|
||||
print(f" TOTAL: {sum(counts.values()):>6d}")
|
||||
|
||||
db.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
||||
Reference in New Issue
Block a user