Python crawlers for VIC Register, Funerals Australia, NFDA n8n workflows for scheduled discovery and enrichment SQLite schema and seeded dev database (1,463 providers) End-to-end process documentation in n8n/PROCESS.md
103 lines
3.1 KiB
Python
103 lines
3.1 KiB
Python
"""Compute listing_tier for all providers based on their data quality.
|
|
|
|
Tier logic:
|
|
verified — brand.verified = true (signed up to platform)
|
|
priced — has 2+ packages with at least one inclusion that has a price > 0
|
|
estimated — has at least one package with a total price > 0
|
|
listed — everything else (contact info only)
|
|
|
|
Run this after enrichment to update tiers across the board.
|
|
"""
|
|
|
|
from base import get_db
|
|
|
|
|
|
def compute_tier(db, brand_id: int, verified: bool) -> str:
|
|
"""Compute the listing tier for a single brand."""
|
|
if verified:
|
|
return "verified"
|
|
|
|
# Check packages
|
|
packages = db.execute(
|
|
"SELECT id, title, funeral_type FROM package WHERE brand_id = ?",
|
|
(brand_id,)
|
|
).fetchall()
|
|
|
|
if not packages:
|
|
return "listed"
|
|
|
|
# Count packages that have a meaningful total price
|
|
# A package's price = sum of non-optional, non-complimentary inclusions
|
|
packages_with_price = 0
|
|
packages_with_itemized = 0
|
|
|
|
for pkg in packages:
|
|
inclusions = db.execute(
|
|
"""SELECT price, optional, complimentary
|
|
FROM package_inclusion
|
|
WHERE package_id = ?""",
|
|
(pkg["id"],)
|
|
).fetchall()
|
|
|
|
if inclusions:
|
|
# Has itemized inclusions with prices
|
|
priced_inclusions = [
|
|
i for i in inclusions
|
|
if i["price"] and float(i["price"]) > 0
|
|
]
|
|
if len(priced_inclusions) >= 2:
|
|
packages_with_itemized += 1
|
|
packages_with_price += 1
|
|
elif len(priced_inclusions) >= 1:
|
|
packages_with_price += 1
|
|
else:
|
|
# Package exists but no inclusions — check if we stored a total
|
|
# price in the package description or via source data
|
|
# For now, a package with a funeral_type means we at least know
|
|
# what kind of service it is, even without breakdown
|
|
packages_with_price += 1
|
|
|
|
# Tier 2 (priced): 2+ packages with itemized breakdowns
|
|
if packages_with_itemized >= 2:
|
|
return "priced"
|
|
|
|
# Tier 3 (estimated): at least one package with some price
|
|
if packages_with_price >= 1:
|
|
return "estimated"
|
|
|
|
return "listed"
|
|
|
|
|
|
def run():
|
|
"""Recompute listing_tier for all brands."""
|
|
db = get_db()
|
|
|
|
brands = db.execute(
|
|
"SELECT id, verified FROM funeral_brand"
|
|
).fetchall()
|
|
|
|
counts = {"verified": 0, "priced": 0, "estimated": 0, "listed": 0}
|
|
|
|
for brand in brands:
|
|
tier = compute_tier(db, brand["id"], brand["verified"])
|
|
db.execute(
|
|
"UPDATE funeral_brand SET listing_tier = ? WHERE id = ?",
|
|
(tier, brand["id"])
|
|
)
|
|
counts[tier] += 1
|
|
|
|
db.commit()
|
|
|
|
print("Listing Tier Distribution:")
|
|
print(f" verified: {counts['verified']:>6d} (signed-up partners)")
|
|
print(f" priced: {counts['priced']:>6d} (full package breakdowns)")
|
|
print(f" estimated: {counts['estimated']:>6d} (some pricing info)")
|
|
print(f" listed: {counts['listed']:>6d} (contact info only)")
|
|
print(f" TOTAL: {sum(counts.values()):>6d}")
|
|
|
|
db.close()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
run()
|