Loading documentation...
v2-002-filter-codes-migration-narrative.md (native filter codes, building_age axis,
-- and updated natural-key/RPC contract).
-- Keep operational status and planning context out of this file.
-- File: supabase/migrations/<timestamp>_v2_initial_schema.sql-- Design principles encoded here:
-- 1. Fact tables split by grain (daily, yearly).
-- 2. Filter axes (fire_type, building_type, region) are nullable columns
-- on fact rows, NOT analytical dimensions. NULL means "this source
-- doesn't carry that information" → graceful query-time degradation.
-- NOTE: v2-002 expands this set with building_age and switches these
-- filter columns to native-code semantics via fire_data.filters.
-- 3. Categories are SOURCE-SCOPED. BRASK's "Ildsted" and POLICE's "Ildsted"
-- are separate rows that share standard_code = 'FIREPLACE' for
-- cross-source equivalence. No shared cat_code.
-- 4. Codes use a numeric FK spine (INTEGER source_id / dim_code PKs) with
-- human labels on dimensions/sources rows; filter axes reference
-- filters.id (see docs/fire-data-schema/postgres-contract-rls-and-keys.md for live contract).
-- Earlier drafts used TEXT PKs — historical migration narratives may
-- still show TEXT; trust generated types + postgres-contract-rls-and-keys.md when they differ.
-- 5. Single source of truth. Every fact column is populated at ingest by
-- exactly one adapter. No client-side transformation. No middle-tier
-- business logic.
-- ============================================================================
BEGIN;
-- 0. SCHEMAS
CREATE SCHEMA IF NOT EXISTS fire_data; CREATE SCHEMA IF NOT EXISTS ingest;
COMMENT ON SCHEMA fire_data IS 'Analytical surface for HeatWaves. Tables and RPCs here are exposed via PostgREST. RLS-enabled.';
COMMENT ON SCHEMA ingest IS 'Operational state for ingestion jobs. NOT exposed via PostgREST. Service-role only.';
-- ============================================================================ -- LOOKUP TABLES (in fire_data) -- ============================================================================
-- 1. SOURCES — one row per data origin. SSB appears twice if treated as one -- source serving both fire-incident yearly data and building-stock yearly -- data; for now, one row.
CREATE TABLE fire_data.sources ( source_id TEXT PRIMARY KEY, long_name TEXT NOT NULL -- 'Finans Norge / BRASK' name TEXT NOT NULL, -- 'BRASK' description TEXT, grain TEXT NOT NULL, -- 'daily' | 'yearly' color TEXT, -- oklch api_url TEXT, -- documentation is_active BOOLEAN NOT NULL DEFAULT TRUE, created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
CONSTRAINT sources_grain_check CHECK (grain IN ('daily', 'yearly')) );
COMMENT ON TABLE fire_data.sources IS 'One row per upstream data source. grain determines which fact table the source writes to.';
INSERT INTO fire_data.sources (source_id, display_name, name, grain, color, api_url, description) VALUES ('BRASK', 'Finans Norge / BRASK', 'BRASK', 'daily', 'oklch(60% 0.18 30)', 'https://brask.finansnorge.no', 'Insurance industry fire claims. Daily granularity, 1985–present. Carries Kilde, Årsak, Bygningsalder, Erstatningsutgifter.'), ('POLICE', 'Politiet (BRIS)', 'POLICE', 'daily', 'oklch(55% 0.20 250)', 'https://bris.dsb.no', 'Police-reported fires from BRIS. Daily granularity, 2016–present. Carries Kilde, Politi Arnested.'), ('BRIGADE', 'Brannvesenet (BRIS)', 'BRIGADE','daily', 'oklch(58% 0.20 25)', 'https://bris.dsb.no', 'Fire brigade incident reports from BRIS "Alle branner og ulykker". Carries Oppdragstype, building_type, region.'), ('DSB', 'DSB', 'DSB', 'daily', 'oklch(55% 0.18 320)', 'https://www.dsb.no', 'Direktoratet for samfunnssikkerhet og beredskap. Daily Oppdragstype data.'), ('SSB', 'Statistisk sentralbyrå (SSB)', 'SSB', 'yearly', 'oklch(60% 0.15 200)', 'https://www.ssb.no', 'Yearly fire statistics and building stock totals. National statistical authority.');
-- 2. DIMENSIONS — analytical axes. is_active flags planned-but-not-ingested.
CREATE TABLE fire_data.dimensions ( dim_code TEXT PRIMARY KEY, name TEXT NOT NULL, -- 'Kilde' display_name TEXT, -- 'Heat Source' eu_firestat_id TEXT, -- 'HEAT_SOURCE' description TEXT, is_active BOOLEAN NOT NULL DEFAULT TRUE, sort_order SMALLINT NOT NULL DEFAULT 100, created_at TIMESTAMPTZ NOT NULL DEFAULT now() );
COMMENT ON TABLE fire_data.dimensions IS 'Analytical dimensions users can group fact data by. Filter axes (fire_type, building_type, region) are NOT dimensions; they are columns on fact tables.';
INSERT INTO fire_data.dimensions (dim_code, name, display_name, eu_firestat_id, description, is_active, sort_order) VALUES ('KILDE', 'Kilde', 'Heat Source', 'HEAT_SOURCE', 'What ignited the fire (peis, elektrisk, etc.)', TRUE, 10), ('cause', 'Årsak', 'Primary Causal Factor','PRIMARY_CAUSAL_FACTOR','The underlying cause (ildspåsettelse, feil bruk, etc.)', TRUE, 20), ('OBJEKT', 'Objekt', 'Item First Ignited', 'ITEM_FIRST_IGNITED', 'The object the fire started in. EU Firestat dimension.', FALSE, 30), ('POLITI_ARNESTED', 'Politi Arnested', 'Area of Origin', 'AREA_OF_ORIGIN', 'Where in the building the fire originated (police-specific).', TRUE, 40), ('OPPDRAGSTYPE', 'Oppdragstype', 'Incident Type', NULL, 'BRIS/DSB incident classification (brann i bygning, branntilløp, etc.)', TRUE, 50), ('BYGNINGSALDER', 'Bygningsalder', 'Building Age', NULL, 'BRASK-specific building age buckets. Planned ingestion.', FALSE, 60), ('BYGNINGSTYPE', 'Bygningstype', 'Building Type', 'BUILDING_TYPE', 'Norwegian Matrikkel building taxonomy. Planned ingestion.', FALSE, 70);
-- 3. CATEGORIES — source-scoped. Each (source, dim, cat) is unique.
-- Cross-source equivalence comes through standard_code.
CREATE TABLE fire_data.categories ( source_id TEXT NOT NULL REFERENCES fire_data.sources(source_id) ON DELETE RESTRICT, dim_code TEXT NOT NULL REFERENCES fire_data.dimensions(dim_code) ON DELETE RESTRICT, cat_code INT NOT NULL,
name TEXT NOT NULL, -- 'Ildsted (ovn, peis, gasspeis)' (BRASK native) display_name TEXT, -- optional cleaned version
standard_code TEXT, -- 'FIREPLACE' — heatwaves cross-source code
color TEXT, -- oklch, may inherit a default from standard_code icon TEXT, -- lucide icon name description TEXT, sort_order SMALLINT NOT NULL DEFAULT 100,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
PRIMARY KEY (source_id, dim_code, cat_code) );
COMMENT ON TABLE fire_data.categories IS 'Source-scoped category definitions. BRASK Ildsted and POLICE Ildsted are separate rows sharing standard_code = FIREPLACE.';
COMMENT ON COLUMN fire_data.categories.standard_code IS 'HeatWaves cross-source equivalence code. Categories sharing a standard_code are conceptually the same thing across sources.';
-- Index for cross-source lookups: "give me every category that's a FIREPLACE" CREATE INDEX idx_categories_standard_code ON fire_data.categories (standard_code) WHERE standard_code IS NOT NULL;
-- Index for catalog queries: "give me all categories for BRASK's Kilde dimension" CREATE INDEX idx_categories_source_dim ON fire_data.categories (source_id, dim_code, sort_order);
-- ============================================================================ -- FACT TABLES (in fire_data) -- ============================================================================
-- 4. fact_daily — DATE-grain observations (INTEGER FK spine added in later migrations). -- Incident facts at DATE grain where adapters emit daily histogram cells. -- Yearly-only upstream catalogs land in fact_yearly instead — see taxonomy + -- docs/fire-data-schema/conceptual-tables-and-grain.md ("supports_subyear"). -- Surrogate id + UNIQUE INDEX over the natural key (with COALESCE for -- NULL-bearing columns). PRIMARY KEY can't take expressions, but unique -- indexes can — ON CONFLICT references the same expression list.
CREATE TABLE fire_data.fact_daily ( id BIGSERIAL PRIMARY KEY,
date DATE NOT NULL, source_id TEXT NOT NULL, dim_code TEXT NOT NULL, cat_code INT NOT NULL,
value NUMERIC NOT NULL DEFAULT 0, -- count of fires value_kr NUMERIC, -- erstatningsutgifter NOK; BRASK only
-- Filter axes — NULL = source doesn't carry this info → excluded from -- non-NULL filter queries (graceful degradation). fire_type_code TEXT, building_type_code TEXT, region_code TEXT,
ingested_at TIMESTAMPTZ NOT NULL DEFAULT now(),
CONSTRAINT fact_daily_value_nonneg CHECK (value >= 0), CONSTRAINT fact_daily_value_kr_nonneg CHECK (value_kr IS NULL OR value_kr >= 0), CONSTRAINT fact_daily_fire_type_check CHECK (fire_type_code IS NULL OR fire_type_code IN ('CONFINED', 'DEVELOPED')), CONSTRAINT fact_daily_building_type_check CHECK (building_type_code IS NULL OR building_type_code IN ('RESIDENTIAL', 'COMMERCIAL', 'OTHER')),
FOREIGN KEY (source_id, dim_code, cat_code) REFERENCES fire_data.categories (source_id, dim_code, cat_code) ON DELETE RESTRICT );
COMMENT ON TABLE fire_data.fact_daily IS 'Daily-grain fire observations. Each row is one (source, date, dim, cat, [filter axes]) slice. Upsert on the natural-key unique index.';
COMMENT ON COLUMN fire_data.fact_daily.value_kr IS 'BRASK insurance claim amount in NOK. NULL for sources that do not report it. Sum across rows for total claim amounts; the count column (value) handles fire counts independently.';
COMMENT ON COLUMN fire_data.fact_daily.building_type_code IS 'Building type slice. NULL = source has no building-type capability. Important: ingest only granular slices (RESIDENTIAL, COMMERCIAL, OTHER) — never an "ALL" rollup, which would double-count.';
-- Natural key unique index — used by INSERT ... ON CONFLICT for upserts. -- COALESCE wraps NULLs as '' for uniqueness comparison. CREATE UNIQUE INDEX fact_daily_natural_key ON fire_data.fact_daily ( source_id, dim_code, cat_code, date, (COALESCE(fire_type_code, '')), (COALESCE(building_type_code, '')), (COALESCE(region_code, '')) );
-- Query-pattern index for the most common chart filter combo. -- A multi-source chart restricted to a date range and one or more dimensions. CREATE INDEX idx_fact_daily_date_source_dim ON fire_data.fact_daily (date, source_id, dim_code);
-- 5. fact_yearly — YEAR-grain observations ingested at annual cadence. -- Includes SSB and BRIS endpoints restricted to YEAR aggregates — not "SSB only". -- Year buckets must come from ingest, never from summarizing fact_daily in SQL. -- Same logical shape as fact_daily but time axis is year (SMALLINT) instead of date.
CREATE TABLE fire_data.fact_yearly ( id BIGSERIAL PRIMARY KEY,
year SMALLINT NOT NULL, source_id TEXT NOT NULL, dim_code TEXT NOT NULL, cat_code INT NOT NULL,
value NUMERIC NOT NULL DEFAULT 0, value_kr NUMERIC,
fire_type_code TEXT, building_type_code TEXT, region_code TEXT,
ingested_at TIMESTAMPTZ NOT NULL DEFAULT now(),
CONSTRAINT fact_yearly_year_check CHECK (year BETWEEN 1900 AND 2100), CONSTRAINT fact_yearly_value_nonneg CHECK (value >= 0), CONSTRAINT fact_yearly_value_kr_nonneg CHECK (value_kr IS NULL OR value_kr >= 0), CONSTRAINT fact_yearly_fire_type_check CHECK (fire_type_code IS NULL OR fire_type_code IN ('CONFINED', 'DEVELOPED')), CONSTRAINT fact_yearly_building_type_check CHECK (building_type_code IS NULL OR building_type_code IN ('RESIDENTIAL', 'COMMERCIAL', 'OTHER')),
FOREIGN KEY (source_id, dim_code, cat_code) REFERENCES fire_data.categories (source_id, dim_code, cat_code) ON DELETE RESTRICT );
CREATE UNIQUE INDEX fact_yearly_natural_key ON fire_data.fact_yearly ( source_id, dim_code, cat_code, year, (COALESCE(fire_type_code, '')), (COALESCE(building_type_code, '')), (COALESCE(region_code, '')) );
CREATE INDEX idx_fact_yearly_year_source_dim ON fire_data.fact_yearly (year, source_id, dim_code);
-- "5,000,000 RESIDENTIAL exist in 2020" lives here, not in fact_yearly. -- Used for normalisation — "fires per 1000 RESIDENTIAL" is a JOIN. -- Currently SSB only.
CREATE TABLE fire_data.fact_buildings_yearly ( id BIGSERIAL PRIMARY KEY,
year SMALLINT NOT NULL, source_id TEXT NOT NULL DEFAULT 'SSB', building_type_code TEXT NOT NULL, region_code TEXT,
value NUMERIC NOT NULL, -- count of buildings
ingested_at TIMESTAMPTZ NOT NULL DEFAULT now(),
CONSTRAINT fact_buildings_year_check CHECK (year BETWEEN 1900 AND 2100), CONSTRAINT fact_buildings_value_nonneg CHECK (value >= 0), CONSTRAINT fact_buildings_btype_check CHECK (building_type_code IN ('RESIDENTIAL', 'COMMERCIAL', 'OTHER')) );
COMMENT ON TABLE fire_data.fact_buildings_yearly IS 'Building-stock totals (non-fire). Enables fires-per-capita-style normalisation. Query as JOIN against fact_daily/fact_yearly when the user wants normalised metrics.';
CREATE UNIQUE INDEX fact_buildings_natural_key ON fire_data.fact_buildings_yearly ( source_id, year, building_type_code, (COALESCE(region_code, '')) );
-- ============================================================================ -- INGEST SCHEMA — operational state, not exposed via PostgREST -- ============================================================================
-- Note: this table tracks SLICES (a (source, dim, [filters], period) tuple) -- not individual rows. The fact-table upsert handles row-level dedup.
CREATE TABLE ingest.extraction_state ( id BIGSERIAL PRIMARY KEY,
source_id TEXT NOT NULL, dim_code TEXT NOT NULL, fire_type_code TEXT, building_type_code TEXT, region_code TEXT, period_start DATE NOT NULL, period_end DATE NOT NULL,
last_fetched_at TIMESTAMPTZ NOT NULL DEFAULT now(), last_row_count INT, last_total_value NUMERIC, -- diagnostic: sum across all rows status TEXT NOT NULL DEFAULT 'success', error_message TEXT,
CONSTRAINT extraction_state_status_check CHECK (status IN ('success', 'partial', 'failed', 'pending')) );
CREATE UNIQUE INDEX extraction_state_natural_key ON ingest.extraction_state ( source_id, dim_code, period_start, period_end, (COALESCE(fire_type_code, '')), (COALESCE(building_type_code, '')), (COALESCE(region_code, '')) );
COMMENT ON TABLE ingest.extraction_state IS 'Tracks when each ingestion slice was last fetched. Driven by Utforsk-triggered refreshes and scheduled jobs. Does not store row deltas — the fact-table upsert handles that natively.';
-- 8. raw_payloads — optional, append-only audit log of API responses. -- Useful for debugging adapter logic or replaying failed ingests. -- Disabled by default; enable per-source if needed.
CREATE TABLE ingest.raw_payloads ( id BIGSERIAL PRIMARY KEY, source_id TEXT NOT NULL, request_url TEXT, request_params JSONB, response_body JSONB, http_status INT, fetched_at TIMESTAMPTZ NOT NULL DEFAULT now(), notes TEXT );
CREATE INDEX idx_raw_payloads_source_fetched ON ingest.raw_payloads (source_id, fetched_at DESC);
COMMENT ON TABLE ingest.raw_payloads IS 'Append-only audit log of upstream API responses. Optional — populate only when debugging or for sources with strict provenance requirements.';
-- ============================================================================ -- ROW-LEVEL SECURITY -- ============================================================================
-- fire_data tables: readable by anon and authenticated; writes only via -- service-role (used by ingestion) or via SECURITY DEFINER RPCs.
ALTER TABLE fire_data.sources ENABLE ROW LEVEL SECURITY; ALTER TABLE fire_data.dimensions ENABLE ROW LEVEL SECURITY; ALTER TABLE fire_data.categories ENABLE ROW LEVEL SECURITY; ALTER TABLE fire_data.fact_daily ENABLE ROW LEVEL SECURITY; ALTER TABLE fire_data.fact_yearly ENABLE ROW LEVEL SECURITY; ALTER TABLE fire_data.fact_buildings_yearly ENABLE ROW LEVEL SECURITY;
-- Read-only access for anon and authenticated. Writes go through service_role -- (which bypasses RLS) or future SECURITY DEFINER functions.
CREATE POLICY "fire_data read-anon" ON fire_data.sources FOR SELECT TO anon, authenticated USING (true); CREATE POLICY "fire_data read-anon" ON fire_data.dimensions FOR SELECT TO anon, authenticated USING (true); CREATE POLICY "fire_data read-anon" ON fire_data.categories FOR SELECT TO anon, authenticated USING (true); CREATE POLICY "fire_data read-anon" ON fire_data.fact_daily FOR SELECT TO anon, authenticated USING (true); CREATE POLICY "fire_data read-anon" ON fire_data.fact_yearly FOR SELECT TO anon, authenticated USING (true); CREATE POLICY "fire_data read-anon" ON fire_data.fact_buildings_yearly FOR SELECT TO anon, authenticated USING (true);
-- ingest schema: NO policies. Default-deny via RLS. Only service_role bypasses. ALTER TABLE ingest.extraction_state ENABLE ROW LEVEL SECURITY; ALTER TABLE ingest.raw_payloads ENABLE ROW LEVEL SECURITY;
-- ============================================================================ -- POSTGREST EXPOSURE -- ============================================================================
-- After running this migration, add 'fire_data' to the API → Exposed schemas -- list in the Supabase dashboard. Do NOT add 'ingest'.
COMMIT;
-- ============================================================================ -- VERIFICATION QUERIES — run after the migration completes -- ============================================================================
-- V1. All schemas exist -- SELECT schema_name FROM information_schema.schemata -- WHERE schema_name IN ('fire_data', 'ingest');
-- V2. All tables exist with expected columns -- SELECT table_schema, table_name, count(*) AS columns -- FROM information_schema.columns -- WHERE table_schema IN ('fire_data', 'ingest') -- GROUP BY 1, 2 ORDER BY 1, 2;
-- V3. RLS is enabled -- SELECT schemaname, tablename, rowsecurity -- FROM pg_tables -- WHERE schemaname IN ('fire_data', 'ingest') -- ORDER BY 1, 2;
-- V4. Seed data — sources and dimensions -- SELECT * FROM fire_data.sources ORDER BY source_id; -- SELECT * FROM fire_data.dimensions ORDER BY sort_order;