bluesky-collector/scripts/02-toxicity.sql

66 lines
2.9 KiB
MySQL
Raw Normal View History

-- Toxicity Analysis Schema
-- Stores per-post and per-mention toxicity scores from LLM classification.
-- Toxicity scores for posts (from tracked accounts' feeds)
CREATE TABLE IF NOT EXISTS toxicity_scores (
uri TEXT PRIMARY KEY REFERENCES posts(uri) ON DELETE CASCADE,
overall REAL NOT NULL,
toxic REAL NOT NULL DEFAULT 0,
threat REAL NOT NULL DEFAULT 0,
hate_speech REAL NOT NULL DEFAULT 0,
racism REAL NOT NULL DEFAULT 0,
antisemitism REAL NOT NULL DEFAULT 0,
islamophobia REAL NOT NULL DEFAULT 0,
sexism REAL NOT NULL DEFAULT 0,
homophobia REAL NOT NULL DEFAULT 0,
insult REAL NOT NULL DEFAULT 0,
dehumanization REAL NOT NULL DEFAULT 0,
extremism REAL NOT NULL DEFAULT 0,
ableism REAL NOT NULL DEFAULT 0,
flagged BOOLEAN NOT NULL DEFAULT false,
model TEXT NOT NULL DEFAULT 'gpt-4.1-nano',
scored_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_tox_flagged ON toxicity_scores (flagged) WHERE flagged = true;
CREATE INDEX IF NOT EXISTS idx_tox_overall ON toxicity_scores (overall DESC);
CREATE INDEX IF NOT EXISTS idx_tox_scored ON toxicity_scores (scored_at DESC);
-- Toxicity scores for mentions (posts about tracked accounts)
CREATE TABLE IF NOT EXISTS mention_toxicity_scores (
mention_id BIGINT PRIMARY KEY REFERENCES mentions(id) ON DELETE CASCADE,
overall REAL NOT NULL,
toxic REAL NOT NULL DEFAULT 0,
threat REAL NOT NULL DEFAULT 0,
hate_speech REAL NOT NULL DEFAULT 0,
racism REAL NOT NULL DEFAULT 0,
antisemitism REAL NOT NULL DEFAULT 0,
islamophobia REAL NOT NULL DEFAULT 0,
sexism REAL NOT NULL DEFAULT 0,
homophobia REAL NOT NULL DEFAULT 0,
insult REAL NOT NULL DEFAULT 0,
dehumanization REAL NOT NULL DEFAULT 0,
extremism REAL NOT NULL DEFAULT 0,
ableism REAL NOT NULL DEFAULT 0,
flagged BOOLEAN NOT NULL DEFAULT false,
model TEXT NOT NULL DEFAULT 'gpt-4.1-nano',
scored_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_mtox_flagged ON mention_toxicity_scores (flagged) WHERE flagged = true;
CREATE INDEX IF NOT EXISTS idx_mtox_overall ON mention_toxicity_scores (overall DESC);
-- Analysis run audit trail
CREATE TABLE IF NOT EXISTS analysis_runs (
id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
started_at TIMESTAMPTZ NOT NULL DEFAULT now(),
finished_at TIMESTAMPTZ,
status TEXT NOT NULL DEFAULT 'running', -- running | completed | failed | partial
posts_scored INTEGER NOT NULL DEFAULT 0,
mentions_scored INTEGER NOT NULL DEFAULT 0,
errors INTEGER NOT NULL DEFAULT 0,
model TEXT NOT NULL,
cost_usd NUMERIC(10,6) DEFAULT 0,
duration_secs NUMERIC
);