bluesky-collector/scripts/init.sql

87 lines
3.1 KiB
MySQL
Raw Normal View History

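-- Bluesky Collector Schema (PostgreSQL)
-- Tracks accounts, their posts and replies, and mentions of them by other users,
-- plus bookkeeping tables for collection runs and per-account collection state.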
-- Tracked accounts.
-- One row per tracked account, keyed by its DID. Handles are also unique
-- (enforced by idx_accounts_handle below).
--
-- IF NOT EXISTS makes this script safe to re-run against a database that has
-- already been initialised. Note that it only skips creation; it does not
-- verify that an existing table/index matches this definition.
CREATE TABLE IF NOT EXISTS accounts (
    did                    TEXT        PRIMARY KEY,
    handle                 TEXT        NOT NULL,
    display_name           TEXT,                                -- optional, may be NULL
    added_at               TIMESTAMPTZ NOT NULL DEFAULT now(),  -- filled in automatically on insert
    -- NULL until set by the writer; presumably the time of the most recent
    -- feed / mention collection for this account (confirm against the collector).
    last_feed_collected    TIMESTAMPTZ,
    last_mention_collected TIMESTAMPTZ,
    active                 BOOLEAN     NOT NULL DEFAULT true    -- new accounts start active
);

-- Rejects a second account row with the same handle and supports lookup by handle.
CREATE UNIQUE INDEX IF NOT EXISTS idx_accounts_handle ON accounts (handle);
-- Collected posts (from tracked accounts' feeds).
-- One row per post, keyed by its URI. The full upstream record is kept in
-- raw_json; the other columns are extracted fields for querying.
--
-- IF NOT EXISTS (table and indexes) makes this script safe to re-run against
-- an already-initialised database. It skips creation only; it does not check
-- that an existing object matches this definition.
--
-- NOTE(review): author_did is not declared as a foreign key to accounts, and
-- the flag/counter columns have defaults but are still nullable, so an
-- explicit NULL is accepted. Tightening either would change what writers may
-- insert, so it is left as-is here.
CREATE TABLE IF NOT EXISTS posts (
    uri          TEXT        PRIMARY KEY,
    cid          TEXT        NOT NULL,
    author_did   TEXT        NOT NULL,
    text         TEXT,
    created_at   TIMESTAMPTZ,
    indexed_at   TIMESTAMPTZ,
    collected_at TIMESTAMPTZ NOT NULL DEFAULT now(),   -- filled in automatically on insert
    reply_parent TEXT,                                 -- NULL allowed
    reply_root   TEXT,                                 -- NULL allowed; see partial index below
    post_type    TEXT        NOT NULL DEFAULT 'post',  -- post | reply | repost (not enforced by a CHECK)
    has_media    BOOLEAN     DEFAULT false,
    has_embed    BOOLEAN     DEFAULT false,
    like_count   INTEGER     DEFAULT 0,
    reply_count  INTEGER     DEFAULT 0,
    repost_count INTEGER     DEFAULT 0,
    quote_count  INTEGER     DEFAULT 0,
    langs        TEXT[],
    raw_json     JSONB       NOT NULL
);

-- Lookup by author, newest-first scans by creation/collection time, and
-- filtering by post type.
CREATE INDEX IF NOT EXISTS idx_posts_author    ON posts (author_did);
CREATE INDEX IF NOT EXISTS idx_posts_created   ON posts (created_at DESC);
CREATE INDEX IF NOT EXISTS idx_posts_type      ON posts (post_type);
CREATE INDEX IF NOT EXISTS idx_posts_collected ON posts (collected_at DESC);

-- Partial index: only rows that have a reply_root are indexed, so rows with
-- a NULL reply_root take no space in it.
CREATE INDEX IF NOT EXISTS idx_posts_reply_root ON posts (reply_root)
    WHERE reply_root IS NOT NULL;
-- Mentions: posts from *anyone* that mention a tracked account.
-- Rows get a generated surrogate id; the UNIQUE constraint allows at most one
-- row per (post, mentioned account) pair, so a post that mentions several
-- accounts yields one row per mentioned account.
--
-- IF NOT EXISTS (table and indexes) makes this script safe to re-run against
-- an already-initialised database. It skips creation only; it does not check
-- that an existing object matches this definition.
--
-- NOTE(review): mentioned_did is not declared as a foreign key to accounts.
CREATE TABLE IF NOT EXISTS mentions (
    id              BIGINT      GENERATED ALWAYS AS IDENTITY PRIMARY KEY,  -- cannot be supplied by ordinary INSERTs
    post_uri        TEXT        NOT NULL,
    mentioned_did   TEXT        NOT NULL,
    mentioning_did  TEXT,                                -- NULL allowed
    post_text       TEXT,
    post_created_at TIMESTAMPTZ,
    collected_at    TIMESTAMPTZ NOT NULL DEFAULT now(),  -- filled in automatically on insert
    raw_json        JSONB       NOT NULL,
    UNIQUE (post_uri, mentioned_did)
);

-- The UNIQUE constraint's index leads with post_uri, so lookups by
-- mentioned_did alone need their own index.
CREATE INDEX IF NOT EXISTS idx_mentions_mentioned ON mentions (mentioned_did);
CREATE INDEX IF NOT EXISTS idx_mentions_created   ON mentions (post_created_at DESC);
-- Collection run audit trail.
-- One row per collection run. A row inserted with no explicit values starts
-- as status 'running' with all counters at zero and an empty errors array;
-- finished_at and duration_secs stay NULL until a writer sets them.
--
-- IF NOT EXISTS makes this script safe to re-run against an already-
-- initialised database. It skips creation only; it does not check that an
-- existing table matches this definition.
CREATE TABLE IF NOT EXISTS collection_runs (
    id                 BIGINT      GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
    started_at         TIMESTAMPTZ NOT NULL DEFAULT now(),
    finished_at        TIMESTAMPTZ,
    status             TEXT        NOT NULL DEFAULT 'running',  -- running | completed | failed | partial (not enforced by a CHECK)
    accounts_total     INTEGER     NOT NULL DEFAULT 0,
    accounts_done      INTEGER     NOT NULL DEFAULT 0,
    posts_collected    INTEGER     NOT NULL DEFAULT 0,
    mentions_collected INTEGER     NOT NULL DEFAULT 0,
    errors             JSONB       DEFAULT '[]'::jsonb,         -- defaults to an empty JSON array; nullable
    duration_secs      NUMERIC                                  -- presumably finished_at - started_at in seconds; confirm against the writer
);
-- Per-account collection bookmark (survives restarts).
-- At most one row per (account, collection type) pair, enforced by the
-- composite primary key.
--
-- IF NOT EXISTS makes this script safe to re-run against an already-
-- initialised database. It skips creation only; it does not check that an
-- existing table matches this definition.
--
-- NOTE(review): account_did is not declared as a foreign key to accounts.
CREATE TABLE IF NOT EXISTS collection_state (
    account_did     TEXT        NOT NULL,
    collection_type TEXT        NOT NULL,                -- feed | mentions (not enforced by a CHECK)
    last_post_at    TIMESTAMPTZ,                         -- NULL allowed; presumably the newest post time already collected (confirm)
    updated_at      TIMESTAMPTZ NOT NULL DEFAULT now(),  -- set on insert only; no trigger here refreshes it on UPDATE
    PRIMARY KEY (account_did, collection_type)
);