-- Bluesky Collector Schema
-- Tracks accounts, their posts/replies, and mentions from other users.
--
-- Uses PostgreSQL-style syntax (TIMESTAMPTZ, JSONB, TEXT[], identity columns,
-- partial indexes, and the `::jsonb` cast).
--
-- Layout rule: every clause sits on its own line. A `--` comment runs to the
-- end of the physical line, so DDL must never follow a comment on the same line.

-- ---------------------------------------------------------------------------
-- Tracked accounts
-- ---------------------------------------------------------------------------
CREATE TABLE accounts (
    did                    TEXT        PRIMARY KEY,
    handle                 TEXT        NOT NULL,
    display_name           TEXT,
    added_at               TIMESTAMPTZ NOT NULL DEFAULT now(),
    last_feed_collected    TIMESTAMPTZ,
    last_mention_collected TIMESTAMPTZ,
    active                 BOOLEAN     NOT NULL DEFAULT true
);

-- A handle may belong to at most one account row.
CREATE UNIQUE INDEX idx_accounts_handle ON accounts (handle);

-- ---------------------------------------------------------------------------
-- Collected posts (from tracked accounts' feeds)
-- ---------------------------------------------------------------------------
-- NOTE(review): author_did has no foreign key to accounts(did); presumably
-- feeds can include posts authored by untracked users -- confirm before adding one.
CREATE TABLE posts (
    uri          TEXT        PRIMARY KEY,
    cid          TEXT        NOT NULL,
    author_did   TEXT        NOT NULL,
    text         TEXT,
    created_at   TIMESTAMPTZ,
    indexed_at   TIMESTAMPTZ,
    collected_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    reply_parent TEXT,
    reply_root   TEXT,
    -- Documented values: post | reply | repost.
    -- NOTE(review): not enforced by a CHECK constraint -- confirm the writer
    -- never emits other values before adding one.
    post_type    TEXT        NOT NULL DEFAULT 'post',
    -- The flag and counter columns below default to false / 0 but remain nullable.
    has_media    BOOLEAN     DEFAULT false,
    has_embed    BOOLEAN     DEFAULT false,
    like_count   INTEGER     DEFAULT 0,
    reply_count  INTEGER     DEFAULT 0,
    repost_count INTEGER     DEFAULT 0,
    quote_count  INTEGER     DEFAULT 0,
    langs        TEXT[],
    raw_json     JSONB       NOT NULL
);

CREATE INDEX idx_posts_author    ON posts (author_did);
CREATE INDEX idx_posts_created   ON posts (created_at DESC);
CREATE INDEX idx_posts_type      ON posts (post_type);
CREATE INDEX idx_posts_collected ON posts (collected_at DESC);

-- Partial index: only rows that carry a reply_root are indexed.
CREATE INDEX idx_posts_reply_root ON posts (reply_root)
    WHERE reply_root IS NOT NULL;

-- ---------------------------------------------------------------------------
-- Mentions: posts from *anyone* that mention a tracked account
-- ---------------------------------------------------------------------------
-- NOTE(review): neither post_uri nor mentioned_did is declared as a foreign
-- key -- confirm whether that is intentional.
CREATE TABLE mentions (
    id              BIGINT      GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
    post_uri        TEXT        NOT NULL,
    mentioned_did   TEXT        NOT NULL,
    mentioning_did  TEXT,
    post_text       TEXT,
    post_created_at TIMESTAMPTZ,
    collected_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    raw_json        JSONB       NOT NULL,
    -- At most one row per (post, mentioned account) pair.
    UNIQUE (post_uri, mentioned_did)
);

CREATE INDEX idx_mentions_mentioned ON mentions (mentioned_did);
CREATE INDEX idx_mentions_created   ON mentions (post_created_at DESC);

-- ---------------------------------------------------------------------------
-- Collection run audit trail
-- ---------------------------------------------------------------------------
CREATE TABLE collection_runs (
    id                 BIGINT      GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
    started_at         TIMESTAMPTZ NOT NULL DEFAULT now(),
    finished_at        TIMESTAMPTZ,
    -- Documented values: running | completed | failed | partial.
    -- NOTE(review): not enforced by a CHECK constraint.
    status             TEXT        NOT NULL DEFAULT 'running',
    accounts_total     INTEGER     NOT NULL DEFAULT 0,
    accounts_done      INTEGER     NOT NULL DEFAULT 0,
    posts_collected    INTEGER     NOT NULL DEFAULT 0,
    mentions_collected INTEGER     NOT NULL DEFAULT 0,
    -- Defaults to an empty JSON array; the column itself is nullable.
    errors             JSONB       DEFAULT '[]'::jsonb,
    duration_secs      NUMERIC
);

-- ---------------------------------------------------------------------------
-- Per-account collection bookmark (survives restarts)
-- ---------------------------------------------------------------------------
-- One row per (account, collection type) pair, enforced by the composite key.
CREATE TABLE collection_state (
    account_did     TEXT        NOT NULL,
    -- Documented values: feed | mentions.
    collection_type TEXT        NOT NULL,
    last_post_at    TIMESTAMPTZ,
    updated_at      TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (account_did, collection_type)
);