-- Bluesky Collector Schema (PostgreSQL)
-- Tracks accounts, their posts/replies, and mentions from other users.

-- Tracked accounts
-- One row per account, keyed by `did`; `handle` must also be unique.
-- IF NOT EXISTS lets this script be re-run without aborting: an object that
-- already exists is skipped, and its definition is NOT altered to match.
CREATE TABLE IF NOT EXISTS accounts (
    did                     TEXT PRIMARY KEY,                    -- presumably the account's AT Protocol DID; TODO confirm
    handle                  TEXT NOT NULL,                       -- uniqueness enforced by idx_accounts_handle
    display_name            TEXT,                                -- optional
    added_at                TIMESTAMPTZ NOT NULL DEFAULT now(),  -- filled in on insert when omitted
    last_feed_collected     TIMESTAMPTZ,                         -- nullable, no default
    last_mention_collected  TIMESTAMPTZ,                         -- nullable, no default
    active                  BOOLEAN NOT NULL DEFAULT true
);

-- At most one row per handle; comparison is on the stored text as-is
-- (no case folding is applied here).
CREATE UNIQUE INDEX IF NOT EXISTS idx_accounts_handle ON accounts (handle);

-- Collected posts (from tracked accounts' feeds)
-- One row per collected post, keyed by `uri`. `raw_json` is mandatory.
-- IF NOT EXISTS lets this script be re-run without aborting: an object that
-- already exists is skipped, and its definition is NOT altered to match.
CREATE TABLE IF NOT EXISTS posts (
    uri           TEXT PRIMARY KEY,
    cid           TEXT NOT NULL,
    author_did    TEXT NOT NULL,                       -- no foreign key to accounts(did) is declared
    text          TEXT,
    created_at    TIMESTAMPTZ,
    indexed_at    TIMESTAMPTZ,
    collected_at  TIMESTAMPTZ NOT NULL DEFAULT now(),  -- filled in on insert when omitted
    reply_parent  TEXT,                                -- presumably the parent post's uri; TODO confirm
    reply_root    TEXT,                                -- presumably the thread root's uri; TODO confirm
    post_type     TEXT NOT NULL DEFAULT 'post',        -- post | reply | repost (not enforced by a CHECK)
    -- The flags and counters below have defaults but are nullable, so an
    -- explicit NULL is accepted.
    -- NOTE(review): consider NOT NULL once the writer is confirmed never to send NULL.
    has_media     BOOLEAN DEFAULT false,
    has_embed     BOOLEAN DEFAULT false,
    like_count    INTEGER DEFAULT 0,
    reply_count   INTEGER DEFAULT 0,
    repost_count  INTEGER DEFAULT 0,
    quote_count   INTEGER DEFAULT 0,
    langs         TEXT[],
    raw_json      JSONB NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_posts_author    ON posts (author_did);
CREATE INDEX IF NOT EXISTS idx_posts_created   ON posts (created_at DESC);
CREATE INDEX IF NOT EXISTS idx_posts_type      ON posts (post_type);
CREATE INDEX IF NOT EXISTS idx_posts_collected ON posts (collected_at DESC);
-- Partial index: only rows with a non-NULL reply_root are indexed.
CREATE INDEX IF NOT EXISTS idx_posts_reply_root ON posts (reply_root) WHERE reply_root IS NOT NULL;

-- Mentions: posts from *anyone* that mention a tracked account
-- One row per (post, mentioned account) pair; the pair is unique, so the same
-- post may appear once for each account it mentions.
-- IF NOT EXISTS lets this script be re-run without aborting: an object that
-- already exists is skipped, and its definition is NOT altered to match.
CREATE TABLE IF NOT EXISTS mentions (
    id               BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,  -- database-assigned surrogate key
    post_uri         TEXT NOT NULL,
    mentioned_did    TEXT NOT NULL,                       -- no foreign key to accounts(did) is declared
    mentioning_did   TEXT,                                -- nullable
    post_text        TEXT,
    post_created_at  TIMESTAMPTZ,
    collected_at     TIMESTAMPTZ NOT NULL DEFAULT now(),  -- filled in on insert when omitted
    raw_json         JSONB NOT NULL,
    UNIQUE (post_uri, mentioned_did)
);

-- The UNIQUE constraint's index leads with post_uri, so lookups by
-- mentioned_did alone get their own index.
CREATE INDEX IF NOT EXISTS idx_mentions_mentioned ON mentions (mentioned_did);
CREATE INDEX IF NOT EXISTS idx_mentions_created   ON mentions (post_created_at DESC);

-- Collection run audit trail
-- One row per collection run. Every column except finished_at and
-- duration_secs has a default, so `INSERT ... DEFAULT VALUES` opens a run.
-- IF NOT EXISTS lets this script be re-run without aborting: an existing
-- table is skipped, and its definition is NOT altered to match.
CREATE TABLE IF NOT EXISTS collection_runs (
    id                  BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,  -- database-assigned surrogate key
    started_at          TIMESTAMPTZ NOT NULL DEFAULT now(),
    finished_at         TIMESTAMPTZ,                       -- nullable, no default
    status              TEXT NOT NULL DEFAULT 'running',   -- running | completed | failed | partial (not enforced by a CHECK)
    accounts_total      INTEGER NOT NULL DEFAULT 0,
    accounts_done       INTEGER NOT NULL DEFAULT 0,
    posts_collected     INTEGER NOT NULL DEFAULT 0,
    mentions_collected  INTEGER NOT NULL DEFAULT 0,
    errors              JSONB DEFAULT '[]'::jsonb,         -- defaults to an empty JSON array; NULL is still accepted
    duration_secs       NUMERIC                            -- presumably the run duration in seconds; TODO confirm
);

-- Per-account collection bookmark (survives restarts)
-- At most one row per (account_did, collection_type) pair.
-- IF NOT EXISTS lets this script be re-run without aborting: an existing
-- table is skipped, and its definition is NOT altered to match.
CREATE TABLE IF NOT EXISTS collection_state (
    account_did      TEXT NOT NULL,                       -- no foreign key to accounts(did) is declared
    collection_type  TEXT NOT NULL,                       -- feed | mentions (not enforced by a CHECK)
    last_post_at     TIMESTAMPTZ,                         -- nullable, no default
    updated_at       TIMESTAMPTZ NOT NULL DEFAULT now(),  -- set on insert only; no trigger here refreshes it on UPDATE
    PRIMARY KEY (account_did, collection_type)
);