commit 1783a48d7c1c9f42fa2cf63b347a2efa89e412d9
Author: Pieter
Date: Mon Feb 9 08:05:54 2026 +0100
Initial commit: Mastodon collector application
Add Flask-based application for collecting and archiving Mastodon posts from configured accounts.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..24a9226
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,6 @@
+__pycache__
+*.pyc
+.env
+.git
+.gitignore
+README.md
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..1904194
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,8 @@
+# PostgreSQL Configuration
+POSTGRES_PASSWORD=your_secure_password_here
+
+# Flask Configuration
+FLASK_SECRET_KEY=your_secure_secret_key_here
+
+# Polling Configuration
+POLL_INTERVAL_SECONDS=14400
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..595ac22
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,115 @@
+# Environment variables and secrets
+.env
+.env.local
+.env.*.local
+*.secret
+secrets/
+credentials/
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# Virtual environments
+venv/
+env/
+ENV/
+env.bak/
+venv.bak/
+.venv/
+
+# IDEs
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.DS_Store
+
+# Database files
+*.sqlite
+*.sqlite3
+*.db
+*.db-journal
+*.db-shm
+*.db-wal
+postgres_data/
+pgdata/
+
+# Logs
+*.log
+logs/
+*.log.*
+
+# Docker volumes and local data
+docker-compose.override.yml
+.docker/
+volumes/
+
+# Certificates and keys
+*.pem
+*.key
+*.crt
+*.cer
+*.p12
+*.pfx
+
+# Backup files
+*.bak
+*.backup
+*.tmp
+*~
+
+# OS generated files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+.tox/
+.hypothesis/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# pyenv
+.python-version
+
+# pipenv
+Pipfile.lock
+
+# Poetry
+poetry.lock
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..b1b4a17
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,20 @@
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ libpq-dev gcc \
+ && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+# Create empty accounts file if it doesn't exist
+RUN touch /app/accounts.txt
+
+EXPOSE 5000
+
+CMD ["python", "-m", "app.collector"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8b8a56c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,91 @@
+# Mastodon Collector
+
+Collects posts, replies, and mentions from a list of Mastodon accounts and stores them in PostgreSQL. Includes a web UI for account management and data browsing, plus JSON/CSV APIs for your analysis pipeline.
+
+## Quick Start
+
+```bash
+# 1. Add accounts to monitor
+echo "@user@mastodon.social" >> accounts.txt
+
+# 2. Start everything
+docker compose up -d
+
+# 3. Open the dashboard
+open http://localhost:8585
+```
+
+## Architecture
+
+| Service | Description | Port |
+|---------------|------------------------------------------------|-------|
+| **db** | PostgreSQL 16 | 5432 |
+| **web** | Flask dashboard (Gunicorn) | 8585 |
+| **collector** | Background service, polls every 4 hours | — |
+
+## Adding Accounts
+
+Two methods:
+
+1. **Text file** — edit `accounts.txt`, one handle per line (`@user@instance.social`). Picked up on next collection cycle.
+2. **Web UI** — go to http://localhost:8585/accounts and use the form.
+
+## Configuration
+
+Edit `.env` to customize:
+
+```
+POSTGRES_PASSWORD=collector_secret # Change for production
+FLASK_SECRET_KEY=change-me-in-production
+POLL_INTERVAL_SECONDS=14400 # Default: 4 hours (14400s)
+```
+
+## API Endpoints
+
+For plugging into your analysis pipeline:
+
+| Endpoint | Description |
+|-----------------------|--------------------------------------|
+| `GET /api/stats` | Overview stats (counts by type) |
+| `GET /api/statuses` | Paginated statuses as JSON |
+| `GET /export` | Download all statuses as CSV |
+
+### `/api/statuses` parameters
+
+- `page` — page number (default: 1)
+- `per_page` — results per page (default: 100, max: 500)
+- `account_id` — filter by internal account ID
+- `type` — filter by status type: `post`, `reply`, `mention`, `reblog`
+- `since` — ISO datetime, only return statuses after this time
+
+## Database Schema
+
+Main tables:
+
+- `monitored_accounts` — accounts being tracked
+- `statuses` — collected posts with plain text + HTML content
+- `mentions` — who was @-mentioned in each status
+- `media_attachments` — images/videos attached to statuses
+- `tags` — hashtags used
+- `collection_logs` — audit trail of each collection run
+
+Each status stores `raw_json` with the full Mastodon API response for future analysis needs.
+
+## Moving to a Server
+
+```bash
+# Copy the project
+scp -r mastodon-collector/ user@server:~/
+
+# On the server
+cd mastodon-collector
+# Edit .env with production secrets
+docker compose up -d
+```
+
+## Stopping
+
+```bash
+docker compose down # Stop services, keep data
+docker compose down -v # Stop services AND delete database
+```
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/__main__.py b/app/__main__.py
new file mode 100644
index 0000000..616c627
--- /dev/null
+++ b/app/__main__.py
@@ -0,0 +1,4 @@
+"""Allow running the collector with `python -m app`."""
+from app.collector import main
+
+main()
diff --git a/app/collector.py b/app/collector.py
new file mode 100644
index 0000000..0c163b2
--- /dev/null
+++ b/app/collector.py
@@ -0,0 +1,306 @@
+"""
+Collector service — periodically polls Mastodon for new statuses from monitored accounts.
+Runs as a standalone process via `python -m app.collector`.
+"""
+
+import logging
+import os
+import sys
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+
+from apscheduler.schedulers.blocking import BlockingScheduler
+
+from app.db import (
+ init_db,
+ get_session,
+ MonitoredAccount,
+ Status,
+ Mention,
+ MediaAttachment,
+ Tag,
+ CollectionLog,
+)
+from app.mastodon_api import (
+ lookup_account,
+ get_account_statuses,
+ parse_status,
+ MastodonAPIError,
+ RateLimitError,
+)
+
+logging.basicConfig(
+ level=logging.INFO,
+ format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+ handlers=[logging.StreamHandler(sys.stdout)],
+)
+logger = logging.getLogger("collector")
+
+POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL_SECONDS", 14400))
+ACCOUNTS_FILE = os.environ.get("ACCOUNTS_FILE", "/app/accounts.txt")
+
+
+def load_accounts_from_file(filepath: str) -> list[tuple[str, str]]:
+ """Parse accounts.txt and return list of (username, instance) tuples."""
+ accounts = []
+ path = Path(filepath)
+ if not path.exists():
+ logger.warning("Accounts file not found: %s", filepath)
+ return accounts
+
+ for line in path.read_text().splitlines():
+ line = line.strip()
+ if not line or line.startswith("#"):
+ continue
+ # Expected format: @user@instance.social or user@instance.social
+ line = line.lstrip("@")
+ if "@" not in line:
+ logger.warning("Skipping malformed account line: %s", line)
+ continue
+ parts = line.split("@", 1)
+ if len(parts) == 2 and parts[0] and parts[1]:
+ accounts.append((parts[0], parts[1]))
+ else:
+ logger.warning("Skipping malformed account line: %s", line)
+ return accounts
+
+
+def sync_monitored_accounts(session) -> list[MonitoredAccount]:
+ """
+ Sync accounts from the file + database.
+ Accounts added via web UI are already in the DB.
+ Accounts in the file get added if missing.
+ Returns all active monitored accounts.
+ """
+ file_accounts = load_accounts_from_file(ACCOUNTS_FILE)
+
+ for username, instance in file_accounts:
+ existing = (
+ session.query(MonitoredAccount)
+ .filter_by(username=username, instance=instance)
+ .first()
+ )
+ if not existing:
+ logger.info("Adding account from file: @%s@%s", username, instance)
+ acct = MonitoredAccount(username=username, instance=instance, is_active=True)
+ session.add(acct)
+
+ session.commit()
+ return session.query(MonitoredAccount).filter_by(is_active=True).all()
+
+
+def resolve_account(session, account: MonitoredAccount) -> bool:
+ """Look up the Mastodon account ID if we don't have it yet."""
+ if account.account_id:
+ return True
+
+ try:
+ data = lookup_account(account.instance, account.username)
+ account.account_id = data["id"]
+ account.display_name = data.get("display_name", "")
+ account.avatar_url = data.get("avatar", "")
+ account.note = data.get("note", "")
+ session.commit()
+ logger.info("Resolved %s → account_id=%s", account.handle, account.account_id)
+ return True
+ except MastodonAPIError as e:
+ logger.error("Failed to resolve %s: %s", account.handle, e)
+ return False
+
+
+def store_status(session, account: MonitoredAccount, parsed: dict) -> bool:
+ """Store a parsed status in the database. Returns True if new, False if duplicate."""
+ # Check for duplicate
+ existing = (
+ session.query(Status)
+ .filter_by(status_id=parsed["status_id"], account_db_id=account.id)
+ .first()
+ )
+ if existing:
+ # Update interaction counts in case they changed
+ existing.replies_count = parsed["replies_count"]
+ existing.reblogs_count = parsed["reblogs_count"]
+ existing.favourites_count = parsed["favourites_count"]
+ return False
+
+ status = Status(
+ status_id=parsed["status_id"],
+ account_db_id=account.id,
+ uri=parsed["uri"],
+ url=parsed["url"],
+ content=parsed["content"],
+ text_content=parsed["text_content"],
+ visibility=parsed["visibility"],
+ created_at=parsed["created_at"],
+ language=parsed["language"],
+ sensitive=parsed["sensitive"],
+ spoiler_text=parsed["spoiler_text"],
+ in_reply_to_id=parsed["in_reply_to_id"],
+ in_reply_to_account_id=parsed["in_reply_to_account_id"],
+ conversation_id=parsed["conversation_id"],
+ replies_count=parsed["replies_count"],
+ reblogs_count=parsed["reblogs_count"],
+ favourites_count=parsed["favourites_count"],
+ status_type=parsed["status_type"],
+ raw_json=parsed["raw_json"],
+ )
+ session.add(status)
+ session.flush() # get status.id
+
+ # Store mentions
+ for m in parsed["mentions"]:
+ session.add(Mention(
+ status_db_id=status.id,
+ mentioned_account_id=m["mentioned_account_id"],
+ mentioned_username=m["mentioned_username"],
+ mentioned_acct=m["mentioned_acct"],
+ mentioned_url=m["mentioned_url"],
+ ))
+
+ # Store media
+ for ma in parsed["media_attachments"]:
+ session.add(MediaAttachment(
+ status_db_id=status.id,
+ media_id=ma["media_id"],
+ media_type=ma["media_type"],
+ url=ma["url"],
+ preview_url=ma["preview_url"],
+ description=ma["description"],
+ ))
+
+ # Store tags
+ for t in parsed["tags"]:
+ session.add(Tag(
+ status_db_id=status.id,
+ name=t["name"],
+ url=t["url"],
+ ))
+
+ return True
+
+
+def collect_account(session, account: MonitoredAccount) -> int:
+ """Collect new statuses for a single account. Returns count of new statuses."""
+ log = CollectionLog(account_db_id=account.id, status="running")
+ session.add(log)
+ session.commit()
+
+ try:
+ if not resolve_account(session, account):
+ log.status = "error"
+ log.error = "Could not resolve account ID"
+ log.finished_at = datetime.now(timezone.utc)
+ session.commit()
+ return 0
+
+ logger.info("Collecting statuses for %s (since_id=%s)", account.handle, account.last_status_id)
+
+ raw_statuses = get_account_statuses(
+ instance=account.instance,
+ account_id=account.account_id,
+ since_id=account.last_status_id,
+ )
+
+ new_count = 0
+ newest_id = account.last_status_id
+
+ for raw in raw_statuses:
+ parsed = parse_status(raw, account.account_id)
+ is_new = store_status(session, account, parsed)
+ if is_new:
+ new_count += 1
+
+ # Track the newest status ID
+ sid = parsed["status_id"]
+ if newest_id is None or sid > newest_id:
+ newest_id = sid
+
+ if newest_id:
+ account.last_status_id = newest_id
+ account.last_collected_at = datetime.now(timezone.utc)
+
+ log.statuses_collected = new_count
+ log.status = "success"
+ log.finished_at = datetime.now(timezone.utc)
+ session.commit()
+
+ logger.info("Collected %d new statuses for %s (total fetched: %d)",
+ new_count, account.handle, len(raw_statuses))
+ return new_count
+
+ except RateLimitError as e:
+ log.status = "error"
+ log.error = f"Rate limited: {e}"
+ log.finished_at = datetime.now(timezone.utc)
+ session.commit()
+ logger.warning("Rate limited while collecting %s: %s", account.handle, e)
+ time.sleep(e.retry_after)
+ return 0
+
+ except MastodonAPIError as e:
+ log.status = "error"
+ log.error = str(e)
+ log.finished_at = datetime.now(timezone.utc)
+ session.commit()
+ logger.error("API error collecting %s: %s", account.handle, e)
+ return 0
+
+ except Exception as e:
+ log.status = "error"
+ log.error = str(e)
+ log.finished_at = datetime.now(timezone.utc)
+ session.commit()
+ logger.exception("Unexpected error collecting %s", account.handle)
+ return 0
+
+
+def run_collection_cycle():
+ """Run one full collection cycle across all monitored accounts."""
+ logger.info("=== Starting collection cycle ===")
+ session = get_session()
+
+ try:
+ accounts = sync_monitored_accounts(session)
+ logger.info("Monitoring %d active accounts", len(accounts))
+
+ total_new = 0
+ for account in accounts:
+ new = collect_account(session, account)
+ total_new += new
+ time.sleep(1) # Brief pause between accounts to be polite
+
+ logger.info("=== Collection cycle complete: %d new statuses across %d accounts ===",
+ total_new, len(accounts))
+
+ except Exception:
+ logger.exception("Fatal error in collection cycle")
+ finally:
+ session.close()
+
+
+def main():
+ """Entry point: initialize DB and start the scheduler."""
+ logger.info("Mastodon Collector starting up...")
+ logger.info("Poll interval: %d seconds (%d hours)", POLL_INTERVAL, POLL_INTERVAL // 3600)
+
+ init_db()
+ logger.info("Database initialized")
+
+ # Run one collection immediately on startup
+ run_collection_cycle()
+
+ # Schedule recurring collection
+ scheduler = BlockingScheduler()
+ scheduler.add_job(run_collection_cycle, "interval", seconds=POLL_INTERVAL)
+ logger.info("Scheduler started — next run in %d seconds", POLL_INTERVAL)
+
+ try:
+ scheduler.start()
+ except (KeyboardInterrupt, SystemExit):
+ logger.info("Collector shutting down")
+ scheduler.shutdown()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/app/db.py b/app/db.py
new file mode 100644
index 0000000..f2cd94d
--- /dev/null
+++ b/app/db.py
@@ -0,0 +1,186 @@
+"""Database models and session management."""
+
+import os
+from datetime import datetime, timezone
+
+from sqlalchemy import (
+ create_engine,
+ Column,
+ Integer,
+ BigInteger,
+ String,
+ Text,
+ Boolean,
+ DateTime,
+ ForeignKey,
+ Index,
+ UniqueConstraint,
+ JSON,
+)
+from sqlalchemy.orm import declarative_base, sessionmaker, relationship
+
+DATABASE_URL = os.environ.get(
+ "DATABASE_URL", "postgresql://collector:collector_secret@localhost:5432/mastodon_collector"
+)
+
+engine = create_engine(DATABASE_URL, pool_pre_ping=True, pool_size=5, max_overflow=10)
+SessionLocal = sessionmaker(bind=engine)
+Base = declarative_base()
+
+
+class MonitoredAccount(Base):
+ """An account we are monitoring."""
+
+ __tablename__ = "monitored_accounts"
+
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ username = Column(String(255), nullable=False) # e.g. "user"
+ instance = Column(String(255), nullable=False) # e.g. "mastodon.social"
+ account_id = Column(String(64), nullable=True) # Mastodon numeric account ID on that instance
+ display_name = Column(String(512), nullable=True)
+ avatar_url = Column(Text, nullable=True)
+ is_active = Column(Boolean, default=True, nullable=False)
+ last_collected_at = Column(DateTime(timezone=True), nullable=True)
+ last_status_id = Column(String(64), nullable=True) # For pagination: newest status ID we've seen
+ created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+ note = Column(Text, nullable=True) # Bio / description
+
+ statuses = relationship("Status", back_populates="account", lazy="dynamic")
+
+ __table_args__ = (
+ UniqueConstraint("username", "instance", name="uq_account_handle"),
+ )
+
+ @property
+ def handle(self):
+ return f"@{self.username}@{self.instance}"
+
+ def __repr__(self):
+ return f"<MonitoredAccount {self.handle}>"
+
+
+class Status(Base):
+ """A single post / toot collected from Mastodon."""
+
+ __tablename__ = "statuses"
+
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ status_id = Column(String(64), nullable=False) # Mastodon status ID
+ account_db_id = Column(Integer, ForeignKey("monitored_accounts.id"), nullable=False)
+ uri = Column(Text, nullable=False) # Canonical ActivityPub URI
+ url = Column(Text, nullable=True) # Human-readable URL
+ content = Column(Text, nullable=False) # HTML content
+ text_content = Column(Text, nullable=True) # Stripped plain-text content
+ visibility = Column(String(32), nullable=True) # public, unlisted, private, direct
+ created_at = Column(DateTime(timezone=True), nullable=False)
+ collected_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+ language = Column(String(16), nullable=True)
+ sensitive = Column(Boolean, default=False)
+ spoiler_text = Column(Text, nullable=True)
+
+ # Reply / conversation tracking
+ in_reply_to_id = Column(String(64), nullable=True) # Status ID being replied to
+ in_reply_to_account_id = Column(String(64), nullable=True)
+ conversation_id = Column(String(64), nullable=True)
+
+ # Interaction counts
+ replies_count = Column(Integer, default=0)
+ reblogs_count = Column(Integer, default=0)
+ favourites_count = Column(Integer, default=0)
+
+ # Classification for your analysis pipeline
+ status_type = Column(String(32), nullable=False, default="post") # post, reply, mention, reblog
+
+ # Store the full JSON for future reference
+ raw_json = Column(JSON, nullable=True)
+
+ # Relationships
+ account = relationship("MonitoredAccount", back_populates="statuses")
+ mentions = relationship("Mention", back_populates="status", cascade="all, delete-orphan")
+ media_attachments = relationship("MediaAttachment", back_populates="status", cascade="all, delete-orphan")
+ tags = relationship("Tag", back_populates="status", cascade="all, delete-orphan")
+
+ __table_args__ = (
+ UniqueConstraint("status_id", "account_db_id", name="uq_status_per_account"),
+ Index("ix_status_created", "created_at"),
+ Index("ix_status_type", "status_type"),
+ Index("ix_status_account", "account_db_id"),
+ Index("ix_status_conversation", "conversation_id"),
+ )
+
+ def __repr__(self):
+ return f"<Status {self.status_id} ({self.status_type})>"
+
+
+class Mention(Base):
+ """A mention within a status (who was @-mentioned)."""
+
+ __tablename__ = "mentions"
+
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ status_db_id = Column(Integer, ForeignKey("statuses.id", ondelete="CASCADE"), nullable=False)
+ mentioned_account_id = Column(String(64), nullable=True)
+ mentioned_username = Column(String(255), nullable=False)
+ mentioned_acct = Column(String(512), nullable=False) # full user@instance
+ mentioned_url = Column(Text, nullable=True)
+
+ status = relationship("Status", back_populates="mentions")
+
+
+class MediaAttachment(Base):
+ """Media attached to a status."""
+
+ __tablename__ = "media_attachments"
+
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ status_db_id = Column(Integer, ForeignKey("statuses.id", ondelete="CASCADE"), nullable=False)
+ media_id = Column(String(64), nullable=True)
+ media_type = Column(String(32), nullable=True) # image, video, gifv, audio
+ url = Column(Text, nullable=True)
+ preview_url = Column(Text, nullable=True)
+ description = Column(Text, nullable=True) # alt text
+
+ status = relationship("Status", back_populates="media_attachments")
+
+
+class Tag(Base):
+ """A hashtag used in a status."""
+
+ __tablename__ = "tags"
+
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ status_db_id = Column(Integer, ForeignKey("statuses.id", ondelete="CASCADE"), nullable=False)
+ name = Column(String(255), nullable=False)
+ url = Column(Text, nullable=True)
+
+ status = relationship("Status", back_populates="tags")
+
+ __table_args__ = (
+ Index("ix_tag_name", "name"),
+ )
+
+
+class CollectionLog(Base):
+ """Log of each collection run for monitoring."""
+
+ __tablename__ = "collection_logs"
+
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ account_db_id = Column(Integer, ForeignKey("monitored_accounts.id"), nullable=True)
+ started_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+ finished_at = Column(DateTime(timezone=True), nullable=True)
+ statuses_collected = Column(Integer, default=0)
+ error = Column(Text, nullable=True)
+ status = Column(String(32), default="running") # running, success, error
+
+ account = relationship("MonitoredAccount")
+
+
+def init_db():
+ """Create all tables."""
+ Base.metadata.create_all(engine)
+
+
+def get_session():
+ """Get a new database session."""
+ return SessionLocal()
diff --git a/app/mastodon_api.py b/app/mastodon_api.py
new file mode 100644
index 0000000..9fd7026
--- /dev/null
+++ b/app/mastodon_api.py
@@ -0,0 +1,226 @@
+"""Mastodon public API client — no authentication required."""
+
+import logging
+import re
+import time
+from html import unescape
+from typing import Optional
+from urllib.parse import urljoin
+
+import requests
+
+logger = logging.getLogger(__name__)
+
+# Respect rate limits: Mastodon returns 300 requests per 5 min by default
+DEFAULT_TIMEOUT = 30
+MAX_RETRIES = 3
+RETRY_BACKOFF = 5 # seconds
+
+
+class MastodonAPIError(Exception):
+ pass
+
+
+class RateLimitError(MastodonAPIError):
+ def __init__(self, retry_after: float = 60):
+ self.retry_after = retry_after
+ super().__init__(f"Rate limited, retry after {retry_after}s")
+
+
+def _strip_html(html: str) -> str:
+ """Strip HTML tags and decode entities to get plain text."""
+ # Replace <br> and </p> with newlines
+ text = re.sub(r"<br\s*/?>", "\n", html)
+ text = re.sub(r"</p>", "\n", text)
+ # Remove all remaining tags
+ text = re.sub(r"<[^>]+>", "", text)
+ return unescape(text).strip()
+
+
+def _api_get(instance: str, path: str, params: Optional[dict] = None) -> requests.Response:
+ """Make a GET request to a Mastodon instance's public API."""
+ url = f"https://{instance}{path}"
+ headers = {"Accept": "application/json", "User-Agent": "MastodonCollector/1.0"}
+
+ for attempt in range(MAX_RETRIES):
+ try:
+ resp = requests.get(url, params=params, headers=headers, timeout=DEFAULT_TIMEOUT)
+
+ if resp.status_code == 429:
+ retry_after = float(resp.headers.get("X-RateLimit-Reset", 60))
+ # If it's an ISO timestamp, calculate delta
+ if retry_after > 1_000_000:
+ retry_after = 60
+ logger.warning("Rate limited by %s, waiting %.0fs", instance, retry_after)
+ raise RateLimitError(retry_after)
+
+ if resp.status_code == 404:
+ raise MastodonAPIError(f"Not found: {url}")
+
+ resp.raise_for_status()
+ return resp
+
+ except RateLimitError:
+ raise
+ except requests.RequestException as e:
+ if attempt < MAX_RETRIES - 1:
+ wait = RETRY_BACKOFF * (attempt + 1)
+ logger.warning("Request to %s failed (attempt %d/%d): %s — retrying in %ds",
+ url, attempt + 1, MAX_RETRIES, e, wait)
+ time.sleep(wait)
+ else:
+ raise MastodonAPIError(f"Failed after {MAX_RETRIES} attempts: {e}") from e
+
+ raise MastodonAPIError("Unexpected retry exhaustion")
+
+
+def lookup_account(instance: str, username: str) -> dict:
+ """Look up an account on an instance by username. Returns the account JSON."""
+ # Try the v1 lookup endpoint first (available on most instances)
+ try:
+ resp = _api_get(instance, "/api/v1/accounts/lookup", {"acct": username})
+ return resp.json()
+ except MastodonAPIError:
+ pass
+
+ # Fallback: search for the account
+ resp = _api_get(instance, "/api/v2/search", {"q": f"@{username}@{instance}", "type": "accounts", "limit": 1})
+ data = resp.json()
+ accounts = data.get("accounts", [])
+ if not accounts:
+ raise MastodonAPIError(f"Account @{username} not found on {instance}")
+ return accounts[0]
+
+
+def get_account_statuses(
+ instance: str,
+ account_id: str,
+ since_id: Optional[str] = None,
+ limit: int = 40,
+ exclude_reblogs: bool = False,
+) -> list[dict]:
+ """
+ Fetch statuses from an account. Handles pagination to get all new statuses.
+ Returns list of status dicts, oldest first.
+ """
+ all_statuses = []
+ params = {"limit": min(limit, 40)}
+ if since_id:
+ params["since_id"] = since_id
+ if exclude_reblogs:
+ params["exclude_reblogs"] = "true"
+
+ path = f"/api/v1/accounts/{account_id}/statuses"
+
+ # Paginate through results
+ max_pages = 25 # safety limit
+ page = 0
+
+ while page < max_pages:
+ resp = _api_get(instance, path, params)
+ statuses = resp.json()
+
+ if not statuses:
+ break
+
+ all_statuses.extend(statuses)
+ page += 1
+
+ # Check Link header for next page
+ link_header = resp.headers.get("Link", "")
+ next_match = re.search(r'<([^>]+)>;\s*rel="next"', link_header)
+ if not next_match:
+ break
+
+ # Parse the next URL for max_id
+ next_url = next_match.group(1)
+ max_id_match = re.search(r"max_id=(\d+)", next_url)
+ if not max_id_match:
+ break
+
+ params["max_id"] = max_id_match.group(1)
+ # Remove since_id for subsequent pages — we're paginating backwards
+ # Actually we keep since_id as the floor
+ time.sleep(0.5) # Be polite between pages
+
+ # Return oldest first so we can process chronologically
+ all_statuses.reverse()
+ return all_statuses
+
+
+def get_status_context(instance: str, status_id: str) -> dict:
+ """Get the context (ancestors + descendants) of a status. Useful for threading."""
+ resp = _api_get(instance, f"/api/v1/statuses/{status_id}/context")
+ return resp.json()
+
+
+def classify_status(status: dict, monitored_account_id: str) -> str:
+ """
+ Classify a status as: post, reply, mention, or reblog.
+ - reblog: the status is a boost of another status
+ - reply: the status is in reply to another status
+ - mention: the status mentions other accounts (but is not a reply)
+ - post: a standalone original post
+ """
+ if status.get("reblog"):
+ return "reblog"
+ if status.get("in_reply_to_id"):
+ return "reply"
+ mentions = status.get("mentions", [])
+ if mentions:
+ # Only classify as "mention" if it mentions someone other than self
+ other_mentions = [m for m in mentions if m.get("id") != monitored_account_id]
+ if other_mentions:
+ return "mention"
+ return "post"
+
+
+def parse_status(status: dict, monitored_account_id: str) -> dict:
+ """Parse a raw Mastodon status JSON into a flat dict for storage."""
+ # If it's a reblog, we store the original content but flag it
+ actual = status.get("reblog") or status
+ content_html = actual.get("content", "")
+
+ return {
+ "status_id": status["id"],
+ "uri": status.get("uri", ""),
+ "url": status.get("url") or actual.get("url", ""),
+ "content": content_html,
+ "text_content": _strip_html(content_html),
+ "visibility": status.get("visibility", "public"),
+ "created_at": status.get("created_at"),
+ "language": status.get("language") or actual.get("language"),
+ "sensitive": status.get("sensitive", False),
+ "spoiler_text": status.get("spoiler_text", ""),
+ "in_reply_to_id": status.get("in_reply_to_id"),
+ "in_reply_to_account_id": status.get("in_reply_to_account_id"),
+ "conversation_id": status.get("conversation", {}).get("id") if isinstance(status.get("conversation"), dict) else None,
+ "replies_count": status.get("replies_count", 0),
+ "reblogs_count": status.get("reblogs_count", 0),
+ "favourites_count": status.get("favourites_count", 0),
+ "status_type": classify_status(status, monitored_account_id),
+ "mentions": [
+ {
+ "mentioned_account_id": m.get("id"),
+ "mentioned_username": m.get("username", ""),
+ "mentioned_acct": m.get("acct", ""),
+ "mentioned_url": m.get("url", ""),
+ }
+ for m in (actual.get("mentions") or [])
+ ],
+ "media_attachments": [
+ {
+ "media_id": ma.get("id"),
+ "media_type": ma.get("type"),
+ "url": ma.get("url"),
+ "preview_url": ma.get("preview_url"),
+ "description": ma.get("description"),
+ }
+ for ma in (actual.get("media_attachments") or [])
+ ],
+ "tags": [
+ {"name": t.get("name", ""), "url": t.get("url", "")}
+ for t in (actual.get("tags") or [])
+ ],
+ "raw_json": status,
+ }
diff --git a/app/templates/accounts.html b/app/templates/accounts.html
new file mode 100644
index 0000000..720621b
--- /dev/null
+++ b/app/templates/accounts.html
@@ -0,0 +1,77 @@
+{% extends "base.html" %}
+{% block title %}Accounts — Mastodon Collector{% endblock %}
+
+{% block content %}
+
+
Monitored Accounts
+
+
+
+
Add Account
+
+
+ You can also add accounts by editing accounts.txt — the collector picks them up automatically.
+
+
+
+
+
+
+
+ Handle
+ Display Name
+ Account ID
+ Status
+ Last Collected
+ Actions
+
+
+
+ {% for acct in accounts %}
+
+
+
+ {{ acct.handle }}
+
+
+ {{ acct.display_name or '—' }}
+ {{ acct.account_id or 'unresolved' }}
+
+ {% if acct.is_active %}
+ Active
+ {% else %}
+ Paused
+ {% endif %}
+
+
+ {{ acct.last_collected_at.strftime('%Y-%m-%d %H:%M') if acct.last_collected_at else 'Never' }}
+
+
+
+
+
+
+
+
+ {% endfor %}
+ {% if not accounts %}
+
+
+ No accounts yet. Add one above or edit accounts.txt.
+
+
+ {% endif %}
+
+
+
+{% endblock %}
diff --git a/app/templates/base.html b/app/templates/base.html
new file mode 100644
index 0000000..d370f5a
--- /dev/null
+++ b/app/templates/base.html
@@ -0,0 +1,262 @@
+
+
+
+
+
+ {% block title %}Mastodon Collector{% endblock %}
+
+
+
+
+
+
+
+
+
+ {% with messages = get_flashed_messages(with_categories=true) %}
+ {% for category, message in messages %}
+
{{ message }}
+ {% endfor %}
+ {% endwith %}
+
+ {% block content %}{% endblock %}
+
+
+
+
diff --git a/app/templates/index.html b/app/templates/index.html
new file mode 100644
index 0000000..e2e04d0
--- /dev/null
+++ b/app/templates/index.html
@@ -0,0 +1,123 @@
+{% extends "base.html" %}
+{% block title %}Dashboard — Mastodon Collector{% endblock %}
+
+{% block content %}
+Dashboard
+
+
+
+
{{ total_statuses }}
+
Total Statuses
+
+
+
{{ total_posts }}
+
Posts
+
+
+
{{ total_replies }}
+
Replies
+
+
+
{{ total_mentions }}
+
Mentions
+
+
+
{{ total_reblogs }}
+
Reblogs
+
+
+
{{ account_stats|length }}
+
Monitored Accounts
+
+
+
+
+
Monitored Accounts
+
+
+
+ Account
+ Instance
+ Status
+ Collected
+ Last Run
+
+
+
+ {% for item in account_stats %}
+
+
+
+ {{ item.account.handle }}
+
+ {% if item.account.display_name %}
+ — {{ item.account.display_name }}
+ {% endif %}
+
+ {{ item.account.instance }}
+
+ {% if item.account.is_active %}
+ Active
+ {% else %}
+ Paused
+ {% endif %}
+
+ {{ item.status_count }}
+
+ {% if item.account.last_collected_at %}
+ {{ item.account.last_collected_at.strftime('%Y-%m-%d %H:%M') }}
+ {% if item.last_log %}
+ {{ item.last_log.status }}
+ {% endif %}
+ {% else %}
+ Never
+ {% endif %}
+
+
+ {% endfor %}
+ {% if not account_stats %}
+
+
+ No accounts being monitored yet.
+ Add some accounts .
+
+
+ {% endif %}
+
+
+
+
+{% if recent_logs %}
+
+
Recent Collection Runs
+
+
+
+ Time
+ Account
+ Status
+ Collected
+ Error
+
+
+
+ {% for log in recent_logs %}
+
+ {{ log.started_at.strftime('%Y-%m-%d %H:%M:%S') if log.started_at }}
+
+ {% if log.account %}
+ {{ log.account.handle }}
+ {% else %}
+ —
+ {% endif %}
+
+ {{ log.status }}
+ {{ log.statuses_collected }}
+ {{ log.error or '—' }}
+
+ {% endfor %}
+
+
+
+{% endif %}
+{% endblock %}
diff --git a/app/templates/status_detail.html b/app/templates/status_detail.html
new file mode 100644
index 0000000..f60972f
--- /dev/null
+++ b/app/templates/status_detail.html
@@ -0,0 +1,140 @@
+{% extends "base.html" %}
+{% block title %}Status Detail — Mastodon Collector{% endblock %}
+
+{% block content %}
+
+
+
+
+
+ {{ status.status_type }}
+
+ {{ status.created_at.strftime('%Y-%m-%d %H:%M:%S UTC') if status.created_at }}
+
+
+
+
+
+
+
+ Account
+ {{ status.account.handle }}{% if status.account.display_name %} — {{ status.account.display_name }}{% endif %}
+
+
+ Visibility
+ {{ status.visibility }}
+
+
+ Language
+ {{ status.language or 'Unknown' }}
+
+ {% if status.in_reply_to_id %}
+
+ In Reply To
+ Status {{ status.in_reply_to_id }}{% if status.in_reply_to_account_id %} (account {{ status.in_reply_to_account_id }}){% endif %}
+
+ {% endif %}
+ {% if status.conversation_id %}
+
+ Conversation
+ {{ status.conversation_id }}
+
+ {% endif %}
+
+ Interactions
+ ↩ {{ status.replies_count }} replies ⟳ {{ status.reblogs_count }} reblogs ★ {{ status.favourites_count }} favourites
+
+ {% if status.sensitive %}
+
+ Sensitive
+ Yes{% if status.spoiler_text %} — {{ status.spoiler_text }}{% endif %}
+
+ {% endif %}
+
+ Mastodon Status ID
+ {{ status.status_id }}
+
+
+ URI
+ {{ status.uri }}
+
+
+
+
+
+
Content (HTML)
+
+ {{ status.content | safe }}
+
+
+
+
+
Content (Plain Text)
+
{{ status.text_content }}
+
+
+{% if status.mentions %}
+
+
Mentions ({{ status.mentions|length }})
+
+
+ Account URL
+
+
+ {% for m in status.mentions %}
+
+ @{{ m.mentioned_acct }}
+ {{ m.mentioned_url }}
+
+ {% endfor %}
+
+
+
+{% endif %}
+
+{% if status.media_attachments %}
+
+
Media Attachments ({{ status.media_attachments|length }})
+
+
+ Type Description URL
+
+
+ {% for ma in status.media_attachments %}
+
+ {{ ma.media_type }}
+ {{ ma.description or '—' }}
+ View ↗
+
+ {% endfor %}
+
+
+
+{% endif %}
+
+{% if status.tags %}
+
+
Tags
+
+ {% for t in status.tags %}
+ #{{ t.name }}
+ {% endfor %}
+
+
+{% endif %}
+
+
+
Raw JSON
+
+ Click to expand
+ {{ status.raw_json | tojson(indent=2) }}
+
+
+{% endblock %}
diff --git a/app/templates/statuses.html b/app/templates/statuses.html
new file mode 100644
index 0000000..d7a64af
--- /dev/null
+++ b/app/templates/statuses.html
@@ -0,0 +1,112 @@
+{% extends "base.html" %}
+{% block title %}Statuses — Mastodon Collector{% endblock %}
+
+{% block content %}
+
+
+
+
+
+
+
+
+ Date
+ Account
+ Type
+ Content
+ Interactions
+
+
+
+
+ {% for s in statuses %}
+
+
+ {{ s.created_at.strftime('%Y-%m-%d %H:%M') if s.created_at }}
+
+ {{ s.account.handle }}
+
+ {{ s.status_type }}
+
+
+
+ {{ s.text_content[:200] }}{% if s.text_content and s.text_content|length > 200 %}...{% endif %}
+
+ {% if s.tags %}
+
+ {% for t in s.tags %}
+ #{{ t.name }}
+ {% endfor %}
+
+ {% endif %}
+
+
+ ↩ {{ s.replies_count }} ⟳ {{ s.reblogs_count }} ★ {{ s.favourites_count }}
+
+
+ View
+
+
+ {% endfor %}
+ {% if not statuses %}
+
+
+ No statuses found. The collector runs every {{ (config.get('POLL_INTERVAL_SECONDS', 14400)|int // 3600) }} hours, or you can wait for the first collection cycle.
+
+
+ {% endif %}
+
+
+
+
+{% if total_pages > 1 %}
+
+{% endif %}
+{% endblock %}
diff --git a/app/web.py b/app/web.py
new file mode 100644
index 0000000..698f717
--- /dev/null
+++ b/app/web.py
@@ -0,0 +1,383 @@
+"""Flask web application for managing monitored accounts and viewing collected data."""
+
+import os
+import logging
+from datetime import datetime, timezone
+
+from flask import Flask, render_template, request, redirect, url_for, flash, jsonify
+from sqlalchemy import func, desc
+
+from app.db import (
+ init_db,
+ get_session,
+ MonitoredAccount,
+ Status,
+ Mention,
+ CollectionLog,
+)
+from app.mastodon_api import lookup_account, MastodonAPIError
+
# Root logger at INFO so request/route messages reach container stdout.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
# NOTE(review): falls back to a well-known dev key when FLASK_SECRET_KEY is
# unset, which makes session cookies / flash messages forgeable. The compose
# file passes a real key; confirm the env var is always set in production.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "dev-secret-key")

# Initialize database on startup
with app.app_context():
    init_db()
+
+
@app.route("/")
def index():
    """Dashboard overview.

    Renders aggregate status counts, per-account collection stats, and the
    20 most recent collection-log entries.

    Improvement over the original: the four per-type COUNT queries and the
    one-COUNT-per-account loop (N+1) are each replaced by a single grouped
    query; rendered values are identical.
    """
    session = get_session()
    try:
        accounts = (
            session.query(MonitoredAccount)
            .order_by(MonitoredAccount.instance, MonitoredAccount.username)
            .all()
        )

        total_statuses = session.query(func.count(Status.id)).scalar() or 0

        # One grouped query replaces four separate per-type COUNT queries.
        type_counts = dict(
            session.query(Status.status_type, func.count(Status.id))
            .group_by(Status.status_type)
            .all()
        )

        # One grouped query replaces a COUNT per monitored account.
        counts_by_account = dict(
            session.query(Status.account_db_id, func.count(Status.id))
            .group_by(Status.account_db_id)
            .all()
        )

        # Per-account stats; the latest log is still fetched per account,
        # which is cheap for the expected handful of monitored accounts.
        account_stats = []
        for acct in accounts:
            last_log = (
                session.query(CollectionLog)
                .filter_by(account_db_id=acct.id)
                .order_by(desc(CollectionLog.started_at))
                .first()
            )
            account_stats.append({
                "account": acct,
                "status_count": counts_by_account.get(acct.id, 0),
                "last_log": last_log,
            })

        # Recent collection logs (newest first)
        recent_logs = (
            session.query(CollectionLog)
            .order_by(desc(CollectionLog.started_at))
            .limit(20)
            .all()
        )

        return render_template(
            "index.html",
            account_stats=account_stats,
            total_statuses=total_statuses,
            total_posts=type_counts.get("post", 0),
            total_replies=type_counts.get("reply", 0),
            total_mentions=type_counts.get("mention", 0),
            total_reblogs=type_counts.get("reblog", 0),
            recent_logs=recent_logs,
        )
    finally:
        session.close()
+
+
@app.route("/accounts")
def accounts_list():
    """Render the management page listing every monitored account."""
    session = get_session()
    try:
        all_accounts = (
            session.query(MonitoredAccount)
            .order_by(MonitoredAccount.instance, MonitoredAccount.username)
            .all()
        )
        return render_template("accounts.html", accounts=all_accounts)
    finally:
        session.close()
+
+
@app.route("/accounts/add", methods=["POST"])
def accounts_add():
    """Register a new @user@instance handle for collection.

    Re-activates the account if it already exists but is paused; otherwise
    tries to resolve the profile via the instance API and stores a bare
    record when resolution fails.
    """
    raw = request.form.get("handle", "").strip().lstrip("@")
    username, sep, instance = raw.partition("@")
    # Both halves must be non-empty and the separator present.
    if not (sep and username and instance):
        flash("Invalid handle format. Use @user@instance.social", "error")
        return redirect(url_for("accounts_list"))

    session = get_session()
    try:
        existing = (
            session.query(MonitoredAccount)
            .filter_by(username=username, instance=instance)
            .first()
        )
        if existing is not None:
            if existing.is_active:
                flash(f"{existing.handle} is already being monitored", "info")
            else:
                existing.is_active = True
                session.commit()
                flash(f"Re-activated {existing.handle}", "success")
            return redirect(url_for("accounts_list"))

        # Resolve profile details up front; fall back to a bare record when
        # the instance can't be reached right now.
        try:
            profile = lookup_account(instance, username)
            acct = MonitoredAccount(
                username=username,
                instance=instance,
                account_id=profile["id"],
                display_name=profile.get("display_name", ""),
                avatar_url=profile.get("avatar", ""),
                note=profile.get("note", ""),
                is_active=True,
            )
        except MastodonAPIError as e:
            logger.warning("Could not resolve account @%s@%s: %s — adding anyway", username, instance, e)
            acct = MonitoredAccount(username=username, instance=instance, is_active=True)

        session.add(acct)
        session.commit()
        flash(f"Added {acct.handle} to monitoring list", "success")
        return redirect(url_for("accounts_list"))
    finally:
        session.close()
+
+
@app.route("/accounts/<int:account_id>/toggle", methods=["POST"])
def accounts_toggle(account_id):
    """Toggle an account's active (monitored/paused) flag.

    Fixes: the route rule was missing its URL converter
    ("/accounts//toggle"), so ``account_id`` was never bound and the view
    could not be invoked — restored as ``<int:account_id>``. Also uses
    ``Session.get()`` (SQLAlchemy 2.0) instead of the legacy
    ``Query.get()``, and flashes an error for an unknown id instead of
    redirecting silently.
    """
    session = get_session()
    try:
        acct = session.get(MonitoredAccount, account_id)
        if acct is not None:
            acct.is_active = not acct.is_active
            session.commit()
            state = "activated" if acct.is_active else "paused"
            flash(f"{state.capitalize()} monitoring for {acct.handle}", "success")
        else:
            flash("Account not found", "error")
        return redirect(url_for("accounts_list"))
    finally:
        session.close()
+
+
@app.route("/accounts/<int:account_id>/delete", methods=["POST"])
def accounts_delete(account_id):
    """Delete a monitored account together with all of its collected data.

    Fixes: the route rule was missing its URL converter
    ("/accounts//delete"), so ``account_id`` was never bound — restored as
    ``<int:account_id>``. Also uses ``Session.get()`` (SQLAlchemy 2.0)
    instead of the legacy ``Query.get()``, and flashes an error for an
    unknown id instead of redirecting silently.
    """
    session = get_session()
    try:
        acct = session.get(MonitoredAccount, account_id)
        if acct is None:
            flash("Account not found", "error")
        else:
            handle = acct.handle
            # Delete associated statuses (cascades to mentions, media, tags)
            session.query(Status).filter_by(account_db_id=acct.id).delete()
            session.query(CollectionLog).filter_by(account_db_id=acct.id).delete()
            session.delete(acct)
            session.commit()
            flash(f"Deleted {handle} and all collected data", "success")
        return redirect(url_for("accounts_list"))
    finally:
        session.close()
+
+
@app.route("/statuses")
def statuses_list():
    """Browse collected statuses with account/type/text filters and paging.

    Fixes: ``page`` and ``per_page`` are now clamped (page >= 1,
    1 <= per_page <= 500), so a crafted query string can no longer produce
    a negative OFFSET or an unbounded result set; the JSON endpoint
    ``api_statuses`` already capped per_page at 500.
    """
    session = get_session()
    try:
        page = max(1, request.args.get("page", 1, type=int) or 1)
        per_page = request.args.get("per_page", 50, type=int) or 50
        per_page = max(1, min(per_page, 500))
        account_id = request.args.get("account_id", type=int)
        status_type = request.args.get("type", "")
        search = request.args.get("q", "").strip()

        query = session.query(Status).join(MonitoredAccount)

        if account_id:
            query = query.filter(Status.account_db_id == account_id)
        if status_type:
            query = query.filter(Status.status_type == status_type)
        if search:
            # Case-insensitive substring match on the extracted plain text.
            query = query.filter(Status.text_content.ilike(f"%{search}%"))

        total = query.count()
        statuses = (
            query.order_by(desc(Status.created_at))
            .offset((page - 1) * per_page)
            .limit(per_page)
            .all()
        )

        accounts = session.query(MonitoredAccount).order_by(MonitoredAccount.username).all()
        total_pages = max(1, (total + per_page - 1) // per_page)

        return render_template(
            "statuses.html",
            statuses=statuses,
            accounts=accounts,
            page=page,
            per_page=per_page,
            total=total,
            total_pages=total_pages,
            current_account_id=account_id,
            current_type=status_type,
            search=search,
        )
    finally:
        session.close()
+
+
@app.route("/statuses/<int:status_db_id>")
def status_detail(status_db_id):
    """View a single collected status with all details.

    Fixes: the route rule "/statuses/" was missing its URL converter, so
    ``status_db_id`` was never bound — restored as ``<int:status_db_id>``.
    Also uses ``Session.get()`` (SQLAlchemy 2.0) instead of the legacy
    ``Query.get()``.
    """
    session = get_session()
    try:
        status = session.get(Status, status_db_id)
        if status is None:
            flash("Status not found", "error")
            return redirect(url_for("statuses_list"))
        return render_template("status_detail.html", status=status)
    finally:
        session.close()
+
+
@app.route("/api/stats")
def api_stats():
    """JSON API endpoint for stats (useful for your analysis pipeline)."""
    session = get_session()
    try:
        def count_statuses(*criteria):
            # COUNT(Status.id) with any filter criteria applied.
            q = session.query(func.count(Status.id))
            for crit in criteria:
                q = q.filter(crit)
            return q.scalar() or 0

        active_accounts = session.query(MonitoredAccount).filter_by(is_active=True).all()

        stats = {
            "total_statuses": count_statuses(),
            "by_type": {
                stype: count_statuses(Status.status_type == stype)
                for stype in ["post", "reply", "mention", "reblog"]
            },
            "accounts": [
                {
                    "handle": acct.handle,
                    "status_count": count_statuses(Status.account_db_id == acct.id),
                    "last_collected": acct.last_collected_at.isoformat() if acct.last_collected_at else None,
                }
                for acct in active_accounts
            ],
        }
        return jsonify(stats)
    finally:
        session.close()
+
+
@app.route("/api/statuses")
def api_statuses():
    """JSON listing of collected statuses for downstream analysis pipelines.

    Query params: page, per_page (capped at 500), account_id, type, and
    since (ISO-8601 datetime).

    Fixes: ``since`` is now validated with ``datetime.fromisoformat`` —
    previously the raw query string was compared against a datetime column,
    so a malformed value surfaced as a 500 at query time; it now returns a
    400 with an error body. ``page`` and ``per_page`` are clamped to >= 1
    so negative values cannot produce a negative OFFSET.
    """
    session = get_session()
    try:
        page = max(1, request.args.get("page", 1, type=int) or 1)
        per_page = max(1, min(request.args.get("per_page", 100, type=int), 500))
        account_id = request.args.get("account_id", type=int)
        status_type = request.args.get("type", "")
        since = request.args.get("since", "")  # ISO datetime

        query = session.query(Status)

        if account_id:
            query = query.filter(Status.account_db_id == account_id)
        if status_type:
            query = query.filter(Status.status_type == status_type)
        if since:
            try:
                since_dt = datetime.fromisoformat(since)
            except ValueError:
                return jsonify({"error": "invalid 'since' value; expected ISO-8601 datetime"}), 400
            query = query.filter(Status.created_at >= since_dt)

        total = query.count()
        statuses = (
            query.order_by(desc(Status.created_at))
            .offset((page - 1) * per_page)
            .limit(per_page)
            .all()
        )

        def serialize(s):
            # Flatten one Status row (plus its mentions/tags) to a JSON-safe dict.
            return {
                "id": s.id,
                "status_id": s.status_id,
                "account": s.account.handle,
                "url": s.url,
                "content": s.content,
                "text_content": s.text_content,
                "visibility": s.visibility,
                "created_at": s.created_at.isoformat() if s.created_at else None,
                "language": s.language,
                "status_type": s.status_type,
                "in_reply_to_id": s.in_reply_to_id,
                "replies_count": s.replies_count,
                "reblogs_count": s.reblogs_count,
                "favourites_count": s.favourites_count,
                "mentions": [
                    {"acct": m.mentioned_acct, "url": m.mentioned_url}
                    for m in s.mentions
                ],
                "tags": [t.name for t in s.tags],
            }

        return jsonify({
            "total": total,
            "page": page,
            "per_page": per_page,
            "statuses": [serialize(s) for s in statuses],
        })
    finally:
        session.close()
+
+
@app.route("/export")
def export_csv():
    """Export statuses as CSV for analysis."""
    import csv
    from io import StringIO

    from flask import Response

    session = get_session()
    try:
        account_id = request.args.get("account_id", type=int)
        status_type = request.args.get("type", "")

        query = session.query(Status).join(MonitoredAccount)
        if account_id:
            query = query.filter(Status.account_db_id == account_id)
        if status_type:
            query = query.filter(Status.status_type == status_type)

        buffer = StringIO()
        writer = csv.writer(buffer)
        # Header row — column order is part of the export contract.
        writer.writerow([
            "id", "account", "status_type", "created_at", "url",
            "text_content", "language", "visibility", "in_reply_to_id",
            "replies_count", "reblogs_count", "favourites_count",
            "mentions", "tags", "sensitive", "spoiler_text",
        ])

        for s in query.order_by(desc(Status.created_at)).all():
            writer.writerow([
                s.status_id,
                s.account.handle,
                s.status_type,
                s.created_at.isoformat() if s.created_at else "",
                s.url,
                s.text_content,
                s.language,
                s.visibility,
                s.in_reply_to_id,
                s.replies_count,
                s.reblogs_count,
                s.favourites_count,
                "; ".join(m.mentioned_acct for m in s.mentions),
                "; ".join(t.name for t in s.tags),
                s.sensitive,
                s.spoiler_text,
            ])

        return Response(
            buffer.getvalue(),
            mimetype="text/csv",
            headers={"Content-Disposition": "attachment; filename=mastodon_statuses.csv"},
        )
    finally:
        session.close()
+
+
if __name__ == "__main__":
    # Dev-server entry point only: docker-compose runs this app under
    # gunicorn (app.web:app) instead. NOTE(review): debug=True on 0.0.0.0
    # exposes the Werkzeug debugger — ensure this path is never used in
    # production.
    app.run(host="0.0.0.0", port=5000, debug=True)
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..7c2d970
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,50 @@
# Three services: Postgres storage (db), the Flask UI served by gunicorn
# (web), and the polling collector. Secrets come from the host environment
# or a .env file; every ${VAR:-default} below has a dev-only fallback.
version: "3.8"  # NOTE(review): ignored by Compose v2 — harmless but obsolete

services:
  db:
    image: postgres:16-alpine
    restart: unless-stopped
    environment:
      POSTGRES_DB: mastodon_collector
      POSTGRES_USER: collector
      # Dev-only fallback password; set POSTGRES_PASSWORD in .env for real use.
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-collector_secret}
    volumes:
      - pgdata:/var/lib/postgresql/data
    ports:
      # Loopback-only; host port 5434 avoids clashing with a local Postgres.
      - "127.0.0.1:5434:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U collector -d mastodon_collector"]
      interval: 5s
      timeout: 5s
      retries: 5

  web:
    build: .
    restart: unless-stopped
    # Serve the Flask app via gunicorn; generous timeout for slow DB queries.
    command: gunicorn --bind 0.0.0.0:5000 --workers 2 --timeout 120 app.web:app
    ports:
      # UI reachable only from the host at http://127.0.0.1:8585
      - "127.0.0.1:8585:5000"
    environment:
      DATABASE_URL: postgresql://collector:${POSTGRES_PASSWORD:-collector_secret}@db:5432/mastodon_collector
      FLASK_SECRET_KEY: ${FLASK_SECRET_KEY:-change-me-in-production}
    volumes:
      - ./accounts.txt:/app/accounts.txt
    depends_on:
      db:
        condition: service_healthy

  collector:
    build: .
    restart: unless-stopped
    # Long-running poller; same image as web, different entry point.
    command: python -m app.collector
    environment:
      DATABASE_URL: postgresql://collector:${POSTGRES_PASSWORD:-collector_secret}@db:5432/mastodon_collector
      # Seconds between collection cycles (default 4 hours).
      POLL_INTERVAL_SECONDS: ${POLL_INTERVAL_SECONDS:-14400}
    volumes:
      - ./accounts.txt:/app/accounts.txt
    depends_on:
      db:
        condition: service_healthy

volumes:
  pgdata:
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b554053
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+flask==3.1.0
+gunicorn==23.0.0
+psycopg2-binary==2.9.10
+sqlalchemy==2.0.36
+requests==2.32.3
+apscheduler==3.10.4