Initial commit: Mastodon collector application
Add Flask-based application for collecting and archiving Mastodon posts from configured accounts. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
commit
1783a48d7c
18 changed files with 2115 additions and 0 deletions
6
.dockerignore
Normal file
6
.dockerignore
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
__pycache__
|
||||||
|
*.pyc
|
||||||
|
.env
|
||||||
|
.git
|
||||||
|
.gitignore
|
||||||
|
README.md
|
||||||
8
.env.example
Normal file
8
.env.example
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
# PostgreSQL Configuration
|
||||||
|
POSTGRES_PASSWORD=your_secure_password_here
|
||||||
|
|
||||||
|
# Flask Configuration
|
||||||
|
FLASK_SECRET_KEY=your_secure_secret_key_here
|
||||||
|
|
||||||
|
# Polling Configuration
|
||||||
|
POLL_INTERVAL_SECONDS=14400
|
||||||
115
.gitignore
vendored
Normal file
115
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,115 @@
|
||||||
|
# Environment variables and secrets
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
.env.*.local
|
||||||
|
*.secret
|
||||||
|
secrets/
|
||||||
|
credentials/
|
||||||
|
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.so
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
pip-wheel-metadata/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# Virtual environments
|
||||||
|
venv/
|
||||||
|
env/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
.venv/
|
||||||
|
|
||||||
|
# IDEs
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
|
# Database files
|
||||||
|
*.sqlite
|
||||||
|
*.sqlite3
|
||||||
|
*.db
|
||||||
|
*.db-journal
|
||||||
|
*.db-shm
|
||||||
|
*.db-wal
|
||||||
|
postgres_data/
|
||||||
|
pgdata/
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
*.log
|
||||||
|
logs/
|
||||||
|
*.log.*
|
||||||
|
|
||||||
|
# Docker volumes and local data
|
||||||
|
docker-compose.override.yml
|
||||||
|
.docker/
|
||||||
|
volumes/
|
||||||
|
|
||||||
|
# Certificates and keys
|
||||||
|
*.pem
|
||||||
|
*.key
|
||||||
|
*.crt
|
||||||
|
*.cer
|
||||||
|
*.p12
|
||||||
|
*.pfx
|
||||||
|
|
||||||
|
# Backup files
|
||||||
|
*.bak
|
||||||
|
*.backup
|
||||||
|
*.tmp
|
||||||
|
*~
|
||||||
|
|
||||||
|
# OS generated files
|
||||||
|
.DS_Store
|
||||||
|
.DS_Store?
|
||||||
|
._*
|
||||||
|
.Spotlight-V100
|
||||||
|
.Trashes
|
||||||
|
ehthumbs.db
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# Testing
|
||||||
|
.pytest_cache/
|
||||||
|
.coverage
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.hypothesis/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
.python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
Pipfile.lock
|
||||||
|
|
||||||
|
# Poetry
|
||||||
|
poetry.lock
|
||||||
20
Dockerfile
Normal file
20
Dockerfile
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
# Image for the Mastodon collector application, based on the slim Python 3.12 image.
FROM python:3.12-slim

# All following instructions (and the running container) use /app as the working directory.
WORKDIR /app

# Install system dependencies, then delete the apt package lists to keep the layer small.
# NOTE(review): libpq-dev and gcc are presumably needed to build a PostgreSQL driver
# listed in requirements.txt — confirm against that file.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libpq-dev gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy and install requirements before the rest of the source so that this layer
# is only rebuilt when requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source (filtered by .dockerignore).
COPY . .

# Create empty accounts file if it doesn't exist
RUN touch /app/accounts.txt

# NOTE(review): the README lists the dashboard on port 8585; the mapping onto
# port 5000 is presumably done in the compose file — confirm.
EXPOSE 5000

# Default command: run the collector service.
CMD ["python", "-m", "app.collector"]
|
||||||
91
README.md
Normal file
91
README.md
Normal file
|
|
@ -0,0 +1,91 @@
|
||||||
|
# Mastodon Collector
|
||||||
|
|
||||||
|
Collects posts, replies, and mentions from a list of Mastodon accounts and stores them in PostgreSQL. Includes a web UI for account management and data browsing, plus JSON/CSV APIs for your analysis pipeline.
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Add accounts to monitor
|
||||||
|
echo "@user@mastodon.social" >> accounts.txt
|
||||||
|
|
||||||
|
# 2. Start everything
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
# 3. Open the dashboard
|
||||||
|
open http://localhost:8585
|
||||||
|
```
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
| Service | Description | Port |
|
||||||
|
|---------------|------------------------------------------------|-------|
|
||||||
|
| **db** | PostgreSQL 16 | 5432 |
|
||||||
|
| **web** | Flask dashboard (Gunicorn) | 8585 |
|
||||||
|
| **collector** | Background service, polls every 4 hours | — |
|
||||||
|
|
||||||
|
## Adding Accounts
|
||||||
|
|
||||||
|
Two methods:
|
||||||
|
|
||||||
|
1. **Text file** — edit `accounts.txt`, one handle per line (`@user@instance.social`). Changes are picked up on the next collection cycle.
|
||||||
|
2. **Web UI** — go to http://localhost:8585/accounts and use the form.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
Copy `.env.example` to `.env`, then edit `.env` to customize:
|
||||||
|
|
||||||
|
```
|
||||||
|
POSTGRES_PASSWORD=collector_secret # Change for production
|
||||||
|
FLASK_SECRET_KEY=change-me-in-production
|
||||||
|
POLL_INTERVAL_SECONDS=14400 # Default: 4 hours (14400s)
|
||||||
|
```
|
||||||
|
|
||||||
|
## API Endpoints
|
||||||
|
|
||||||
|
For plugging into your analysis pipeline:
|
||||||
|
|
||||||
|
| Endpoint | Description |
|
||||||
|
|-----------------------|--------------------------------------|
|
||||||
|
| `GET /api/stats` | Overview stats (counts by type) |
|
||||||
|
| `GET /api/statuses` | Paginated statuses as JSON |
|
||||||
|
| `GET /export` | Download all statuses as CSV |
|
||||||
|
|
||||||
|
### `/api/statuses` parameters
|
||||||
|
|
||||||
|
- `page` — page number (default: 1)
|
||||||
|
- `per_page` — results per page (default: 100, max: 500)
|
||||||
|
- `account_id` — filter by internal account ID
|
||||||
|
- `type` — filter by status type: `post`, `reply`, `mention`, `reblog`
|
||||||
|
- `since` — ISO datetime, only return statuses after this time
|
||||||
|
|
||||||
|
## Database Schema
|
||||||
|
|
||||||
|
Main tables:
|
||||||
|
|
||||||
|
- `monitored_accounts` — accounts being tracked
|
||||||
|
- `statuses` — collected posts with plain text + HTML content
|
||||||
|
- `mentions` — who was @-mentioned in each status
|
||||||
|
- `media_attachments` — images/videos attached to statuses
|
||||||
|
- `tags` — hashtags used
|
||||||
|
- `collection_logs` — audit trail of each collection run
|
||||||
|
|
||||||
|
Each status stores `raw_json` with the full Mastodon API response for future analysis needs.
|
||||||
|
|
||||||
|
## Moving to a Server
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Copy the project
|
||||||
|
scp -r mastodon-collector/ user@server:~/
|
||||||
|
|
||||||
|
# On the server
|
||||||
|
cd mastodon-collector
|
||||||
|
# Edit .env with production secrets
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
## Stopping
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose down # Stop services, keep data
|
||||||
|
docker compose down -v # Stop services AND delete database
|
||||||
|
```
|
||||||
0
app/__init__.py
Normal file
0
app/__init__.py
Normal file
4
app/__main__.py
Normal file
4
app/__main__.py
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
"""Allow running the collector with `python -m app`."""
|
||||||
|
# Import the collector module and hand control to its entry point.
from app import collector

collector.main()
|
||||||
306
app/collector.py
Normal file
306
app/collector.py
Normal file
|
|
@ -0,0 +1,306 @@
|
||||||
|
"""
|
||||||
|
Collector service — periodically polls Mastodon for new statuses from monitored accounts.
|
||||||
|
Runs as a standalone process via `python -m app.collector`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from apscheduler.schedulers.blocking import BlockingScheduler
|
||||||
|
|
||||||
|
from app.db import (
|
||||||
|
init_db,
|
||||||
|
get_session,
|
||||||
|
MonitoredAccount,
|
||||||
|
Status,
|
||||||
|
Mention,
|
||||||
|
MediaAttachment,
|
||||||
|
Tag,
|
||||||
|
CollectionLog,
|
||||||
|
)
|
||||||
|
from app.mastodon_api import (
|
||||||
|
lookup_account,
|
||||||
|
get_account_statuses,
|
||||||
|
parse_status,
|
||||||
|
MastodonAPIError,
|
||||||
|
RateLimitError,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Configure root logging: INFO level, timestamped records, written to stdout.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)],
)
logger = logging.getLogger("collector")

# Seconds between scheduled collection cycles; read from POLL_INTERVAL_SECONDS,
# defaulting to 14400. A non-integer value makes int() raise at import time.
POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL_SECONDS", 14400))
# Path of the text file listing accounts to monitor; read from ACCOUNTS_FILE.
ACCOUNTS_FILE = os.environ.get("ACCOUNTS_FILE", "/app/accounts.txt")
|
||||||
|
|
||||||
|
|
||||||
|
def load_accounts_from_file(filepath: str) -> list[tuple[str, str]]:
    """Parse an accounts file into a list of ``(username, instance)`` tuples.

    Each non-blank line that does not start with ``#`` is expected to hold one
    handle, written either as ``@user@instance.social`` or
    ``user@instance.social``. Malformed lines are logged and skipped.

    Args:
        filepath: Path of the accounts file.

    Returns:
        The handles found, in file order, with repeated handles listed once.
        An empty list if the file does not exist.
    """
    path = Path(filepath)
    if not path.exists():
        logger.warning("Accounts file not found: %s", filepath)
        return []

    accounts: list[tuple[str, str]] = []
    seen: set[tuple[str, str]] = set()

    # Decode explicitly instead of relying on the locale. "utf-8-sig" also
    # drops a leading byte-order mark, which would otherwise hide the "@" of
    # the first line and produce a bogus (BOM, "user@instance") entry.
    for raw_line in path.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue

        # Drop the optional leading "@", then split on the first remaining "@".
        username, separator, instance = line.lstrip("@").partition("@")
        if not (separator and username and instance):
            logger.warning("Skipping malformed account line: %s", line)
            continue

        entry = (username, instance)
        if entry not in seen:
            seen.add(entry)
            accounts.append(entry)

    return accounts
|
||||||
|
|
||||||
|
|
||||||
|
def sync_monitored_accounts(session) -> list[MonitoredAccount]:
    """
    Bring the database in line with the accounts file and list what to poll.

    Every handle in the accounts file that has no matching row (same username
    and instance) is inserted as an active account. Rows already present,
    for example ones added through the web UI, are left untouched.
    Returns every account whose ``is_active`` flag is set.
    """
    for username, instance in load_accounts_from_file(ACCOUNTS_FILE):
        already_tracked = (
            session.query(MonitoredAccount)
            .filter_by(username=username, instance=instance)
            .first()
        )
        if already_tracked:
            continue

        logger.info("Adding account from file: @%s@%s", username, instance)
        session.add(MonitoredAccount(username=username, instance=instance, is_active=True))

    session.commit()

    active_query = session.query(MonitoredAccount).filter_by(is_active=True)
    return active_query.all()
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_account(session, account: MonitoredAccount) -> bool:
    """Make sure ``account`` has a Mastodon account ID, looking it up if needed.

    On a successful lookup the ID, display name, avatar URL and note are
    copied onto the account and committed. Returns True when the account has
    an ID (already or newly resolved), False when the lookup raised
    ``MastodonAPIError``.
    """
    if account.account_id:
        return True

    # (model attribute, key in the lookup response) for the optional fields.
    optional_fields = (
        ("display_name", "display_name"),
        ("avatar_url", "avatar"),
        ("note", "note"),
    )

    try:
        profile = lookup_account(account.instance, account.username)
        account.account_id = profile["id"]
        for attribute, key in optional_fields:
            setattr(account, attribute, profile.get(key, ""))
        session.commit()
        logger.info("Resolved %s → account_id=%s", account.handle, account.account_id)
    except MastodonAPIError as exc:
        logger.error("Failed to resolve %s: %s", account.handle, exc)
        return False

    return True
|
||||||
|
|
||||||
|
|
||||||
|
def store_status(session, account: MonitoredAccount, parsed: dict) -> bool:
    """Persist one parsed status together with its mentions, media and tags.

    If a row with the same status ID already exists for this account, only
    its interaction counters are refreshed and False is returned. Otherwise
    the status and its child rows are added to the session (flushed, not
    committed) and True is returned.
    """
    duplicate = (
        session.query(Status)
        .filter_by(status_id=parsed["status_id"], account_db_id=account.id)
        .first()
    )
    if duplicate:
        # Counters may have moved since the status was first stored.
        for counter in ("replies_count", "reblogs_count", "favourites_count"):
            setattr(duplicate, counter, parsed[counter])
        return False

    # Columns whose value is copied verbatim from the parsed dict.
    copied_columns = (
        "status_id",
        "uri",
        "url",
        "content",
        "text_content",
        "visibility",
        "created_at",
        "language",
        "sensitive",
        "spoiler_text",
        "in_reply_to_id",
        "in_reply_to_account_id",
        "conversation_id",
        "replies_count",
        "reblogs_count",
        "favourites_count",
        "status_type",
        "raw_json",
    )
    column_values = {column: parsed[column] for column in copied_columns}
    status = Status(account_db_id=account.id, **column_values)
    session.add(status)
    session.flush()  # assigns status.id, needed by the child rows below

    mention_keys = ("mentioned_account_id", "mentioned_username", "mentioned_acct", "mentioned_url")
    for mention in parsed["mentions"]:
        session.add(Mention(status_db_id=status.id, **{key: mention[key] for key in mention_keys}))

    media_keys = ("media_id", "media_type", "url", "preview_url", "description")
    for media in parsed["media_attachments"]:
        session.add(MediaAttachment(status_db_id=status.id, **{key: media[key] for key in media_keys}))

    for tag in parsed["tags"]:
        session.add(Tag(status_db_id=status.id, name=tag["name"], url=tag["url"]))

    return True
|
||||||
|
|
||||||
|
|
||||||
|
def _status_id_sort_key(status_id: str) -> tuple[int, str]:
    """Sort key that orders numeric ID strings by length first, then lexically."""
    return (len(status_id), status_id)


def _record_collection_error(session, log: CollectionLog, message: str) -> None:
    """Discard pending changes, then mark ``log`` as failed and commit it."""
    # A failed flush/commit leaves the session unusable until it is rolled
    # back. Rolling back also drops statuses stored before the failure, so a
    # half-processed batch is not committed without last_status_id advancing.
    session.rollback()
    log.status = "error"
    log.error = message
    log.finished_at = datetime.now(timezone.utc)
    session.commit()


def collect_account(session, account: MonitoredAccount) -> int:
    """Collect new statuses for a single account.

    A ``CollectionLog`` row is written for the run and finished as either
    "success" or "error". Errors are logged and recorded, not propagated;
    after a rate-limit error the function sleeps for the advertised delay.

    Args:
        session: Database session used for all reads and writes.
        account: The monitored account to collect for.

    Returns:
        The number of statuses stored for the first time (0 on any error).
    """
    log = CollectionLog(account_db_id=account.id, status="running")
    session.add(log)
    session.commit()

    # Read the handle once up front: after a rollback the ORM would have to
    # reload the account just to build a log message.
    handle = account.handle

    try:
        if not resolve_account(session, account):
            _record_collection_error(session, log, "Could not resolve account ID")
            return 0

        logger.info("Collecting statuses for %s (since_id=%s)", handle, account.last_status_id)

        raw_statuses = get_account_statuses(
            instance=account.instance,
            account_id=account.account_id,
            since_id=account.last_status_id,
        )

        new_count = 0
        newest_id = account.last_status_id

        for raw in raw_statuses:
            parsed = parse_status(raw, account.account_id)
            if store_status(session, account, parsed):
                new_count += 1

            # Track the newest status ID. IDs are strings, so a plain ">"
            # would rank "9" above "10"; compare by length first instead.
            sid = parsed["status_id"]
            if newest_id is None or _status_id_sort_key(sid) > _status_id_sort_key(newest_id):
                newest_id = sid

        if newest_id:
            account.last_status_id = newest_id
        account.last_collected_at = datetime.now(timezone.utc)

        log.statuses_collected = new_count
        log.status = "success"
        log.finished_at = datetime.now(timezone.utc)
        session.commit()

        logger.info("Collected %d new statuses for %s (total fetched: %d)",
                    new_count, handle, len(raw_statuses))
        return new_count

    except RateLimitError as e:
        logger.warning("Rate limited while collecting %s: %s", handle, e)
        _record_collection_error(session, log, f"Rate limited: {e}")
        time.sleep(e.retry_after)
        return 0

    except MastodonAPIError as e:
        logger.error("API error collecting %s: %s", handle, e)
        _record_collection_error(session, log, str(e))
        return 0

    except Exception as e:
        # Log first so the traceback is kept even if recording the failure
        # in the database fails as well.
        logger.exception("Unexpected error collecting %s", handle)
        _record_collection_error(session, log, str(e))
        return 0
|
||||||
|
|
||||||
|
|
||||||
|
def run_collection_cycle():
    """Collect from every active monitored account once.

    Opens a fresh session, syncs the account list, collects each account in
    turn with a one-second pause after each, and always closes the session.
    Any exception is logged and swallowed so the caller keeps running.
    """
    logger.info("=== Starting collection cycle ===")
    session = get_session()

    try:
        accounts = sync_monitored_accounts(session)
        account_total = len(accounts)
        logger.info("Monitoring %d active accounts", account_total)

        total_new = 0
        for account in accounts:
            total_new += collect_account(session, account)
            time.sleep(1)  # Brief pause between accounts to be polite

        logger.info("=== Collection cycle complete: %d new statuses across %d accounts ===",
                    total_new, account_total)
    except Exception:
        logger.exception("Fatal error in collection cycle")
    finally:
        session.close()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Start the collector: create tables, collect once, then run on a schedule.

    Blocks inside the scheduler until interrupted (KeyboardInterrupt or
    SystemExit), at which point the scheduler is shut down.
    """
    whole_hours = POLL_INTERVAL // 3600
    logger.info("Mastodon Collector starting up...")
    logger.info("Poll interval: %d seconds (%d hours)", POLL_INTERVAL, whole_hours)

    init_db()
    logger.info("Database initialized")

    # First cycle runs right away; the scheduler only fires after one interval.
    run_collection_cycle()

    scheduler = BlockingScheduler()
    scheduler.add_job(run_collection_cycle, "interval", seconds=POLL_INTERVAL)
    logger.info("Scheduler started — next run in %d seconds", POLL_INTERVAL)

    try:
        scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        logger.info("Collector shutting down")
        scheduler.shutdown()


# Run the collector when this module is executed directly.
if __name__ == "__main__":
    main()
|
||||||
186
app/db.py
Normal file
186
app/db.py
Normal file
|
|
@ -0,0 +1,186 @@
|
||||||
|
"""Database models and session management."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from sqlalchemy import (
|
||||||
|
create_engine,
|
||||||
|
Column,
|
||||||
|
Integer,
|
||||||
|
BigInteger,
|
||||||
|
String,
|
||||||
|
Text,
|
||||||
|
Boolean,
|
||||||
|
DateTime,
|
||||||
|
ForeignKey,
|
||||||
|
Index,
|
||||||
|
UniqueConstraint,
|
||||||
|
JSON,
|
||||||
|
)
|
||||||
|
from sqlalchemy.orm import declarative_base, sessionmaker, relationship
|
||||||
|
|
||||||
|
# Database connection URL, read from the DATABASE_URL environment variable.
# NOTE(review): the fallback embeds a default password and points at
# localhost — presumably intended for local development only; confirm.
DATABASE_URL = os.environ.get(
    "DATABASE_URL", "postgresql://collector:collector_secret@localhost:5432/mastodon_collector"
)

# Module-level engine, created at import time and shared by every session.
engine = create_engine(DATABASE_URL, pool_pre_ping=True, pool_size=5, max_overflow=10)
# Factory for sessions bound to the engine above; used by get_session().
SessionLocal = sessionmaker(bind=engine)
# Declarative base class that all ORM models in this module inherit from.
Base = declarative_base()
|
||||||
|
|
||||||
|
|
||||||
|
class MonitoredAccount(Base):
    """An account we are monitoring.

    A row is identified by its ``(username, instance)`` pair, which the
    ``uq_account_handle`` constraint keeps unique.
    """

    __tablename__ = "monitored_accounts"

    id = Column(Integer, primary_key=True, autoincrement=True)
    username = Column(String(255), nullable=False)  # e.g. "user"
    instance = Column(String(255), nullable=False)  # e.g. "mastodon.social"
    account_id = Column(String(64), nullable=True)  # Mastodon numeric account ID on that instance
    display_name = Column(String(512), nullable=True)
    avatar_url = Column(Text, nullable=True)
    is_active = Column(Boolean, default=True, nullable=False)
    last_collected_at = Column(DateTime(timezone=True), nullable=True)
    last_status_id = Column(String(64), nullable=True)  # For pagination: newest status ID we've seen
    # Defaults to the current UTC time when the row is inserted.
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
    note = Column(Text, nullable=True)  # Bio / description

    # lazy="dynamic": accessing .statuses yields a query object rather than a loaded list.
    statuses = relationship("Status", back_populates="account", lazy="dynamic")

    __table_args__ = (
        UniqueConstraint("username", "instance", name="uq_account_handle"),
    )

    @property
    def handle(self) -> str:
        """Full handle in ``@username@instance`` form."""
        return f"@{self.username}@{self.instance}"

    def __repr__(self) -> str:
        return f"<MonitoredAccount {self.handle}>"
|
||||||
|
|
||||||
|
|
||||||
|
class Status(Base):
    """A single post / toot collected from Mastodon.

    A status is unique per monitored account (``uq_status_per_account``).
    Its mentions, media attachments and tags are child rows that are deleted
    together with the status.
    """

    __tablename__ = "statuses"

    id = Column(Integer, primary_key=True, autoincrement=True)
    status_id = Column(String(64), nullable=False)  # Mastodon status ID
    account_db_id = Column(Integer, ForeignKey("monitored_accounts.id"), nullable=False)
    uri = Column(Text, nullable=False)  # Canonical ActivityPub URI
    url = Column(Text, nullable=True)  # Human-readable URL
    content = Column(Text, nullable=False)  # HTML content
    text_content = Column(Text, nullable=True)  # Stripped plain-text content
    visibility = Column(String(32), nullable=True)  # public, unlisted, private, direct
    created_at = Column(DateTime(timezone=True), nullable=False)
    # Defaults to the current UTC time when the row is inserted.
    collected_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
    language = Column(String(16), nullable=True)
    sensitive = Column(Boolean, default=False)
    spoiler_text = Column(Text, nullable=True)

    # Reply / conversation tracking
    in_reply_to_id = Column(String(64), nullable=True)  # Status ID being replied to
    in_reply_to_account_id = Column(String(64), nullable=True)
    conversation_id = Column(String(64), nullable=True)

    # Interaction counts
    replies_count = Column(Integer, default=0)
    reblogs_count = Column(Integer, default=0)
    favourites_count = Column(Integer, default=0)

    # Classification for your analysis pipeline
    status_type = Column(String(32), nullable=False, default="post")  # post, reply, mention, reblog

    # Store the full JSON for future reference
    raw_json = Column(JSON, nullable=True)

    # Relationships
    account = relationship("MonitoredAccount", back_populates="statuses")
    mentions = relationship("Mention", back_populates="status", cascade="all, delete-orphan")
    media_attachments = relationship("MediaAttachment", back_populates="status", cascade="all, delete-orphan")
    tags = relationship("Tag", back_populates="status", cascade="all, delete-orphan")

    __table_args__ = (
        UniqueConstraint("status_id", "account_db_id", name="uq_status_per_account"),
        Index("ix_status_created", "created_at"),
        Index("ix_status_type", "status_type"),
        Index("ix_status_account", "account_db_id"),
        Index("ix_status_conversation", "conversation_id"),
    )

    def __repr__(self) -> str:
        return f"<Status {self.status_id} type={self.status_type}>"
|
||||||
|
|
||||||
|
|
||||||
|
class Mention(Base):
    """A mention within a status (who was @-mentioned)."""

    __tablename__ = "mentions"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Owning status; the foreign key is declared with ON DELETE CASCADE.
    status_db_id = Column(Integer, ForeignKey("statuses.id", ondelete="CASCADE"), nullable=False)
    mentioned_account_id = Column(String(64), nullable=True)
    mentioned_username = Column(String(255), nullable=False)
    mentioned_acct = Column(String(512), nullable=False)  # full user@instance
    mentioned_url = Column(Text, nullable=True)

    status = relationship("Status", back_populates="mentions")
|
||||||
|
|
||||||
|
|
||||||
|
class MediaAttachment(Base):
    """Media attached to a status."""

    __tablename__ = "media_attachments"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Owning status; the foreign key is declared with ON DELETE CASCADE.
    status_db_id = Column(Integer, ForeignKey("statuses.id", ondelete="CASCADE"), nullable=False)
    media_id = Column(String(64), nullable=True)
    media_type = Column(String(32), nullable=True)  # image, video, gifv, audio
    url = Column(Text, nullable=True)
    preview_url = Column(Text, nullable=True)
    description = Column(Text, nullable=True)  # alt text

    status = relationship("Status", back_populates="media_attachments")
|
||||||
|
|
||||||
|
|
||||||
|
class Tag(Base):
    """A hashtag used in a status.

    One row is stored per (status, hashtag) occurrence; ``name`` is indexed.
    """

    __tablename__ = "tags"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Owning status; the foreign key is declared with ON DELETE CASCADE.
    status_db_id = Column(Integer, ForeignKey("statuses.id", ondelete="CASCADE"), nullable=False)
    name = Column(String(255), nullable=False)
    url = Column(Text, nullable=True)

    status = relationship("Status", back_populates="tags")

    __table_args__ = (
        Index("ix_tag_name", "name"),
    )
|
||||||
|
|
||||||
|
|
||||||
|
class CollectionLog(Base):
    """Log of each collection run for monitoring.

    A row starts in the "running" state and is later updated with a final
    state, a finish time and either a count of collected statuses or an
    error message.
    """

    __tablename__ = "collection_logs"

    id = Column(Integer, primary_key=True, autoincrement=True)
    account_db_id = Column(Integer, ForeignKey("monitored_accounts.id"), nullable=True)
    # Defaults to the current UTC time when the row is inserted.
    started_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
    finished_at = Column(DateTime(timezone=True), nullable=True)
    statuses_collected = Column(Integer, default=0)
    error = Column(Text, nullable=True)
    status = Column(String(32), default="running")  # running, success, error

    account = relationship("MonitoredAccount")
|
||||||
|
|
||||||
|
|
||||||
|
def init_db():
    """Create every table declared on ``Base`` using the module-level engine."""
    metadata = Base.metadata
    metadata.create_all(engine)
|
||||||
|
|
||||||
|
|
||||||
|
def get_session():
    """Return a fresh database session; the caller is responsible for closing it."""
    new_session = SessionLocal()
    return new_session
|
||||||
226
app/mastodon_api.py
Normal file
226
app/mastodon_api.py
Normal file
|
|
@ -0,0 +1,226 @@
|
||||||
|
"""Mastodon public API client — no authentication required."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from html import unescape
|
||||||
|
from typing import Optional
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Respect rate limits: Mastodon allows 300 requests per 5 minutes by default
DEFAULT_TIMEOUT = 30  # per-request timeout passed to requests.get, in seconds
MAX_RETRIES = 3  # total attempts per request before giving up
RETRY_BACKOFF = 5  # seconds; multiplied by the attempt number between retries
|
||||||
|
|
||||||
|
|
||||||
|
class MastodonAPIError(Exception):
    """Base error raised by this module when a Mastodon API request fails."""
|
||||||
|
|
||||||
|
|
||||||
|
class RateLimitError(MastodonAPIError):
    """Raised when the server answers with HTTP 429.

    ``retry_after`` holds the number of seconds to wait before trying again.
    """

    def __init__(self, retry_after: float = 60):
        message = f"Rate limited, retry after {retry_after}s"
        super().__init__(message)
        self.retry_after = retry_after
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_html(html: str) -> str:
|
||||||
|
"""Strip HTML tags and decode entities to get plain text."""
|
||||||
|
# Replace <br> and </p> with newlines
|
||||||
|
text = re.sub(r"<br\s*/?>", "\n", html)
|
||||||
|
text = re.sub(r"</p>", "\n", text)
|
||||||
|
# Remove all remaining tags
|
||||||
|
text = re.sub(r"<[^>]+>", "", text)
|
||||||
|
return unescape(text).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _retry_after_seconds(headers, default: float = 60.0) -> float:
    """Work out how many seconds to wait after an HTTP 429 response.

    Prefers a numeric ``Retry-After`` header, then ``X-RateLimit-Reset``,
    which may be a number of seconds or an ISO-8601 timestamp. Falls back to
    ``default`` when neither header can be interpreted.
    """
    from datetime import datetime, timezone

    retry_after = headers.get("Retry-After")
    if retry_after:
        try:
            return max(float(retry_after), 0.0)
        except ValueError:
            pass  # HTTP-date form; fall through to X-RateLimit-Reset

    reset = headers.get("X-RateLimit-Reset")
    if not reset:
        return default

    try:
        seconds = float(reset)
    except ValueError:
        # Not a number: treat it as an ISO-8601 timestamp and compute the delta.
        try:
            reset_at = datetime.fromisoformat(reset.strip().replace("Z", "+00:00"))
        except ValueError:
            return default
        if reset_at.tzinfo is None:
            reset_at = reset_at.replace(tzinfo=timezone.utc)
        return max((reset_at - datetime.now(timezone.utc)).total_seconds(), 0.0)

    # Very large numbers are not a plausible delay; use the default instead.
    return default if seconds > 1_000_000 else max(seconds, 0.0)


def _api_get(instance: str, path: str, params: Optional[dict] = None) -> requests.Response:
    """Make a GET request to a Mastodon instance's public API.

    Args:
        instance: Host name of the instance, e.g. "mastodon.social".
        path: Request path starting with "/", e.g. "/api/v1/accounts/lookup".
        params: Optional query parameters.

    Returns:
        The successful response.

    Raises:
        RateLimitError: The server answered 429; ``retry_after`` carries the
            suggested wait in seconds. Not retried here.
        MastodonAPIError: The server answered 404, or the request still
            failed after ``MAX_RETRIES`` attempts.
    """
    url = f"https://{instance}{path}"
    headers = {"Accept": "application/json", "User-Agent": "MastodonCollector/1.0"}

    for attempt in range(MAX_RETRIES):
        try:
            resp = requests.get(url, params=params, headers=headers, timeout=DEFAULT_TIMEOUT)

            if resp.status_code == 429:
                # The reset header may be a timestamp rather than a number,
                # so it must not be passed straight to float().
                retry_after = _retry_after_seconds(resp.headers)
                logger.warning("Rate limited by %s, waiting %.0fs", instance, retry_after)
                raise RateLimitError(retry_after)

            if resp.status_code == 404:
                raise MastodonAPIError(f"Not found: {url}")

            resp.raise_for_status()
            return resp

        except RateLimitError:
            raise
        except requests.RequestException as e:
            if attempt < MAX_RETRIES - 1:
                # Linear backoff: RETRY_BACKOFF, 2 * RETRY_BACKOFF, ...
                wait = RETRY_BACKOFF * (attempt + 1)
                logger.warning("Request to %s failed (attempt %d/%d): %s — retrying in %ds",
                               url, attempt + 1, MAX_RETRIES, e, wait)
                time.sleep(wait)
            else:
                raise MastodonAPIError(f"Failed after {MAX_RETRIES} attempts: {e}") from e

    raise MastodonAPIError("Unexpected retry exhaustion")
|
||||||
|
|
||||||
|
|
||||||
|
def lookup_account(instance: str, username: str) -> dict:
    """Look up an account on an instance by username.

    Tries the v1 lookup endpoint first and falls back to the v2 search
    endpoint if that fails.

    Args:
        instance: Host name of the instance to query.
        username: Local username, without the leading "@".

    Returns:
        The account JSON object as a dict.

    Raises:
        RateLimitError: The instance rate-limited either request.
        MastodonAPIError: The fallback request failed or found no account.
    """
    # Try the v1 lookup endpoint first (available on most instances)
    try:
        resp = _api_get(instance, "/api/v1/accounts/lookup", {"acct": username})
        return resp.json()
    except RateLimitError:
        # Being rate limited is not "endpoint unavailable": sending the
        # fallback request straight away would only add to the problem.
        raise
    except MastodonAPIError as e:
        logger.debug("Account lookup for @%s on %s failed (%s); falling back to search",
                     username, instance, e)

    # Fallback: search for the account
    resp = _api_get(instance, "/api/v2/search", {"q": f"@{username}@{instance}", "type": "accounts", "limit": 1})
    data = resp.json()
    accounts = data.get("accounts", [])
    if not accounts:
        raise MastodonAPIError(f"Account @{username} not found on {instance}")
    return accounts[0]
|
||||||
|
|
||||||
|
|
||||||
|
def get_account_statuses(
    instance: str,
    account_id: str,
    since_id: Optional[str] = None,
    limit: int = 40,
    exclude_reblogs: bool = False,
) -> list[dict]:
    """Fetch statuses of an account, following pagination, oldest first.

    Pages are requested from ``/api/v1/accounts/{account_id}/statuses`` and
    followed through the ``rel="next"`` entry of the ``Link`` response header
    until a page is empty, no next link is present, or the page cap is hit.

    Args:
        instance: Instance to query (passed straight through to ``_api_get``).
        account_id: Account ID used in the request path.
        since_id: If given, sent as ``since_id`` on every page so it acts as
            the lower bound while ``max_id`` walks backwards.
        limit: Page size; capped at 40.
        exclude_reblogs: If true, ``exclude_reblogs=true`` is sent.

    Returns:
        All collected status dicts in reverse of the order received, so the
        oldest status comes first.

    Raises:
        Whatever ``_api_get`` raises for a failed request.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import parse_qs, urlparse

    all_statuses = []
    params = {"limit": min(limit, 40)}
    if since_id:
        params["since_id"] = since_id
    if exclude_reblogs:
        params["exclude_reblogs"] = "true"

    path = f"/api/v1/accounts/{account_id}/statuses"

    # Paginate through results
    max_pages = 25  # safety limit
    page = 0

    while page < max_pages:
        resp = _api_get(instance, path, params)
        statuses = resp.json()

        if not statuses:
            break

        all_statuses.extend(statuses)
        page += 1

        # Check Link header for next page
        link_header = resp.headers.get("Link", "")
        next_match = re.search(r'<([^>]+)>;\s*rel="next"', link_header)
        if not next_match:
            break

        # Read max_id from the query string instead of matching digits only:
        # IDs are opaque strings and are not numeric on every server.
        next_query = parse_qs(urlparse(next_match.group(1)).query)
        max_id_values = next_query.get("max_id")
        if not max_id_values:
            break

        # since_id (if any) stays in params as the floor while max_id moves back.
        params["max_id"] = max_id_values[0]
        time.sleep(0.5)  # Be polite between pages
    else:
        # Loop ended because the cap was reached while a next page was still
        # advertised, so the result is incomplete.
        logger.warning(
            "Stopped after %d pages for account %s on %s; older statuses were not fetched",
            max_pages, account_id, instance,
        )

    # Return oldest first so we can process chronologically
    all_statuses.reverse()
    return all_statuses
|
||||||
|
|
||||||
|
|
||||||
|
def get_status_context(instance: str, status_id: str) -> dict:
    """Fetch the thread context of a status (its ancestors and descendants).

    Requests ``/api/v1/statuses/{status_id}/context`` through ``_api_get`` and
    returns the decoded JSON body.
    """
    context_path = "/api/v1/statuses/{}/context".format(status_id)
    response = _api_get(instance, context_path)
    return response.json()
|
||||||
|
|
||||||
|
|
||||||
|
def classify_status(status: dict, monitored_account_id: str) -> str:
    """
    Decide which category a status belongs to.

    Checks run in priority order and the first match wins:
      1. "reblog"  - the status has a truthy ``reblog`` entry
      2. "reply"   - the status has a truthy ``in_reply_to_id``
      3. "mention" - at least one mention whose ``id`` differs from
                     ``monitored_account_id``
      4. "post"    - anything else
    """
    for key, label in (("reblog", "reblog"), ("in_reply_to_id", "reply")):
        if status.get(key):
            return label

    mentioned = status.get("mentions", [])
    # A mention of the monitored account itself does not count.
    mentions_someone_else = bool(mentioned) and any(
        entry.get("id") != monitored_account_id for entry in mentioned
    )
    return "mention" if mentions_someone_else else "post"
|
||||||
|
|
||||||
|
|
||||||
|
def parse_status(status: dict, monitored_account_id: str) -> dict:
    """Parse a raw Mastodon status JSON into a flat dict for storage.

    For a reblog, the wrapper status supplies the identity fields (id, uri,
    visibility, created_at, reply fields, counters) while the content,
    mentions, media attachments and tags are taken from the reblogged status.
    ``url``, ``language``, ``sensitive`` and ``spoiler_text`` use the wrapper's
    value when it is truthy and otherwise fall back to the reblogged status.

    Args:
        status: Decoded status object; must contain ``"id"``.
        monitored_account_id: Passed to ``classify_status``.

    Returns:
        A dict of scalar fields plus ``mentions``, ``media_attachments`` and
        ``tags`` lists, and the untouched input under ``raw_json``.

    Raises:
        KeyError: If ``status`` has no ``"id"`` key.
    """
    # If it's a reblog, we store the original content but flag it
    actual = status.get("reblog") or status
    content_html = actual.get("content", "")
    conversation = status.get("conversation")

    return {
        "status_id": status["id"],
        "uri": status.get("uri", ""),
        "url": status.get("url") or actual.get("url", ""),
        "content": content_html,
        "text_content": _strip_html(content_html),
        "visibility": status.get("visibility", "public"),
        "created_at": status.get("created_at"),
        "language": status.get("language") or actual.get("language"),
        # The stored content is the reblogged status's, so its content warning
        # must travel with it; for non-reblogs `actual is status` and these two
        # expressions evaluate exactly as a plain status.get() would.
        "sensitive": status.get("sensitive") or actual.get("sensitive", False),
        "spoiler_text": status.get("spoiler_text") or actual.get("spoiler_text", ""),
        "in_reply_to_id": status.get("in_reply_to_id"),
        "in_reply_to_account_id": status.get("in_reply_to_account_id"),
        "conversation_id": conversation.get("id") if isinstance(conversation, dict) else None,
        "replies_count": status.get("replies_count", 0),
        "reblogs_count": status.get("reblogs_count", 0),
        "favourites_count": status.get("favourites_count", 0),
        "status_type": classify_status(status, monitored_account_id),
        "mentions": [
            {
                "mentioned_account_id": m.get("id"),
                "mentioned_username": m.get("username", ""),
                "mentioned_acct": m.get("acct", ""),
                "mentioned_url": m.get("url", ""),
            }
            for m in (actual.get("mentions") or [])
        ],
        "media_attachments": [
            {
                "media_id": ma.get("id"),
                "media_type": ma.get("type"),
                "url": ma.get("url"),
                "preview_url": ma.get("preview_url"),
                "description": ma.get("description"),
            }
            for ma in (actual.get("media_attachments") or [])
        ],
        "tags": [
            {"name": t.get("name", ""), "url": t.get("url", "")}
            for t in (actual.get("tags") or [])
        ],
        "raw_json": status,
    }
|
||||||
77
app/templates/accounts.html
Normal file
77
app/templates/accounts.html
Normal file
|
|
@ -0,0 +1,77 @@
|
||||||
|
{% extends "base.html" %}
|
||||||
|
{% block title %}Accounts — Mastodon Collector{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="flex justify-between items-center mb-4">
|
||||||
|
<h1>Monitored Accounts</h1>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card mb-4">
|
||||||
|
<h2>Add Account</h2>
|
||||||
|
<form method="POST" action="{{ url_for('accounts_add') }}" class="form-inline mt-2">
|
||||||
|
<input type="text" name="handle" placeholder="@user@instance.social" style="width: 320px;" required>
|
||||||
|
<button type="submit" class="btn btn-primary">Add Account</button>
|
||||||
|
</form>
|
||||||
|
<p class="text-muted text-sm mt-2">
|
||||||
|
You can also add accounts by editing <code>accounts.txt</code> — the collector picks them up automatically.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card">
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Handle</th>
|
||||||
|
<th>Display Name</th>
|
||||||
|
<th>Account ID</th>
|
||||||
|
<th>Status</th>
|
||||||
|
<th>Last Collected</th>
|
||||||
|
<th>Actions</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for acct in accounts %}
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<a href="{{ url_for('statuses_list', account_id=acct.id) }}" style="color: var(--accent);">
|
||||||
|
{{ acct.handle }}
|
||||||
|
</a>
|
||||||
|
</td>
|
||||||
|
<td>{{ acct.display_name or '—' }}</td>
|
||||||
|
<td class="text-muted text-sm">{{ acct.account_id or 'unresolved' }}</td>
|
||||||
|
<td>
|
||||||
|
{% if acct.is_active %}
|
||||||
|
<span class="badge badge-active">Active</span>
|
||||||
|
{% else %}
|
||||||
|
<span class="badge badge-paused">Paused</span>
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
<td class="text-muted text-sm">
|
||||||
|
{{ acct.last_collected_at.strftime('%Y-%m-%d %H:%M') if acct.last_collected_at else 'Never' }}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<div class="flex gap-2">
|
||||||
|
<form method="POST" action="{{ url_for('accounts_toggle', account_id=acct.id) }}">
|
||||||
|
<button type="submit" class="btn btn-outline btn-sm">
|
||||||
|
{{ 'Pause' if acct.is_active else 'Resume' }}
|
||||||
|
</button>
|
||||||
|
</form>
|
||||||
|
{# The handle is carried in a data attribute and read via dataset, so it is
   treated as data and never spliced into the JavaScript string literal. #}
<form method="POST" action="{{ url_for('accounts_delete', account_id=acct.id) }}"
      data-handle="{{ acct.handle }}"
      onsubmit="return confirm('Delete ' + this.dataset.handle + ' and ALL collected data? This cannot be undone.')">
    <button type="submit" class="btn btn-danger btn-sm">Delete</button>
</form>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
{% if not accounts %}
|
||||||
|
<tr>
|
||||||
|
<td colspan="6" class="text-muted" style="text-align:center; padding: 24px;">
|
||||||
|
No accounts yet. Add one above or edit <code>accounts.txt</code>.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endif %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
262
app/templates/base.html
Normal file
262
app/templates/base.html
Normal file
|
|
@ -0,0 +1,262 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>{% block title %}Mastodon Collector{% endblock %}</title>
|
||||||
|
<style>
|
||||||
|
:root {
|
||||||
|
--bg: #1a1a2e;
|
||||||
|
--bg-card: #16213e;
|
||||||
|
--bg-hover: #1a2745;
|
||||||
|
--text: #e0e0e0;
|
||||||
|
--text-muted: #8892a4;
|
||||||
|
--accent: #e2dbff;
|
||||||
|
--accent-hover: #ffffff;
|
||||||
|
--link: #7dd3fc;
|
||||||
|
--success: #2ecc71;
|
||||||
|
--warning: #f39c12;
|
||||||
|
--danger: #e74c3c;
|
||||||
|
--border: #2a3a5c;
|
||||||
|
--tag-post: #3498db;
|
||||||
|
--tag-reply: #e67e22;
|
||||||
|
--tag-mention: #9b59b6;
|
||||||
|
--tag-reblog: #1abc9c;
|
||||||
|
}
|
||||||
|
|
||||||
|
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||||||
|
|
||||||
|
body {
|
||||||
|
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
|
||||||
|
background: var(--bg);
|
||||||
|
color: var(--text);
|
||||||
|
line-height: 1.6;
|
||||||
|
}
|
||||||
|
|
||||||
|
.container { max-width: 1200px; margin: 0 auto; padding: 0 20px; }
|
||||||
|
|
||||||
|
nav {
|
||||||
|
background: var(--bg-card);
|
||||||
|
border-bottom: 1px solid var(--border);
|
||||||
|
padding: 12px 0;
|
||||||
|
position: sticky;
|
||||||
|
top: 0;
|
||||||
|
z-index: 100;
|
||||||
|
}
|
||||||
|
nav .container {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 24px;
|
||||||
|
}
|
||||||
|
nav .logo {
|
||||||
|
font-size: 18px;
|
||||||
|
font-weight: 700;
|
||||||
|
color: var(--accent);
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
nav a {
|
||||||
|
color: var(--text-muted);
|
||||||
|
text-decoration: none;
|
||||||
|
font-size: 14px;
|
||||||
|
padding: 6px 12px;
|
||||||
|
border-radius: 6px;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
nav a:hover, nav a.active {
|
||||||
|
color: var(--text);
|
||||||
|
background: var(--bg-hover);
|
||||||
|
}
|
||||||
|
|
||||||
|
main { padding: 24px 0; }
|
||||||
|
|
||||||
|
h1 { font-size: 24px; margin-bottom: 20px; }
|
||||||
|
h2 { font-size: 18px; margin-bottom: 12px; color: var(--text-muted); }
|
||||||
|
|
||||||
|
.flash {
|
||||||
|
padding: 12px 16px;
|
||||||
|
border-radius: 8px;
|
||||||
|
margin-bottom: 16px;
|
||||||
|
font-size: 14px;
|
||||||
|
}
|
||||||
|
.flash.success { background: rgba(46, 204, 113, 0.15); border: 1px solid var(--success); color: var(--success); }
|
||||||
|
.flash.error { background: rgba(231, 76, 60, 0.15); border: 1px solid var(--danger); color: var(--danger); }
|
||||||
|
.flash.info { background: rgba(108, 99, 255, 0.15); border: 1px solid var(--accent); color: var(--accent); }
|
||||||
|
|
||||||
|
.stats-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
|
||||||
|
gap: 16px;
|
||||||
|
margin-bottom: 24px;
|
||||||
|
}
|
||||||
|
.stat-card {
|
||||||
|
background: var(--bg-card);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: 10px;
|
||||||
|
padding: 20px;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
.stat-card .number {
|
||||||
|
font-size: 32px;
|
||||||
|
font-weight: 700;
|
||||||
|
color: var(--accent);
|
||||||
|
}
|
||||||
|
.stat-card .label {
|
||||||
|
font-size: 13px;
|
||||||
|
color: var(--text-muted);
|
||||||
|
margin-top: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.card {
|
||||||
|
background: var(--bg-card);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: 10px;
|
||||||
|
padding: 20px;
|
||||||
|
margin-bottom: 16px;
|
||||||
|
}
|
||||||
|
|
||||||
|
table {
|
||||||
|
width: 100%;
|
||||||
|
border-collapse: collapse;
|
||||||
|
font-size: 14px;
|
||||||
|
}
|
||||||
|
th, td {
|
||||||
|
padding: 10px 12px;
|
||||||
|
text-align: left;
|
||||||
|
border-bottom: 1px solid var(--border);
|
||||||
|
}
|
||||||
|
th {
|
||||||
|
color: var(--text-muted);
|
||||||
|
font-weight: 600;
|
||||||
|
font-size: 12px;
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.5px;
|
||||||
|
}
|
||||||
|
tr:hover { background: var(--bg-hover); }
|
||||||
|
td a { color: var(--link); text-decoration: none; }
|
||||||
|
td a:hover { color: #ffffff; text-decoration: underline; }
|
||||||
|
|
||||||
|
.badge {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 2px 8px;
|
||||||
|
border-radius: 12px;
|
||||||
|
font-size: 11px;
|
||||||
|
font-weight: 600;
|
||||||
|
text-transform: uppercase;
|
||||||
|
}
|
||||||
|
.badge-post { background: rgba(52, 152, 219, 0.2); color: var(--tag-post); }
|
||||||
|
.badge-reply { background: rgba(230, 126, 34, 0.2); color: var(--tag-reply); }
|
||||||
|
.badge-mention { background: rgba(155, 89, 182, 0.2); color: var(--tag-mention); }
|
||||||
|
.badge-reblog { background: rgba(26, 188, 156, 0.2); color: var(--tag-reblog); }
|
||||||
|
.badge-active { background: rgba(46, 204, 113, 0.2); color: var(--success); }
|
||||||
|
.badge-paused { background: rgba(243, 156, 18, 0.2); color: var(--warning); }
|
||||||
|
.badge-success { background: rgba(46, 204, 113, 0.2); color: var(--success); }
|
||||||
|
.badge-error { background: rgba(231, 76, 60, 0.2); color: var(--danger); }
|
||||||
|
.badge-running { background: rgba(108, 99, 255, 0.2); color: var(--accent); }
|
||||||
|
|
||||||
|
.btn {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 8px 16px;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-size: 14px;
|
||||||
|
font-weight: 500;
|
||||||
|
text-decoration: none;
|
||||||
|
border: none;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s;
|
||||||
|
}
|
||||||
|
/* --accent and --accent-hover are both very light, so the label uses the dark
   page background colour; white text would be unreadable (invisible on hover). */
.btn-primary { background: var(--accent); color: var(--bg); }
.btn-primary:hover { background: var(--accent-hover); }
|
||||||
|
.btn-sm { padding: 4px 10px; font-size: 12px; }
|
||||||
|
.btn-danger { background: var(--danger); color: white; }
|
||||||
|
.btn-danger:hover { background: #c0392b; }
|
||||||
|
.btn-outline {
|
||||||
|
background: transparent;
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
color: var(--text-muted);
|
||||||
|
}
|
||||||
|
.btn-outline:hover { border-color: var(--accent); color: var(--accent); }
|
||||||
|
|
||||||
|
input[type="text"], input[type="search"], select {
|
||||||
|
background: var(--bg);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: 6px;
|
||||||
|
padding: 8px 12px;
|
||||||
|
color: var(--text);
|
||||||
|
font-size: 14px;
|
||||||
|
outline: none;
|
||||||
|
}
|
||||||
|
input:focus, select:focus { border-color: var(--accent); }
|
||||||
|
|
||||||
|
.form-inline {
|
||||||
|
display: flex;
|
||||||
|
gap: 8px;
|
||||||
|
align-items: center;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.status-content {
|
||||||
|
font-size: 14px;
|
||||||
|
line-height: 1.7;
|
||||||
|
word-break: break-word;
|
||||||
|
}
|
||||||
|
.status-content a { color: var(--accent); }
|
||||||
|
|
||||||
|
.pagination {
|
||||||
|
display: flex;
|
||||||
|
gap: 8px;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
margin-top: 20px;
|
||||||
|
}
|
||||||
|
.pagination a {
|
||||||
|
color: var(--text-muted);
|
||||||
|
text-decoration: none;
|
||||||
|
padding: 6px 12px;
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: 6px;
|
||||||
|
}
|
||||||
|
.pagination a:hover { border-color: var(--accent); color: var(--accent); }
|
||||||
|
/* Current page marker: dark text, because --accent is a very light colour. */
.pagination .current {
    background: var(--accent);
    color: var(--bg);
    padding: 6px 12px;
    border-radius: 6px;
}
|
||||||
|
|
||||||
|
.text-muted { color: var(--text-muted); }
|
||||||
|
.text-sm { font-size: 13px; }
|
||||||
|
.mt-2 { margin-top: 8px; }
|
||||||
|
.mt-4 { margin-top: 16px; }
|
||||||
|
.mb-4 { margin-bottom: 16px; }
|
||||||
|
.flex { display: flex; }
|
||||||
|
.gap-2 { gap: 8px; }
|
||||||
|
.items-center { align-items: center; }
|
||||||
|
.justify-between { justify-content: space-between; }
|
||||||
|
.truncate { white-space: nowrap; overflow: hidden; text-overflow: ellipsis; max-width: 400px; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<nav>
|
||||||
|
<div class="container">
|
||||||
|
<a href="{{ url_for('index') }}" class="logo">Mastodon Collector</a>
|
||||||
|
<a href="{{ url_for('index') }}" class="{{ 'active' if request.endpoint == 'index' }}">Dashboard</a>
|
||||||
|
<a href="{{ url_for('accounts_list') }}" class="{{ 'active' if request.endpoint == 'accounts_list' }}">Accounts</a>
|
||||||
|
<a href="{{ url_for('statuses_list') }}" class="{{ 'active' if request.endpoint == 'statuses_list' }}">Statuses</a>
|
||||||
|
<a href="{{ url_for('export_csv') }}">Export CSV</a>
|
||||||
|
<a href="{{ url_for('api_stats') }}">API</a>
|
||||||
|
</div>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
<main>
|
||||||
|
<div class="container">
|
||||||
|
{% with messages = get_flashed_messages(with_categories=true) %}
|
||||||
|
{% for category, message in messages %}
|
||||||
|
<div class="flash {{ category }}">{{ message }}</div>
|
||||||
|
{% endfor %}
|
||||||
|
{% endwith %}
|
||||||
|
|
||||||
|
{% block content %}{% endblock %}
|
||||||
|
</div>
|
||||||
|
</main>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
123
app/templates/index.html
Normal file
123
app/templates/index.html
Normal file
|
|
@ -0,0 +1,123 @@
|
||||||
|
{% extends "base.html" %}
|
||||||
|
{% block title %}Dashboard — Mastodon Collector{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<h1>Dashboard</h1>
|
||||||
|
|
||||||
|
<div class="stats-grid">
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="number">{{ total_statuses }}</div>
|
||||||
|
<div class="label">Total Statuses</div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="number">{{ total_posts }}</div>
|
||||||
|
<div class="label">Posts</div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="number">{{ total_replies }}</div>
|
||||||
|
<div class="label">Replies</div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="number">{{ total_mentions }}</div>
|
||||||
|
<div class="label">Mentions</div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="number">{{ total_reblogs }}</div>
|
||||||
|
<div class="label">Reblogs</div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="number">{{ account_stats|length }}</div>
|
||||||
|
<div class="label">Monitored Accounts</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card">
|
||||||
|
<h2>Monitored Accounts</h2>
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Account</th>
|
||||||
|
<th>Instance</th>
|
||||||
|
<th>Status</th>
|
||||||
|
<th>Collected</th>
|
||||||
|
<th>Last Run</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for item in account_stats %}
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<a href="{{ url_for('statuses_list', account_id=item.account.id) }}">
|
||||||
|
{{ item.account.handle }}
|
||||||
|
</a>
|
||||||
|
{% if item.account.display_name %}
|
||||||
|
<span class="text-muted text-sm">— {{ item.account.display_name }}</span>
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
<td class="text-muted">{{ item.account.instance }}</td>
|
||||||
|
<td>
|
||||||
|
{% if item.account.is_active %}
|
||||||
|
<span class="badge badge-active">Active</span>
|
||||||
|
{% else %}
|
||||||
|
<span class="badge badge-paused">Paused</span>
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
<td>{{ item.status_count }}</td>
|
||||||
|
<td class="text-muted text-sm">
|
||||||
|
{% if item.account.last_collected_at %}
|
||||||
|
{{ item.account.last_collected_at.strftime('%Y-%m-%d %H:%M') }}
|
||||||
|
{% if item.last_log %}
|
||||||
|
<span class="badge badge-{{ item.last_log.status }}">{{ item.last_log.status }}</span>
|
||||||
|
{% endif %}
|
||||||
|
{% else %}
|
||||||
|
Never
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
{% if not account_stats %}
|
||||||
|
<tr>
|
||||||
|
<td colspan="5" class="text-muted" style="text-align:center; padding: 24px;">
|
||||||
|
No accounts being monitored yet.
|
||||||
|
<a href="{{ url_for('accounts_list') }}" style="color: var(--accent);">Add some accounts</a>.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endif %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{% if recent_logs %}
|
||||||
|
<div class="card mt-4">
|
||||||
|
<h2>Recent Collection Runs</h2>
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Time</th>
|
||||||
|
<th>Account</th>
|
||||||
|
<th>Status</th>
|
||||||
|
<th>Collected</th>
|
||||||
|
<th>Error</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for log in recent_logs %}
|
||||||
|
<tr>
|
||||||
|
<td class="text-sm">{{ log.started_at.strftime('%Y-%m-%d %H:%M:%S') if log.started_at }}</td>
|
||||||
|
<td>
|
||||||
|
{% if log.account %}
|
||||||
|
{{ log.account.handle }}
|
||||||
|
{% else %}
|
||||||
|
—
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
<td><span class="badge badge-{{ log.status }}">{{ log.status }}</span></td>
|
||||||
|
<td>{{ log.statuses_collected }}</td>
|
||||||
|
<td class="text-muted text-sm truncate">{{ log.error or '—' }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
{% endblock %}
|
||||||
140
app/templates/status_detail.html
Normal file
140
app/templates/status_detail.html
Normal file
|
|
@ -0,0 +1,140 @@
|
||||||
|
{% extends "base.html" %}
|
||||||
|
{% block title %}Status Detail — Mastodon Collector{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="flex items-center gap-2 mb-4">
|
||||||
|
<a href="{{ url_for('statuses_list') }}" class="btn btn-outline btn-sm">← Back</a>
|
||||||
|
<h1>Status Detail</h1>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card mb-4">
|
||||||
|
<div class="flex justify-between items-center mb-4">
|
||||||
|
<div>
|
||||||
|
<span class="badge badge-{{ status.status_type }}">{{ status.status_type }}</span>
|
||||||
|
<span class="text-muted text-sm" style="margin-left: 8px;">
|
||||||
|
{{ status.created_at.strftime('%Y-%m-%d %H:%M:%S UTC') if status.created_at }}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
{# Only http(s) URLs become links. NOTE(review): status.url presumably
   originates from remote API data, so other schemes (e.g. javascript:) must
   not be clickable — confirm against the collector. #}
{% if status.url and status.url.lower().startswith(('http://', 'https://')) %}
    <a href="{{ status.url }}" target="_blank" rel="noopener noreferrer" class="btn btn-outline btn-sm">View on Mastodon ↗</a>
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<th style="width: 160px;">Account</th>
|
||||||
|
<td>{{ status.account.handle }}{% if status.account.display_name %} — {{ status.account.display_name }}{% endif %}</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th>Visibility</th>
|
||||||
|
<td>{{ status.visibility }}</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th>Language</th>
|
||||||
|
<td>{{ status.language or 'Unknown' }}</td>
|
||||||
|
</tr>
|
||||||
|
{% if status.in_reply_to_id %}
|
||||||
|
<tr>
|
||||||
|
<th>In Reply To</th>
|
||||||
|
<td>Status {{ status.in_reply_to_id }}{% if status.in_reply_to_account_id %} (account {{ status.in_reply_to_account_id }}){% endif %}</td>
|
||||||
|
</tr>
|
||||||
|
{% endif %}
|
||||||
|
{% if status.conversation_id %}
|
||||||
|
<tr>
|
||||||
|
<th>Conversation</th>
|
||||||
|
<td>{{ status.conversation_id }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endif %}
|
||||||
|
<tr>
|
||||||
|
<th>Interactions</th>
|
||||||
|
<td>↩ {{ status.replies_count }} replies ⟳ {{ status.reblogs_count }} reblogs ★ {{ status.favourites_count }} favourites</td>
|
||||||
|
</tr>
|
||||||
|
{% if status.sensitive %}
|
||||||
|
<tr>
|
||||||
|
<th>Sensitive</th>
|
||||||
|
<td>Yes{% if status.spoiler_text %} — {{ status.spoiler_text }}{% endif %}</td>
|
||||||
|
</tr>
|
||||||
|
{% endif %}
|
||||||
|
<tr>
|
||||||
|
<th>Mastodon Status ID</th>
|
||||||
|
<td class="text-muted">{{ status.status_id }}</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th>URI</th>
|
||||||
|
<td class="text-muted text-sm">{{ status.uri }}</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card mb-4">
|
||||||
|
<h2>Content (HTML)</h2>
|
||||||
|
<div class="status-content mt-2" style="padding: 16px; background: var(--bg); border-radius: 8px;">
|
||||||
|
{# NOTE(review): `| safe` disables autoescaping, so this HTML is rendered
   verbatim. It presumably is the `content` field stored from the remote
   instance's API response; if that instance is not trusted to sanitise it,
   this is a stored-XSS vector — sanitise server-side before storing/rendering. #}
{{ status.content | safe }}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card mb-4">
|
||||||
|
<h2>Content (Plain Text)</h2>
|
||||||
|
<div class="mt-2" style="padding: 16px; background: var(--bg); border-radius: 8px; white-space: pre-wrap; font-family: monospace; font-size: 13px;">{{ status.text_content }}</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{% if status.mentions %}
|
||||||
|
<div class="card mb-4">
|
||||||
|
<h2>Mentions ({{ status.mentions|length }})</h2>
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr><th>Account</th><th>URL</th></tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for m in status.mentions %}
|
||||||
|
<tr>
|
||||||
|
<td>@{{ m.mentioned_acct }}</td>
|
||||||
|
<td class="text-muted text-sm">{{ m.mentioned_url }}</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if status.media_attachments %}
|
||||||
|
<div class="card mb-4">
|
||||||
|
<h2>Media Attachments ({{ status.media_attachments|length }})</h2>
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr><th>Type</th><th>Description</th><th>URL</th></tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for ma in status.media_attachments %}
|
||||||
|
<tr>
    <td>{{ ma.media_type }}</td>
    <td>{{ ma.description or '—' }}</td>
    <td class="text-sm">
        {# Only http(s) URLs become links; anything else is shown as a dash. #}
        {% if ma.url and ma.url.lower().startswith(('http://', 'https://')) %}
            <a href="{{ ma.url }}" target="_blank" rel="noopener noreferrer" style="color: var(--accent);">View ↗</a>
        {% else %}
            —
        {% endif %}
    </td>
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if status.tags %}
|
||||||
|
<div class="card mb-4">
|
||||||
|
<h2>Tags</h2>
|
||||||
|
<div class="mt-2">
|
||||||
|
{% for t in status.tags %}
|
||||||
|
<span class="badge" style="background: var(--bg); margin: 2px;">#{{ t.name }}</span>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<div class="card">
|
||||||
|
<h2>Raw JSON</h2>
|
||||||
|
<details>
|
||||||
|
<summary class="text-muted" style="cursor: pointer; padding: 8px 0;">Click to expand</summary>
|
||||||
|
<pre style="padding: 16px; background: var(--bg); border-radius: 8px; overflow-x: auto; font-size: 12px; max-height: 600px; overflow-y: auto;">{{ status.raw_json | tojson(indent=2) }}</pre>
|
||||||
|
</details>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
112
app/templates/statuses.html
Normal file
112
app/templates/statuses.html
Normal file
|
|
@ -0,0 +1,112 @@
|
||||||
|
{% extends "base.html" %}
|
||||||
|
{% block title %}Statuses — Mastodon Collector{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="flex justify-between items-center mb-4">
|
||||||
|
<h1>Collected Statuses <span class="text-muted text-sm">({{ total }})</span></h1>
|
||||||
|
<a href="{{ url_for('export_csv', account_id=current_account_id or '', type=current_type or '') }}"
|
||||||
|
class="btn btn-outline btn-sm">Export CSV</a>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card mb-4">
|
||||||
|
<form method="GET" action="{{ url_for('statuses_list') }}" class="form-inline">
|
||||||
|
<select name="account_id">
|
||||||
|
<option value="">All accounts</option>
|
||||||
|
{% for acct in accounts %}
|
||||||
|
<option value="{{ acct.id }}" {{ 'selected' if current_account_id == acct.id }}>
|
||||||
|
{{ acct.handle }}
|
||||||
|
</option>
|
||||||
|
{% endfor %}
|
||||||
|
</select>
|
||||||
|
|
||||||
|
<select name="type">
|
||||||
|
<option value="">All types</option>
|
||||||
|
<option value="post" {{ 'selected' if current_type == 'post' }}>Posts</option>
|
||||||
|
<option value="reply" {{ 'selected' if current_type == 'reply' }}>Replies</option>
|
||||||
|
<option value="mention" {{ 'selected' if current_type == 'mention' }}>Mentions</option>
|
||||||
|
<option value="reblog" {{ 'selected' if current_type == 'reblog' }}>Reblogs</option>
|
||||||
|
</select>
|
||||||
|
|
||||||
|
<input type="search" name="q" placeholder="Search text content..." value="{{ search }}" style="width: 260px;">
|
||||||
|
<button type="submit" class="btn btn-primary btn-sm">Filter</button>
|
||||||
|
{% if current_account_id or current_type or search %}
|
||||||
|
<a href="{{ url_for('statuses_list') }}" class="btn btn-outline btn-sm">Clear</a>
|
||||||
|
{% endif %}
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card">
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Date</th>
|
||||||
|
<th>Account</th>
|
||||||
|
<th>Type</th>
|
||||||
|
<th>Content</th>
|
||||||
|
<th>Interactions</th>
|
||||||
|
<th></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for s in statuses %}
|
||||||
|
<tr>
|
||||||
|
<td class="text-sm" style="white-space:nowrap;">
|
||||||
|
{{ s.created_at.strftime('%Y-%m-%d %H:%M') if s.created_at }}
|
||||||
|
</td>
|
||||||
|
<td class="text-sm">{{ s.account.handle }}</td>
|
||||||
|
<td>
|
||||||
|
<span class="badge badge-{{ s.status_type }}">{{ s.status_type }}</span>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<div class="truncate status-content">
|
||||||
|
{{ s.text_content[:200] }}{% if s.text_content and s.text_content|length > 200 %}...{% endif %}
|
||||||
|
</div>
|
||||||
|
{% if s.tags %}
|
||||||
|
<div class="text-sm text-muted mt-2">
|
||||||
|
{% for t in s.tags %}
|
||||||
|
<span>#{{ t.name }}</span>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
<td class="text-sm text-muted" style="white-space:nowrap;">
|
||||||
|
↩ {{ s.replies_count }} ⟳ {{ s.reblogs_count }} ★ {{ s.favourites_count }}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<a href="{{ url_for('status_detail', status_db_id=s.id) }}" class="btn btn-outline btn-sm">View</a>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
{% if not statuses %}
|
||||||
|
<tr>
|
||||||
|
<td colspan="6" class="text-muted" style="text-align:center; padding: 24px;">
|
||||||
|
No statuses found. The collector runs every {{ (config.get('POLL_INTERVAL_SECONDS', 14400)|int // 3600) }} hours; statuses will appear here after the first collection cycle.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endif %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{% if total_pages > 1 %}
|
||||||
|
<div class="pagination">
|
||||||
|
{% if page > 1 %}
|
||||||
|
<a href="{{ url_for('statuses_list', page=page-1, account_id=current_account_id, type=current_type, q=search) }}">← Prev</a>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% for p in range(1, total_pages + 1) %}
|
||||||
|
{% if p == page %}
|
||||||
|
<span class="current">{{ p }}</span>
|
||||||
|
{% elif p <= 3 or p >= total_pages - 2 or (p >= page - 2 and p <= page + 2) %}
|
||||||
|
<a href="{{ url_for('statuses_list', page=p, account_id=current_account_id, type=current_type, q=search) }}">{{ p }}</a>
|
||||||
|
{% elif p == 4 or p == total_pages - 3 %}
|
||||||
|
<span class="text-muted">…</span>
|
||||||
|
{% endif %}
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
{% if page < total_pages %}
|
||||||
|
<a href="{{ url_for('statuses_list', page=page+1, account_id=current_account_id, type=current_type, q=search) }}">Next →</a>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
{% endblock %}
|
||||||
383
app/web.py
Normal file
383
app/web.py
Normal file
|
|
@ -0,0 +1,383 @@
|
||||||
|
"""Flask web application for managing monitored accounts and viewing collected data."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from flask import Flask, render_template, request, redirect, url_for, flash, jsonify
|
||||||
|
from sqlalchemy import func, desc
|
||||||
|
|
||||||
|
from app.db import (
|
||||||
|
init_db,
|
||||||
|
get_session,
|
||||||
|
MonitoredAccount,
|
||||||
|
Status,
|
||||||
|
Mention,
|
||||||
|
CollectionLog,
|
||||||
|
)
|
||||||
|
from app.mastodon_api import lookup_account, MastodonAPIError
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)

# Flask signs session/flash cookies with this key. A publicly known fallback
# makes those cookies forgeable, so keep the development default working but
# make its use loud instead of silent.
_secret_key = os.environ.get("FLASK_SECRET_KEY")
if not _secret_key:
    logger.warning(
        "FLASK_SECRET_KEY is not set; falling back to the insecure development key. "
        "Set FLASK_SECRET_KEY before exposing this application."
    )
    _secret_key = "dev-secret-key"
app.secret_key = _secret_key

# Initialize database on startup
with app.app_context():
    init_db()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/")
def index():
    """Render the dashboard page.

    Collects the overall status total, one total per status type
    (post / reply / mention / reblog), a summary entry for every monitored
    account (its status count and most recent collection log) and the 20 most
    recent collection logs, then renders ``index.html`` with them.
    """
    db = get_session()
    try:
        def count_where(criterion):
            # COUNT(Status.id) for rows matching `criterion`; a NULL result becomes 0.
            return db.query(func.count(Status.id)).filter(criterion).scalar() or 0

        monitored = (
            db.query(MonitoredAccount)
            .order_by(MonitoredAccount.instance, MonitoredAccount.username)
            .all()
        )

        context = {
            "total_statuses": db.query(func.count(Status.id)).scalar() or 0,
            "total_posts": count_where(Status.status_type == "post"),
            "total_replies": count_where(Status.status_type == "reply"),
            "total_mentions": count_where(Status.status_type == "mention"),
            "total_reblogs": count_where(Status.status_type == "reblog"),
        }

        # One summary dict per monitored account.
        per_account = []
        for acct in monitored:
            status_count = count_where(Status.account_db_id == acct.id)
            newest_log = (
                db.query(CollectionLog)
                .filter_by(account_db_id=acct.id)
                .order_by(desc(CollectionLog.started_at))
                .first()
            )
            per_account.append(
                {"account": acct, "status_count": status_count, "last_log": newest_log}
            )
        context["account_stats"] = per_account

        # Latest collection runs across all accounts.
        context["recent_logs"] = (
            db.query(CollectionLog)
            .order_by(desc(CollectionLog.started_at))
            .limit(20)
            .all()
        )

        return render_template("index.html", **context)
    finally:
        db.close()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/accounts")
def accounts_list():
    """Render ``accounts.html`` with every monitored account, ordered by instance then username."""
    db = get_session()
    try:
        ordering = (MonitoredAccount.instance, MonitoredAccount.username)
        rows = db.query(MonitoredAccount).order_by(*ordering).all()
        return render_template("accounts.html", accounts=rows)
    finally:
        db.close()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/accounts/add", methods=["POST"])
def accounts_add():
    """Add an account to the monitoring list from the posted ``handle`` form field.

    The handle is expected as ``user@instance`` (leading ``@`` characters are
    ignored). An already-known account is re-activated if it was paused;
    otherwise a new ``MonitoredAccount`` is stored. Profile details are filled
    in from ``lookup_account`` when that succeeds; if it raises
    ``MastodonAPIError`` the account is still added with only username and
    instance. Always redirects back to the accounts list.
    """
    back_to_list = url_for("accounts_list")

    handle = request.form.get("handle", "").strip().lstrip("@")
    username, separator, instance = handle.partition("@")
    # Reject handles with no "@" separator or with an empty half on either side.
    if not (separator and username and instance):
        flash("Invalid handle format. Use @user@instance.social", "error")
        return redirect(back_to_list)

    db = get_session()
    try:
        known = (
            db.query(MonitoredAccount)
            .filter_by(username=username, instance=instance)
            .first()
        )
        if known:
            if known.is_active:
                flash(f"{known.handle} is already being monitored", "info")
            else:
                known.is_active = True
                db.commit()
                flash(f"Re-activated {known.handle}", "success")
            return redirect(back_to_list)

        # Start from the minimal record; enrich it if the remote lookup works.
        fields = {"username": username, "instance": instance, "is_active": True}
        try:
            profile = lookup_account(instance, username)
            fields.update(
                account_id=profile["id"],
                display_name=profile.get("display_name", ""),
                avatar_url=profile.get("avatar", ""),
                note=profile.get("note", ""),
            )
        except MastodonAPIError as e:
            logger.warning("Could not resolve account @%s@%s: %s — adding anyway", username, instance, e)

        acct = MonitoredAccount(**fields)
        db.add(acct)
        db.commit()
        flash(f"Added {acct.handle} to monitoring list", "success")
        return redirect(back_to_list)
    finally:
        db.close()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/accounts/<int:account_id>/toggle", methods=["POST"])
def accounts_toggle(account_id):
    """Flip a monitored account between active and paused.

    Args:
        account_id: Primary key of the ``MonitoredAccount`` row (from the URL).

    Returns:
        A redirect to the accounts list. A flash message reports the new
        state, or an error when no account has the given id.
    """
    session = get_session()
    try:
        # Session.get() replaces the legacy Query.get(), which emits a
        # LegacyAPIWarning under SQLAlchemy 2.0.
        acct = session.get(MonitoredAccount, account_id)
        if acct is None:
            flash("Account not found", "error")
        else:
            acct.is_active = not acct.is_active
            # Read the values needed for the message before commit, which
            # expires the instance's attributes.
            state = "Activated" if acct.is_active else "Paused"
            handle = acct.handle
            session.commit()
            flash(f"{state} monitoring for {handle}", "success")
        return redirect(url_for("accounts_list"))
    finally:
        session.close()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/accounts/<int:account_id>/delete", methods=["POST"])
def accounts_delete(account_id):
    """Delete a monitored account together with its statuses and collection logs.

    Args:
        account_id: Primary key of the ``MonitoredAccount`` row (from the URL).

    Returns:
        A redirect to the accounts list. A flash message confirms the
        deletion, or reports an error when no account has the given id.
    """
    session = get_session()
    try:
        # Session.get() replaces the legacy Query.get(), which emits a
        # LegacyAPIWarning under SQLAlchemy 2.0.
        acct = session.get(MonitoredAccount, account_id)
        if acct is None:
            flash("Account not found", "error")
        else:
            handle = acct.handle
            # Bulk deletes bypass ORM-level cascades.
            # NOTE(review): removal of dependent mentions/media/tags presumably
            # relies on ON DELETE CASCADE foreign keys in the schema — confirm.
            session.query(Status).filter_by(account_db_id=acct.id).delete()
            session.query(CollectionLog).filter_by(account_db_id=acct.id).delete()
            session.delete(acct)
            session.commit()
            flash(f"Deleted {handle} and all collected data", "success")
        return redirect(url_for("accounts_list"))
    finally:
        session.close()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/statuses")
def statuses_list():
    """Browse collected statuses with optional filters and pagination.

    Query parameters:
        page: 1-based page number (values below 1 are treated as 1).
        per_page: Page size, clamped to the range 1..500 (default 50).
        account_id: Restrict to one monitored account.
        type: Restrict to one status type.
        q: Case-insensitive substring search in the text content.

    Returns:
        The rendered ``statuses.html`` page.
    """
    session = get_session()
    try:
        # Clamp paging input: page <= 0 or per_page < 0 would produce a
        # negative OFFSET/LIMIT, and per_page == 0 would divide by zero below.
        page = max(1, request.args.get("page", 1, type=int))
        per_page = min(max(1, request.args.get("per_page", 50, type=int)), 500)
        account_id = request.args.get("account_id", type=int)
        status_type = request.args.get("type", "")
        search = request.args.get("q", "").strip()

        query = session.query(Status).join(MonitoredAccount)

        if account_id:
            query = query.filter(Status.account_db_id == account_id)
        if status_type:
            query = query.filter(Status.status_type == status_type)
        if search:
            # Escape LIKE metacharacters so "%" and "_" typed by the user are
            # matched literally rather than acting as wildcards.
            escaped = (
                search.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
            )
            query = query.filter(Status.text_content.ilike(f"%{escaped}%", escape="\\"))

        total = query.count()
        statuses = (
            query.order_by(desc(Status.created_at))
            .offset((page - 1) * per_page)
            .limit(per_page)
            .all()
        )

        accounts = session.query(MonitoredAccount).order_by(MonitoredAccount.username).all()
        # Ceiling division; always at least one page so the template can render.
        total_pages = max(1, (total + per_page - 1) // per_page)

        return render_template(
            "statuses.html",
            statuses=statuses,
            accounts=accounts,
            page=page,
            per_page=per_page,
            total=total,
            total_pages=total_pages,
            current_account_id=account_id,
            current_type=status_type,
            search=search,
        )
    finally:
        session.close()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/statuses/<int:status_db_id>")
def status_detail(status_db_id):
    """Show a single collected status.

    Args:
        status_db_id: Primary key of the ``Status`` row (from the URL).

    Returns:
        The rendered ``status_detail.html`` page, or a redirect to the status
        list with an error flash when no such status exists.
    """
    session = get_session()
    try:
        # Session.get() replaces the legacy Query.get(), which emits a
        # LegacyAPIWarning under SQLAlchemy 2.0.
        status = session.get(Status, status_db_id)
        if not status:
            flash("Status not found", "error")
            return redirect(url_for("statuses_list"))
        return render_template("status_detail.html", status=status)
    finally:
        session.close()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/stats")
def api_stats():
    """Return collection statistics as JSON.

    The payload has ``total_statuses``, a ``by_type`` mapping with one count
    per status type, and an ``accounts`` list with the handle, status count
    and last collection time (ISO string or null) of each active account.
    """
    db = get_session()
    try:
        def count_where(criterion):
            # COUNT(Status.id) for rows matching `criterion`; a NULL result becomes 0.
            return db.query(func.count(Status.id)).filter(criterion).scalar() or 0

        total = db.query(func.count(Status.id)).scalar() or 0
        per_type = {
            kind: count_where(Status.status_type == kind)
            for kind in ("post", "reply", "mention", "reblog")
        }

        account_rows = []
        for acct in db.query(MonitoredAccount).filter_by(is_active=True).all():
            n_statuses = count_where(Status.account_db_id == acct.id)
            collected_at = acct.last_collected_at
            account_rows.append(
                {
                    "handle": acct.handle,
                    "status_count": n_statuses,
                    "last_collected": collected_at.isoformat() if collected_at else None,
                }
            )

        return jsonify(
            {"total_statuses": total, "by_type": per_type, "accounts": account_rows}
        )
    finally:
        db.close()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/statuses")
def api_statuses():
    """Return a page of collected statuses as JSON.

    Query parameters:
        page: 1-based page number (values below 1 are treated as 1).
        per_page: Page size, clamped to the range 1..500 (default 100).
        account_id: Restrict to one monitored account.
        type: Restrict to one status type.
        since: ISO 8601 datetime; only statuses created at or after it are
            returned. A trailing ``Z`` is accepted as UTC.

    Returns:
        JSON with ``total``, ``page``, ``per_page`` and ``statuses``; or a
        400 response with an ``error`` field when ``since`` cannot be parsed.
    """
    # Clamp paging input: page <= 0 or a negative per_page would produce a
    # negative OFFSET/LIMIT and fail in the database.
    page = max(1, request.args.get("page", 1, type=int))
    per_page = min(max(1, request.args.get("per_page", 100, type=int)), 500)
    account_id = request.args.get("account_id", type=int)
    status_type = request.args.get("type", "")
    since = request.args.get("since", "").strip()  # ISO datetime

    # Parse `since` up front so malformed input is a client error (400)
    # instead of surfacing as a database error (500).
    since_dt = None
    if since:
        # datetime.fromisoformat() only accepts a "Z" suffix on Python 3.11+.
        normalized = since[:-1] + "+00:00" if since.endswith(("Z", "z")) else since
        try:
            since_dt = datetime.fromisoformat(normalized)
        except ValueError:
            return jsonify({"error": "Invalid 'since' value; expected an ISO 8601 datetime"}), 400

    session = get_session()
    try:
        query = session.query(Status)

        if account_id:
            query = query.filter(Status.account_db_id == account_id)
        if status_type:
            query = query.filter(Status.status_type == status_type)
        if since_dt is not None:
            query = query.filter(Status.created_at >= since_dt)

        total = query.count()
        statuses = (
            query.order_by(desc(Status.created_at))
            .offset((page - 1) * per_page)
            .limit(per_page)
            .all()
        )

        return jsonify({
            "total": total,
            "page": page,
            "per_page": per_page,
            "statuses": [
                {
                    "id": s.id,
                    "status_id": s.status_id,
                    "account": s.account.handle,
                    "url": s.url,
                    "content": s.content,
                    "text_content": s.text_content,
                    "visibility": s.visibility,
                    "created_at": s.created_at.isoformat() if s.created_at else None,
                    "language": s.language,
                    "status_type": s.status_type,
                    "in_reply_to_id": s.in_reply_to_id,
                    "replies_count": s.replies_count,
                    "reblogs_count": s.reblogs_count,
                    "favourites_count": s.favourites_count,
                    "mentions": [
                        {"acct": m.mentioned_acct, "url": m.mentioned_url}
                        for m in s.mentions
                    ],
                    "tags": [t.name for t in s.tags],
                }
                for s in statuses
            ],
        })
    finally:
        session.close()
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/export")
def export_csv():
    """Download collected statuses as a CSV attachment.

    Optional ``account_id`` and ``type`` query parameters narrow the export.
    Rows are ordered newest first; mentions and tags are each flattened into
    a single ``"; "``-separated column.
    """
    import csv
    from io import StringIO

    from flask import Response

    header = (
        "id", "account", "status_type", "created_at", "url",
        "text_content", "language", "visibility", "in_reply_to_id",
        "replies_count", "reblogs_count", "favourites_count",
        "mentions", "tags", "sensitive", "spoiler_text",
    )

    def as_row(s):
        # Flatten one Status into the column order declared in `header`.
        created = s.created_at.isoformat() if s.created_at else ""
        mentioned = "; ".join(m.mentioned_acct for m in s.mentions)
        tagged = "; ".join(t.name for t in s.tags)
        return [
            s.status_id, s.account.handle, s.status_type, created,
            s.url, s.text_content, s.language, s.visibility,
            s.in_reply_to_id, s.replies_count, s.reblogs_count,
            s.favourites_count, mentioned, tagged,
            s.sensitive, s.spoiler_text,
        ]

    db = get_session()
    try:
        account_id = request.args.get("account_id", type=int)
        status_type = request.args.get("type", "")

        selection = db.query(Status).join(MonitoredAccount)
        if account_id:
            selection = selection.filter(Status.account_db_id == account_id)
        if status_type:
            selection = selection.filter(Status.status_type == status_type)
        records = selection.order_by(desc(Status.created_at)).all()

        buffer = StringIO()
        sheet = csv.writer(buffer)
        sheet.writerow(header)
        sheet.writerows(as_row(record) for record in records)

        return Response(
            buffer.getvalue(),
            mimetype="text/csv",
            headers={"Content-Disposition": "attachment; filename=mastodon_statuses.csv"},
        )
    finally:
        db.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # The Werkzeug interactive debugger allows arbitrary code execution, so it
    # must never be enabled unconditionally on a server bound to all
    # interfaces. Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)
|
||||||
50
docker-compose.yml
Normal file
50
docker-compose.yml
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
version: "3.8"
|
||||||
|
|
||||||
|
services:
|
||||||
|
db:
|
||||||
|
image: postgres:16-alpine
|
||||||
|
restart: unless-stopped
|
||||||
|
environment:
|
||||||
|
POSTGRES_DB: mastodon_collector
|
||||||
|
POSTGRES_USER: collector
|
||||||
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-collector_secret}
|
||||||
|
volumes:
|
||||||
|
- pgdata:/var/lib/postgresql/data
|
||||||
|
ports:
|
||||||
|
- "127.0.0.1:5434:5432"
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U collector -d mastodon_collector"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
web:
|
||||||
|
build: .
|
||||||
|
restart: unless-stopped
|
||||||
|
command: gunicorn --bind 0.0.0.0:5000 --workers 2 --timeout 120 app.web:app
|
||||||
|
ports:
|
||||||
|
- "127.0.0.1:8585:5000"
|
||||||
|
environment:
|
||||||
|
DATABASE_URL: postgresql://collector:${POSTGRES_PASSWORD:-collector_secret}@db:5432/mastodon_collector
|
||||||
|
FLASK_SECRET_KEY: ${FLASK_SECRET_KEY:-change-me-in-production}
|
||||||
|
volumes:
|
||||||
|
- ./accounts.txt:/app/accounts.txt
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
|
||||||
|
collector:
|
||||||
|
build: .
|
||||||
|
restart: unless-stopped
|
||||||
|
command: python -m app.collector
|
||||||
|
environment:
|
||||||
|
DATABASE_URL: postgresql://collector:${POSTGRES_PASSWORD:-collector_secret}@db:5432/mastodon_collector
|
||||||
|
POLL_INTERVAL_SECONDS: ${POLL_INTERVAL_SECONDS:-14400}
|
||||||
|
volumes:
|
||||||
|
- ./accounts.txt:/app/accounts.txt
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
pgdata:
|
||||||
6
requirements.txt
Normal file
6
requirements.txt
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
flask==3.1.0
|
||||||
|
gunicorn==23.0.0
|
||||||
|
psycopg2-binary==2.9.10
|
||||||
|
sqlalchemy==2.0.36
|
||||||
|
requests==2.32.3
|
||||||
|
apscheduler==3.10.4
|
||||||
Loading…
Add table
Reference in a new issue