bluesky-collector/src/config.py
Pieter b1fd78e0c1 Initial commit: Bluesky collector with toxicity analysis
- Bluesky post collector with mention tracking
- PostgreSQL database for storage
- OpenAI-based toxicity analysis
- Web UI for viewing and analyzing posts
- Docker compose setup for deployment

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-02-08 13:54:36 +01:00

46 lines
1.6 KiB
Python

"""Configuration loader: reads environment variables and accounts YAML."""
from __future__ import annotations
import os
from dataclasses import dataclass, field
from pathlib import Path
import yaml
@dataclass
class CollectorConfig:
    """Settings for the collector, normally built with :meth:`from_env`.

    The first three fields have no default and must always be supplied; the
    remaining fields fall back to the defaults declared here.
    """

    database_url: str  # from DATABASE_URL (required by from_env)
    bsky_api_base: str  # from BSKY_PUBLIC_API
    accounts_file: str  # from ACCOUNTS_FILE
    log_level: str = "INFO"  # from LOG_LEVEL
    max_pages_per_account: int = 50  # from MAX_PAGES_PER_ACCOUNT
    mention_lookback_hours: int = 12  # from MENTION_LOOKBACK_HOURS
    feed_page_limit: int = 100  # Bluesky API max per page; not read from env
    bsky_handle: str | None = None  # for authenticated search
    bsky_app_password: str | None = None  # for authenticated search

    @staticmethod
    def _int_from_env(name: str, default: int) -> int:
        """Return environment variable *name* parsed as an int.

        An unset or blank variable yields *default*; blank values are what
        ``${VAR:-}`` style interpolation produces when nothing is configured.

        Raises:
            ValueError: the variable is set to something that is not an
                integer. The message names the variable so the bad setting
                can be found without a traceback hunt.
        """
        raw = os.getenv(name)
        if raw is None or not raw.strip():
            return default
        try:
            return int(raw)
        except ValueError as err:
            raise ValueError(
                f"{name} must be an integer, got {raw!r}"
            ) from err

    @classmethod
    def from_env(cls) -> CollectorConfig:
        """Build a config from the process environment.

        Returns:
            A ``CollectorConfig`` populated from environment variables, with
            the documented defaults for anything optional that is unset.

        Raises:
            KeyError: ``DATABASE_URL`` is not set.
            ValueError: an integer setting holds a non-numeric value.
        """
        return cls(
            database_url=os.environ["DATABASE_URL"],
            bsky_api_base=os.getenv("BSKY_PUBLIC_API", "https://public.api.bsky.app"),
            accounts_file=os.getenv("ACCOUNTS_FILE", "/app/config/accounts.yml"),
            log_level=os.getenv("LOG_LEVEL", "INFO"),
            max_pages_per_account=cls._int_from_env("MAX_PAGES_PER_ACCOUNT", 50),
            mention_lookback_hours=cls._int_from_env("MENTION_LOOKBACK_HOURS", 12),
            # An empty string means "not configured"; normalise it to None so
            # callers can test the credentials with a plain None check.
            bsky_handle=os.getenv("BSKY_HANDLE") or None,
            bsky_app_password=os.getenv("BSKY_APP_PASSWORD") or None,
        )
def load_accounts(path: str) -> list[str]:
    """Load the list of Bluesky handles from a YAML file.

    The file is read as a mapping with an ``accounts`` list whose entries are
    mappings carrying a ``handle`` key, e.g.::

        accounts:
          - handle: alice.bsky.social
          - handle: bob.bsky.social

    Args:
        path: Location of the YAML file.

    Returns:
        A list of handle strings in file order (e.g.
        ``['alice.bsky.social', 'bob.bsky.social']``). Entries that are not
        mappings or lack a ``handle`` key are skipped. The list is empty when
        the document is empty, is not a mapping, or has no usable
        ``accounts`` list.

    Raises:
        OSError: the file cannot be read.
        yaml.YAMLError: the file is not valid YAML.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # container's locale.
    text = Path(path).read_text(encoding="utf-8")
    data = yaml.safe_load(text)
    if not isinstance(data, dict):
        return []

    # A bare "accounts:" key parses to None, and a scalar value is not a
    # list of entries either; both mean there is nothing to collect.
    entries = data.get("accounts")
    if not isinstance(entries, list):
        return []

    return [
        entry["handle"]
        for entry in entries
        if isinstance(entry, dict) and "handle" in entry
    ]