# Implements comprehensive toxicity analysis following the Bluesky collector
# architecture:
# - Analyzer module with async batch processing using GPT-4o-mini
# - Database schema for toxicity scores and analysis-run tracking
# - 12 toxicity categories (toxic, threat, hate_speech, racism, antisemitism,
#   islamophobia, sexism, homophobia, insult, dehumanization, extremism,
#   ableism)
# - Web interface routes for the analysis dashboard and flagged-content review
# - Manual review API endpoint for human validation
# - Analysis helper functions for database queries
# - Dutch language support with coded political term recognition
#
# Usage: docker exec mastodon-collector-collector-1 python -m app.analyzer
# See TOXICITY_ANALYSIS.md for full documentation.
"""Configuration for the toxicity analyzer."""
from __future__ import annotations
import os
from dataclasses import dataclass
@dataclass
class AnalyzerConfig:
    """Runtime settings for the toxicity analyzer.

    The two credential fields have no defaults and must be supplied
    explicitly (or via :meth:`from_env`); every other field falls back to a
    sensible default that can be overridden with an ``ANALYZER_*``
    environment variable.
    """

    database_url: str        # connection string for the collector database
    openai_api_key: str      # credential for the OpenAI API

    model: str = "gpt-4o-mini"    # chat model used for scoring
    batch_size: int = 10          # posts submitted per API request
    concurrency: int = 5          # simultaneous in-flight requests
    flag_threshold: float = 0.5   # score at or above this flags the content
    limit: int = 0                # max posts to process; 0 = no limit
    log_level: str = "INFO"       # logging verbosity name

    # Pricing (as of 2025, per million tokens)
    input_cost_per_m: float = 0.150   # $0.150 per 1M input tokens
    output_cost_per_m: float = 0.600  # $0.600 per 1M output tokens

    @classmethod
    def from_env(cls) -> AnalyzerConfig:
        """Build a configuration from the process environment.

        Raises:
            KeyError: if ``DATABASE_URL`` or ``OPENAI_API_KEY`` is unset.
            ValueError: if a numeric ``ANALYZER_*`` override cannot be
                parsed as int/float.
        """
        env = os.environ
        opt = os.getenv
        return cls(
            database_url=env["DATABASE_URL"],
            openai_api_key=env["OPENAI_API_KEY"],
            model=opt("ANALYZER_MODEL", "gpt-4o-mini"),
            batch_size=int(opt("ANALYZER_BATCH_SIZE", "10")),
            concurrency=int(opt("ANALYZER_CONCURRENCY", "5")),
            flag_threshold=float(opt("ANALYZER_FLAG_THRESHOLD", "0.5")),
            limit=int(opt("ANALYZER_LIMIT", "0")),
            log_level=opt("ANALYZER_LOG_LEVEL", "INFO"),
        )