Add generic LLM provider terminology

- Update all documentation to use "LLM API" instead of "OpenAI GPT-4o-mini"
- Rename OPENAI_API_KEY to LLM_API_KEY in configuration
- Update code comments to reflect generic LLM usage
- Keep OpenAI-compatible client library (supports any LLM provider)
- Add LOCAL_OPERATIONS.md and accounts.txt to .gitignore
This commit is contained in:
Pieter 2026-04-18 20:27:09 +02:00
parent 870a0710b5
commit 754fddef12
7 changed files with 15 additions and 12 deletions

3
.gitignore vendored
View file

@@ -48,6 +48,9 @@ venv.bak/
.DS_Store
.claude/
# Local documentation
LOCAL_OPERATIONS.md
# Database files
*.sqlite
*.sqlite3

View file

@@ -1,6 +1,6 @@
# Mastodon Collector
Collects posts, replies, and mentions from a list of Mastodon accounts and stores them in PostgreSQL. Includes automated toxicity analysis using OpenAI GPT-4o-mini, a web UI for account management, data browsing, and manual review of flagged content, plus JSON/CSV APIs for your analysis pipeline.
Collects posts, replies, and mentions from a list of Mastodon accounts and stores them in PostgreSQL. Includes automated toxicity analysis using LLM API, a web UI for account management, data browsing, and manual review of flagged content, plus JSON/CSV APIs for your analysis pipeline.
## Quick Start
@@ -38,7 +38,7 @@ Edit `.env` to customize:
POSTGRES_PASSWORD=collector_secret # Change for production
FLASK_SECRET_KEY=change-me-in-production
POLL_INTERVAL_SECONDS=14400 # Default: 4 hours (14400s)
OPENAI_API_KEY=sk-... # Required for toxicity analysis
LLM_API_KEY=sk-... # Required for toxicity analysis
```
## Toxicity Analysis
@@ -47,7 +47,7 @@ The system includes automated toxicity detection and manual review capabilities:
### Features
- **Automated Classification**: Uses OpenAI GPT-4o-mini to analyze posts across 12 toxicity dimensions:
- **Automated Classification**: Uses LLM API to analyze posts across 12 toxicity dimensions:
- General toxicity, threats, hate speech
- Racism, antisemitism, islamophobia
- Sexism, homophobia, ableism

View file

@@ -1,7 +1,7 @@
"""Main toxicity analysis orchestrator.
Runs as a one-shot batch process: fetches unscored statuses,
classifies them in batches with GPT-4o-mini, and stores scores in PostgreSQL.
classifies them in batches with LLM API, and stores scores in PostgreSQL.
Usage:
python -m app.analyzer
@@ -120,7 +120,7 @@ async def run() -> None:
db = AnalyzerDB(config.database_url)
classifier = ToxicityClassifier(
api_key=config.openai_api_key,
api_key=config.llm_api_key,
model=config.model,
)

View file

@@ -1,6 +1,6 @@
"""OpenAI-powered toxicity classifier.
"""LLM-powered toxicity classifier.
Sends batches of Mastodon statuses to GPT-4o-mini for multi-category toxicity scoring.
Sends batches of Mastodon statuses to LLM API for multi-category toxicity scoring.
Returns a list of dicts mapping category → score (0.0–1.0).
"""
@@ -181,7 +181,7 @@ def parse_batch_response(raw: str, batch_size: int) -> list[ToxicityScores]:
class ToxicityClassifier:
"""Async OpenAI-based toxicity classifier with batch support."""
"""Async LLM-based toxicity classifier with batch support."""
def __init__(self, api_key: str, model: str = "gpt-4o-mini"):
self.client = AsyncOpenAI(api_key=api_key)

View file

@@ -11,7 +11,7 @@ class AnalyzerConfig:
"""Configuration for the toxicity analyzer."""
database_url: str
openai_api_key: str
llm_api_key: str
model: str = "gpt-4o-mini"
batch_size: int = 10
concurrency: int = 5
@@ -28,7 +28,7 @@ class AnalyzerConfig:
"""Load configuration from environment variables."""
return cls(
database_url=os.environ["DATABASE_URL"],
openai_api_key=os.environ["OPENAI_API_KEY"],
llm_api_key=os.environ["LLM_API_KEY"],
model=os.getenv("ANALYZER_MODEL", "gpt-4o-mini"),
batch_size=int(os.getenv("ANALYZER_BATCH_SIZE", "10")),
concurrency=int(os.getenv("ANALYZER_CONCURRENCY", "5")),

View file

@@ -40,7 +40,7 @@ services:
environment:
DATABASE_URL: postgresql://collector:${POSTGRES_PASSWORD:-collector_secret}@db:5432/mastodon_collector
POLL_INTERVAL_SECONDS: ${POLL_INTERVAL_SECONDS:-14400}
OPENAI_API_KEY: ${OPENAI_API_KEY}
LLM_API_KEY: ${LLM_API_KEY}
volumes:
- ./accounts.txt:/app/accounts.txt
depends_on:

View file

@@ -5,5 +5,5 @@ sqlalchemy==2.0.36
requests==2.32.3
apscheduler==3.10.4
beautifulsoup4==4.12.3
openai==1.58.1
openai==1.58.1 # OpenAI-compatible API client (supports any LLM provider)
asyncpg==0.30.0