Add generic LLM provider terminology
- Update all documentation to use "LLM API" instead of "OpenAI GPT-4o-mini" - Rename OPENAI_API_KEY to LLM_API_KEY in configuration - Update code comments to reflect generic LLM usage - Keep OpenAI-compatible client library (supports any LLM provider) - Add LOCAL_OPERATIONS.md and accounts.txt to .gitignore
This commit is contained in:
parent
870a0710b5
commit
754fddef12
7 changed files with 15 additions and 12 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -48,6 +48,9 @@ venv.bak/
|
|||
.DS_Store
|
||||
.claude/
|
||||
|
||||
# Local documentation
|
||||
LOCAL_OPERATIONS.md
|
||||
|
||||
# Database files
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# Mastodon Collector
|
||||
|
||||
Collects posts, replies, and mentions from a list of Mastodon accounts and stores them in PostgreSQL. Includes automated toxicity analysis using OpenAI GPT-4o-mini, a web UI for account management, data browsing, and manual review of flagged content, plus JSON/CSV APIs for your analysis pipeline.
|
||||
Collects posts, replies, and mentions from a list of Mastodon accounts and stores them in PostgreSQL. Includes automated toxicity analysis using an LLM API, a web UI for account management, data browsing, and manual review of flagged content, plus JSON/CSV APIs for your analysis pipeline.
|
||||
|
||||
## Quick Start
|
||||
|
||||
|
|
@ -38,7 +38,7 @@ Edit `.env` to customize:
|
|||
POSTGRES_PASSWORD=collector_secret # Change for production
|
||||
FLASK_SECRET_KEY=change-me-in-production
|
||||
POLL_INTERVAL_SECONDS=14400 # Default: 4 hours (14400s)
|
||||
OPENAI_API_KEY=sk-... # Required for toxicity analysis
|
||||
LLM_API_KEY=sk-... # Required for toxicity analysis
|
||||
```
|
||||
|
||||
## Toxicity Analysis
|
||||
|
|
@ -47,7 +47,7 @@ The system includes automated toxicity detection and manual review capabilities:
|
|||
|
||||
### Features
|
||||
|
||||
- **Automated Classification**: Uses OpenAI GPT-4o-mini to analyze posts across 12 toxicity dimensions:
|
||||
- **Automated Classification**: Uses an LLM API to analyze posts across 12 toxicity dimensions:
|
||||
- General toxicity, threats, hate speech
|
||||
- Racism, antisemitism, islamophobia
|
||||
- Sexism, homophobia, ableism
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
"""Main toxicity analysis orchestrator.
|
||||
|
||||
Runs as a one-shot batch process: fetches unscored statuses,
|
||||
classifies them in batches with GPT-4o-mini, and stores scores in PostgreSQL.
|
||||
classifies them in batches with an LLM API, and stores scores in PostgreSQL.
|
||||
|
||||
Usage:
|
||||
python -m app.analyzer
|
||||
|
|
@ -120,7 +120,7 @@ async def run() -> None:
|
|||
|
||||
db = AnalyzerDB(config.database_url)
|
||||
classifier = ToxicityClassifier(
|
||||
api_key=config.openai_api_key,
|
||||
api_key=config.llm_api_key,
|
||||
model=config.model,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
"""OpenAI-powered toxicity classifier.
|
||||
"""LLM-powered toxicity classifier.
|
||||
|
||||
Sends batches of Mastodon statuses to GPT-4o-mini for multi-category toxicity scoring.
|
||||
Sends batches of Mastodon statuses to an LLM API for multi-category toxicity scoring.
|
||||
Returns a list of dicts of category → score (0.0–1.0).
|
||||
"""
|
||||
|
||||
|
|
@ -181,7 +181,7 @@ def parse_batch_response(raw: str, batch_size: int) -> list[ToxicityScores]:
|
|||
|
||||
|
||||
class ToxicityClassifier:
|
||||
"""Async OpenAI-based toxicity classifier with batch support."""
|
||||
"""Async LLM-based toxicity classifier with batch support."""
|
||||
|
||||
def __init__(self, api_key: str, model: str = "gpt-4o-mini"):
|
||||
self.client = AsyncOpenAI(api_key=api_key)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ class AnalyzerConfig:
|
|||
"""Configuration for the toxicity analyzer."""
|
||||
|
||||
database_url: str
|
||||
openai_api_key: str
|
||||
llm_api_key: str
|
||||
model: str = "gpt-4o-mini"
|
||||
batch_size: int = 10
|
||||
concurrency: int = 5
|
||||
|
|
@ -28,7 +28,7 @@ class AnalyzerConfig:
|
|||
"""Load configuration from environment variables."""
|
||||
return cls(
|
||||
database_url=os.environ["DATABASE_URL"],
|
||||
openai_api_key=os.environ["OPENAI_API_KEY"],
|
||||
llm_api_key=os.environ["LLM_API_KEY"],
|
||||
model=os.getenv("ANALYZER_MODEL", "gpt-4o-mini"),
|
||||
batch_size=int(os.getenv("ANALYZER_BATCH_SIZE", "10")),
|
||||
concurrency=int(os.getenv("ANALYZER_CONCURRENCY", "5")),
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ services:
|
|||
environment:
|
||||
DATABASE_URL: postgresql://collector:${POSTGRES_PASSWORD:-collector_secret}@db:5432/mastodon_collector
|
||||
POLL_INTERVAL_SECONDS: ${POLL_INTERVAL_SECONDS:-14400}
|
||||
OPENAI_API_KEY: ${OPENAI_API_KEY}
|
||||
LLM_API_KEY: ${LLM_API_KEY}
|
||||
volumes:
|
||||
- ./accounts.txt:/app/accounts.txt
|
||||
depends_on:
|
||||
|
|
|
|||
|
|
@ -5,5 +5,5 @@ sqlalchemy==2.0.36
|
|||
requests==2.32.3
|
||||
apscheduler==3.10.4
|
||||
beautifulsoup4==4.12.3
|
||||
openai==1.58.1
|
||||
openai==1.58.1 # OpenAI-compatible API client (supports any LLM provider)
|
||||
asyncpg==0.30.0
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue