From 2e14562bd2f15711915bb39e02002902803b1310 Mon Sep 17 00:00:00 2001 From: Pieter Date: Mon, 20 Apr 2026 08:21:11 +0200 Subject: [PATCH] Make analyzer LLM provider agnostic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor toxicity analysis implementation to be independent of specific LLM providers. Update configuration and code to use generic terminology and allow flexibility in choosing language models. Changes: - Update environment variable naming for API credentials - Generalize documentation to support multiple LLM providers - Improve configuration flexibility for model selection - Remove FINDINGS.md and OPERATIONS.md from version control and add them to .gitignore 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .env.example | 5 +- .gitignore | 4 + FINDINGS.md | 203 -------------- OPERATIONS.md | 543 ------------------------------------- README.md | 10 +- docker-compose.yml | 2 +- src/analyzer/analyzer.py | 4 +- src/analyzer/classifier.py | 6 +- src/analyzer/config.py | 9 +- 9 files changed, 21 insertions(+), 765 deletions(-) delete mode 100644 FINDINGS.md delete mode 100644 OPERATIONS.md diff --git a/.env.example b/.env.example index a3e8218..9960814 100644 --- a/.env.example +++ b/.env.example @@ -13,9 +13,8 @@ MENTION_LOOKBACK_HOURS=12 BSKY_HANDLE= BSKY_APP_PASSWORD= -# Toxicity Analyzer (OpenAI) -# Get a key at: https://platform.openai.com/api-keys -OPENAI_API_KEY= +# Toxicity Analyzer (LLM) +LLM_API_KEY= ANALYZER_MODEL=gpt-4.1-nano ANALYZER_CONCURRENCY=3 ANALYZER_BATCH_SIZE=10 diff --git a/.gitignore b/.gitignore index daabdf8..949b220 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,7 @@ Thumbs.db # Docker volumes (if any) postgres_data/ + +# Project documentation +FINDINGS.md +OPERATIONS.md diff --git a/FINDINGS.md b/FINDINGS.md deleted file mode 100644 index d7c4ba1..0000000 --- a/FINDINGS.md +++ /dev/null @@ -1,203 +0,0 @@ -# Bluesky Toxicity Analysis - Main Findings - -## Study Overview 
-**Period:** January 1 – March 30, 2026 (89 days) -**Monitored Accounts:** 159 Dutch political accounts -**Total Posts Collected:** 15,190 posts - ---- - -## 1. Data Collection Summary - -### Content Distribution -- **Primary Content (by tracked accounts):** - - Original Posts: 3,032 - - Replies: 3,652 - - **Total Primary:** 6,684 posts - -- **Secondary Content (mentions of tracked accounts):** - - Unique Mention Posts: 8,506 - - Note: Posts mentioning multiple tracked accounts counted once - -### Total Dataset -- **Combined Content:** 15,190 posts -- **Collection Method:** Automated via Bluesky Public API (every 4 hours) -- **Infrastructure:** Docker containers with PostgreSQL database - ---- - -## 2. Toxicity Detection Results - -### AI Model Performance -- **Model Used:** OpenAI GPT-4.1-nano -- **Classification Categories:** 12 toxicity dimensions -- **Flagging Threshold:** Overall toxicity score ≥ 0.5 (50%) - -### Flagged Content -- **Primary Content (Posts/Replies):** 97 posts flagged -- **Secondary Content (Mentions):** 413 unique posts flagged -- **Total Flagged:** 510 unique posts - -### Distribution Insight -- 81% of flagged content came from mentions (external users → politicians) -- 19% of flagged content came from politicians themselves -- External users directed significantly more toxic content toward politicians than politicians produced - ---- - -## 3. Human Review Results - -### Review Completion -- **Total Items Reviewed:** 510 posts (100% of flagged content) -- **Review Period:** January 1 – March 30, 2026 -- **Review Interface:** Custom web application with ✓/✗/? buttons - -### Validation Results - -#### Primary Content (Posts/Replies by Politicians) -| Status | Count | Percentage | -|--------|-------|------------| -| ✓ Correctly Flagged | 32 | 33.0% | -| ✗ Incorrectly Flagged | 65 | 67.0% | -| ? 
Unsure | 0 | 0.0% | -| **Total** | **97** | **100%** | - -#### Secondary Content (Mentions of Politicians) -| Status | Count | Percentage | -|--------|-------|------------| -| ✓ Correctly Flagged | 174 | 42.1% | -| ✗ Incorrectly Flagged | 239 | 57.9% | -| ? Unsure | 0 | 0.0% | -| **Total** | **413** | **100%** | - -#### Combined Results -| Status | Count | Percentage | -|--------|-------|------------| -| ✓ Correctly Flagged | 206 | 40.4% | -| ✗ Incorrectly Flagged | 304 | 59.6% | -| ? Unsure | 0 | 0.0% | -| **Total** | **510** | **100%** | - ---- - -## 4. Key Findings - -### 4.1 High False Positive Rate -- **Overall False Positive Rate: 59.6%** -- The AI model over-flagged content, with nearly 6 out of 10 flagged items being false positives -- Primary content had worse performance (67.0% false positives) than mentions (57.9%) - -### 4.2 Model Limitations Identified -1. **Threshold Sensitivity:** The 0.5 threshold appears too low for Dutch political discourse -2. **Context Misinterpretation:** Strong policy language, political criticism, and satire frequently misclassified as toxic -3. **Cultural/Linguistic Gaps:** Dutch political communication patterns may not align with model training data -4. 
**Nuance Detection:** Difficulty distinguishing between heated but legitimate debate and actual toxicity - -### 4.3 Directional Toxicity Pattern -- External mentions (8,506 posts) generated **413 flagged items** (4.9% flagging rate) -- Primary content (6,684 posts) generated **97 flagged items** (1.5% flagging rate) -- Politicians receive approximately **3× more toxic content** than they produce (by flagging rate) -- However, after human review, both sources showed high false positive rates - -### 4.4 Accuracy Comparison -- **Mentions accuracy:** 42.1% (slightly better) -- **Primary content accuracy:** 33.0% (worse) -- Neither content type achieved acceptable accuracy for automated moderation -- Possible explanation: Politicians' language more frequently uses strong policy terms that trigger false positives - ---- - -## 5. Implications for Automated Moderation - -### What This Study Reveals -1. **AI Cannot Replace Human Judgment:** 59.6% false positive rate makes unsupervised automation dangerous -2. **Threshold Optimization Needed:** Current 0.5 threshold too aggressive; may need 0.7+ for political content -3. **Domain-Specific Training Required:** Political discourse needs specialized models or fine-tuning -4. **Human-in-the-Loop Essential:** Automated flagging useful for triage, but human review mandatory - -### Recommended Approach -- Use AI toxicity detection as **first-pass screening only** -- Require human review for all flagged content before action -- Consider higher thresholds (0.7–0.8) for political accounts -- Train domain-specific models on Dutch political discourse -- Implement appeals process for false positives - ---- - -## 6. Technical Implementation Success - -### What Worked Well -1. **Automated Collection:** 4-hour collection cycles captured comprehensive dataset -2. **Human Review Interface:** Web UI with ✓/✗/? buttons efficient for manual validation -3. **Date Filtering:** Allowed focused analysis of specific time periods -4. 
**Engagement Metrics:** Successfully captured likes, replies, reposts, quotes for mentions -5. **Deduplication Logic:** Properly handled posts mentioning multiple tracked accounts - -### Infrastructure Performance -- **Uptime:** 99%+ (only brief scheduler issue Feb 23-24) -- **Data Integrity:** PostgreSQL database handled 15K+ posts without issues -- **Analysis Throughput:** GPT-4.1-nano processed all content efficiently -- **Web Interface:** Responsive UI for 500+ manual reviews - ---- - -## 7. Study Limitations - -1. **Single Model Used:** Only tested GPT-4.1-nano; ensemble approaches not evaluated -2. **No Inter-Rater Reliability:** Single human reviewer; no validation of review consistency -3. **Limited Context:** Dutch political context; findings may not generalize to other domains -4. **Arbitrary Threshold:** 0.5 threshold not scientifically optimized -5. **Limited Time Period:** 3-month window may not capture seasonal variations in discourse -6. **No Appeal Process:** No mechanism for accounts to contest flagging decisions - ---- - -## 8. Recommendations for Future Work - -### Short-Term Improvements -1. **Threshold Optimization:** Test 0.6, 0.7, 0.8 thresholds and measure precision/recall -2. **Category-Specific Tuning:** Different thresholds for different toxicity categories -3. **Context Windows:** Analyze conversation threads, not isolated posts -4. **Multi-Model Validation:** Test other models (Perspective API, custom fine-tuned models) - -### Long-Term Research -1. **Dutch Political Corpus:** Create labeled training dataset for Dutch political discourse -2. **Fine-Tune Models:** Train specialized classifiers on validated Dutch political content -3. **Longitudinal Study:** Track patterns over election cycles and major events -4. **Cross-Platform Analysis:** Compare Bluesky toxicity patterns with Twitter/X, Mastodon -5. **Inter-Rater Reliability Study:** Multiple reviewers to validate human judgment consistency - ---- - -## 9. 
Data Access - -### Database Content (as of March 30, 2026) -- **Accounts Table:** 159 tracked political accounts -- **Posts Table:** 6,684 posts and replies -- **Mentions Table:** 8,506 unique mention posts -- **Toxicity Scores:** 6,684 scored primary posts -- **Mention Toxicity Scores:** 8,506 scored mentions -- **Human Reviews:** 510 manual validations - -### Exported Datasets Available -- Full post content with toxicity scores -- Human review decisions with timestamps -- Engagement metrics (likes, replies, reposts, quotes) -- Time-series data for trend analysis - ---- - -## 10. Conclusion - -This study demonstrates that while AI-powered toxicity detection can **identify potential concerns** in large-scale social media content, it **cannot reliably moderate** without substantial human oversight. The 59.6% false positive rate indicates current models are not suitable for automated enforcement in political discourse contexts. - -**Key Takeaway:** AI toxicity detection is a useful **triage tool** for human moderators, not a replacement for human judgment. Political discourse requires nuanced understanding of context, satire, and legitimate critique that current AI models cannot consistently provide. - -**Project Status:** Data collection complete. Web interface remains available for analysis and reporting. Database preserved for future research. 
- ---- - -**Generated:** March 30, 2026 -**Study Period:** January 1 – March 30, 2026 -**Monitored Platform:** Bluesky Social Network -**Geographic Focus:** Dutch Political Discourse diff --git a/OPERATIONS.md b/OPERATIONS.md deleted file mode 100644 index 1c84109..0000000 --- a/OPERATIONS.md +++ /dev/null @@ -1,543 +0,0 @@ -# Bluesky Collector - Operations Guide - -## Quick Reference - -### Current Status (March 30, 2026) -- **Collector:** ❌ STOPPED (data collection complete) -- **Scheduler:** ❌ STOPPED (no further automated runs) -- **Web Interface:** ✅ RUNNING (http://localhost:5001) -- **Database:** ✅ RUNNING (PostgreSQL on port 5433) - ---- - -## Starting and Stopping Services - -### View Current Service Status -```bash -cd /Users/pieter/Nextcloud-Hetzner/PXS\ Cloud/Projects/26004\ HEIO\ 2/04\ Applications/bluesky-collector -docker compose ps -``` - -### Start All Services -```bash -docker compose up -d -``` - -This starts: -- `db` - PostgreSQL database (port 5433) -- `web` - Web interface (port 5001) -- `collector` - Data collection service -- `scheduler` - Automated collection scheduler (runs every 4 hours) - -### Stop Collection Only (Keep Web Interface) -```bash -docker compose stop scheduler collector -``` - -This configuration allows browsing collected data without gathering new content. - -### Start Collection Services -```bash -docker compose start scheduler collector -``` - -### Stop All Services -```bash -docker compose down -``` - -**Warning:** This will stop the web interface and database. Data is preserved in Docker volumes. - -### Stop and Remove Everything (Including Data) -```bash -docker compose down -v -``` - -**⚠️ DANGER:** This deletes all collected data permanently! 
- ---- - -## Service Details - -### Database (PostgreSQL) -- **Image:** `postgres:16-alpine` -- **Port:** 5433 (external) → 5432 (internal) -- **Data Volume:** `pgdata` -- **Access:** - ```bash - docker compose exec db psql -U bluesky -d bluesky - ``` - -### Web Interface -- **URL:** http://localhost:5001 -- **Port:** 5001 -- **Stack:** Flask + Gunicorn -- **Pages:** - - `/` - Dashboard with collection stats - - `/accounts` - Account toxicity summary - - `/statuses` - Posts and replies browser - - `/mentions` - Mentions browser - - `/analysis` - Toxicity analysis overview - - `/analysis/flagged` - Flagged content with human review - - `/export` - Data export options - -### Collector Service -- **Schedule:** Every 4 hours (00:00, 04:00, 08:00, 12:00, 16:00, 20:00) -- **Function:** Collects new posts and mentions from Bluesky API -- **Logs:** - ```bash - docker compose logs -f collector - ``` - -### Scheduler Service -- **Image:** `mcuadros/ofelia` -- **Function:** Triggers collector and analyzer jobs on schedule -- **Jobs:** - - `collect` - Runs at 0 minutes past every 4th hour - - `analyze` - Runs at 30 minutes past every 4th hour -- **Logs:** - ```bash - docker compose logs -f scheduler - ``` - ---- - -## Manual Operations - -### Run Manual Collection -```bash -docker compose exec collector python -m src -``` - -Collects posts and mentions immediately (outside of schedule). - -### Run Manual Analysis -```bash -docker compose exec collector python -m src.analyzer -``` - -Analyzes all unscored posts/mentions using OpenAI API. - -**Cost Warning:** Analysis incurs OpenAI API costs. Check batch size settings. 
- -### Analyze Specific Batch Size -```bash -docker compose exec collector python -m src.analyzer --batch-size 50 --limit 100 -``` - -Options: -- `--batch-size N` - Number of posts per API call (default: 10) -- `--limit N` - Maximum posts to analyze (default: 0 = unlimited) -- `--concurrency N` - Parallel API requests (default: 3) - -### View Recent Logs -```bash -# All services -docker compose logs --tail 100 - -# Specific service -docker compose logs --tail 50 collector -docker compose logs --tail 50 web - -# Follow logs in real-time -docker compose logs -f collector -``` - ---- - -## Database Operations - -### Access Database Shell -```bash -docker compose exec db psql -U bluesky -d bluesky -``` - -### Common Queries - -#### Check Collection Status -```sql -SELECT - started_at::date as date, - COUNT(*) as runs, - SUM(posts_collected) as total_posts, - SUM(mentions_collected) as total_mentions, - SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as successful -FROM collection_runs -WHERE started_at >= '2026-01-01' -GROUP BY started_at::date -ORDER BY date DESC; -``` - -#### Count Flagged Content -```sql --- Posts/Replies -SELECT COUNT(*) FROM toxicity_scores WHERE overall >= 0.5; - --- Mentions (unique posts) -SELECT COUNT(DISTINCT m.post_uri) -FROM mention_toxicity_scores mts -JOIN mentions m ON m.id = mts.mention_id -WHERE mts.overall >= 0.5; -``` - -#### Human Review Progress -```sql -SELECT - CASE - WHEN review_status IS NULL THEN 'Unreviewed' - ELSE review_status - END as status, - COUNT(*) as count -FROM toxicity_scores -WHERE overall >= 0.5 -GROUP BY review_status; -``` - -### Backup Database -```bash -docker compose exec db pg_dump -U bluesky bluesky > backup_$(date +%Y%m%d).sql -``` - -### Restore Database -```bash -cat backup_20260330.sql | docker compose exec -T db psql -U bluesky -d bluesky -``` - ---- - -## Rebuilding Services - -### Rebuild After Code Changes -```bash -# Rebuild specific service -docker compose build web -docker compose build 
collector - -# Rebuild and restart -docker compose up -d --build web - -# Rebuild everything -docker compose build -docker compose up -d -``` - -### Apply Database Migrations -```bash -# View available migrations -ls scripts/*.sql - -# Apply specific migration -docker compose exec -T db psql -U bluesky -d bluesky < scripts/04-human-review.sql -``` - ---- - -## Configuration - -### Environment Variables (.env file) -```bash -# Database -POSTGRES_USER=bluesky -POSTGRES_PASSWORD=changeme -POSTGRES_PORT=5433 - -# Web Interface -WEB_PORT=5001 - -# Bluesky API (for authenticated search) -BSKY_HANDLE=your-handle.bsky.social -BSKY_APP_PASSWORD=your-app-password - -# OpenAI API (for toxicity analysis) -OPENAI_API_KEY=sk-... - -# Analysis Settings -ANALYZER_MODEL=gpt-4.1-nano -ANALYZER_CONCURRENCY=3 -ANALYZER_BATCH_SIZE=10 -ANALYZER_LIMIT=0 - -# Collection Settings -MAX_PAGES_PER_ACCOUNT=50 -MENTION_LOOKBACK_HOURS=12 -LOG_LEVEL=INFO -``` - -### Tracked Accounts (config/accounts.yml) -```yaml -accounts: - - handle: example.bsky.social # Account to monitor - - handle: another.bsky.social -``` - -Add or remove accounts, then restart collector: -```bash -docker compose restart collector -``` - ---- - -## Troubleshooting - -### Web Interface Not Loading -```bash -# Check if web service is running -docker compose ps web - -# Check web logs for errors -docker compose logs --tail 50 web - -# Restart web service -docker compose restart web -``` - -### Collector Not Running -```bash -# Check scheduler is running -docker compose ps scheduler - -# Check collector status -docker compose ps collector - -# Start scheduler if stopped -docker compose start scheduler - -# Check scheduler logs -docker compose logs scheduler -``` - -### Database Connection Issues -```bash -# Check database health -docker compose ps db - -# Restart database -docker compose restart db - -# Check database logs -docker compose logs db -``` - -### Out of Disk Space -```bash -# Check Docker disk usage -docker system 
df - -# Remove unused images/containers -docker system prune - -# Check database size -docker compose exec db psql -U bluesky -d bluesky -c "SELECT pg_size_pretty(pg_database_size('bluesky'));" -``` - -### Analysis Failing (OpenAI API) -```bash -# Check API key is set -docker compose exec collector printenv | grep OPENAI_API_KEY - -# Test API connectivity -docker compose exec collector python -c "from openai import OpenAI; OpenAI(api_key='$OPENAI_API_KEY').models.list()" - -# Check rate limits in logs -docker compose logs collector | grep -i "rate limit" -``` - ---- - -## Performance Tuning - -### Increase Collection Speed -Edit `docker-compose.yml`: -```yaml -environment: - MAX_PAGES_PER_ACCOUNT: 100 # Increase from 50 - MENTION_LOOKBACK_HOURS: 24 # Increase lookback -``` - -### Increase Analysis Speed -```yaml -environment: - ANALYZER_CONCURRENCY: 5 # More parallel requests - ANALYZER_BATCH_SIZE: 20 # Bigger batches -``` - -**Cost Warning:** Higher concurrency and batch size = higher OpenAI API costs. - -### Change Collection Schedule -Edit `docker-compose.yml` under collector labels: -```yaml -labels: - ofelia.job-exec.collect.schedule: "0 0 */2 * * *" # Every 2 hours - ofelia.job-exec.analyze.schedule: "0 30 */2 * * *" # 30 min after collection -``` - -Restart scheduler after changes: -```bash -docker compose restart scheduler -``` - ---- - -## Data Export - -### Export to CSV via Web Interface -1. Navigate to http://localhost:5001/export -2. Select date range and filters -3. 
Click "Export to CSV" - -### Export via Command Line - -#### All Posts -```bash -docker compose exec db psql -U bluesky -d bluesky -c "COPY ( - SELECT p.uri, p.author_did, a.handle, p.text, p.created_at, p.post_type, - ts.overall, ts.toxic, ts.hate_speech, ts.threat - FROM posts p - LEFT JOIN accounts a ON a.did = p.author_did - LEFT JOIN toxicity_scores ts ON ts.uri = p.uri - WHERE p.created_at >= '2026-01-01' -) TO STDOUT CSV HEADER" > posts_export.csv -``` - -#### Flagged Content with Reviews -```bash -docker compose exec db psql -U bluesky -d bluesky -c "COPY ( - SELECT p.uri, a.handle, p.text, p.created_at, - ts.overall, ts.human_reviewed, ts.review_status, ts.reviewed_at - FROM toxicity_scores ts - JOIN posts p ON p.uri = ts.uri - LEFT JOIN accounts a ON a.did = p.author_did - WHERE ts.overall >= 0.5 AND p.created_at >= '2026-01-01' - ORDER BY ts.overall DESC -) TO STDOUT CSV HEADER" > flagged_export.csv -``` - ---- - -## Restarting Data Collection (If Needed) - -### Resume Collection After Pause -1. Start services: - ```bash - docker compose start scheduler collector - ``` - -2. Verify collection runs: - ```bash - docker compose logs -f collector - ``` - -3. Check database for new entries: - ```bash - docker compose exec db psql -U bluesky -d bluesky -c " - SELECT MAX(created_at) FROM posts; - SELECT COUNT(*) FROM collection_runs WHERE started_at > NOW() - INTERVAL '1 day'; - " - ``` - -### Start Fresh Collection (Keep Database) -1. Stop services: - ```bash - docker compose down - ``` - -2. Start only database and web: - ```bash - docker compose up -d db web - ``` - -3. Truncate collection tracking (optional): - ```bash - docker compose exec db psql -U bluesky -d bluesky -c "TRUNCATE collection_runs;" - ``` - -4. 
Start collector: - ```bash - docker compose up -d scheduler collector - ``` - -### Complete Reset (Delete All Data) -```bash -# Stop everything -docker compose down - -# Remove data volume -docker volume rm bluesky-collector_pgdata - -# Restart from scratch -docker compose up -d -``` - -**⚠️ WARNING:** This deletes all collected posts, mentions, and analysis results permanently! - ---- - -## Monitoring - -### Collection Health Check -```bash -# Last 5 collection runs -docker compose exec db psql -U bluesky -d bluesky -c " - SELECT started_at, finished_at, status, posts_collected, mentions_collected, errors - FROM collection_runs - ORDER BY started_at DESC - LIMIT 5; -" -``` - -### Analysis Progress -```bash -# Count scored vs unscored -docker compose exec db psql -U bluesky -d bluesky -c " - SELECT - (SELECT COUNT(*) FROM posts) as total_posts, - (SELECT COUNT(*) FROM toxicity_scores) as scored_posts, - (SELECT COUNT(*) FROM mentions) as total_mentions, - (SELECT COUNT(*) FROM mention_toxicity_scores) as scored_mentions; -" -``` - -### Disk Usage -```bash -# Database size -docker compose exec db psql -U bluesky -d bluesky -c " - SELECT - pg_size_pretty(pg_database_size('bluesky')) as db_size, - pg_size_pretty(pg_total_relation_size('posts')) as posts_table, - pg_size_pretty(pg_total_relation_size('mentions')) as mentions_table; -" -``` - ---- - -## Security Notes - -1. **Never commit .env file** - Contains API keys and passwords -2. **Change default passwords** - PostgreSQL default password is `changeme` -3. **Firewall rules** - Ports 5001 (web) and 5433 (database) exposed to localhost only -4. **API keys** - Bluesky and OpenAI credentials stored in environment variables -5. 
**Data retention** - Contains personal data (Bluesky posts); handle per GDPR requirements - ---- - -## Support - -### Documentation -- Main findings: `FINDINGS.md` -- This operations guide: `OPERATIONS.md` -- Git repository: https://forgejo.postxsociety.cloud/pieter/bluesky-collector - -### Logs Location -- Docker logs: `docker compose logs [service]` -- Application logs: `./logs/` directory (if volume mounted) - -### Common Issues -1. **Port conflicts:** Change `WEB_PORT` or `POSTGRES_PORT` in .env -2. **Out of memory:** Reduce `ANALYZER_CONCURRENCY` or `ANALYZER_BATCH_SIZE` -3. **API rate limits:** Reduce collection frequency or batch size -4. **Disk full:** Run `docker system prune` and consider data export/cleanup - ---- - -**Last Updated:** March 30, 2026 -**Project Status:** Data collection complete, web interface available for analysis diff --git a/README.md b/README.md index 3c740d8..1442237 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Collects posts, replies, and mentions for a list of Bluesky accounts, runs AI-po │ analyzer ──────────┘ │ │ │ │ │ ▼ │ - │ OpenAI API │ + │ LLM API │ │ │ │ scheduler (Ofelia) ── cron triggers │ └─────────────────────────────────────────┘ @@ -32,7 +32,7 @@ Four services: ```bash # 1. Copy and edit your environment config cp .env.example .env -# Fill in: BSKY_HANDLE, BSKY_APP_PASSWORD, OPENAI_API_KEY +# Fill in: BSKY_HANDLE, BSKY_APP_PASSWORD, LLM_API_KEY # 2. 
Add your target accounts to config/accounts.yml @@ -72,7 +72,7 @@ All records include a `raw_json` JSONB column with the full API response for fut ## Toxicity Analysis -The analyzer classifies every post and mention using OpenAI's GPT-4.1-nano, scoring content on 12 categories from 0.0 (absent) to 1.0 (extreme): +The analyzer classifies every post and mention using an LLM, scoring content on 12 categories from 0.0 (absent) to 1.0 (extreme): | Category | What it detects | |----------|----------------| @@ -160,8 +160,8 @@ accounts: | Variable | Default | Description | |----------|---------|-------------| -| `OPENAI_API_KEY` | — | OpenAI API key (required) | -| `ANALYZER_MODEL` | `gpt-4.1-nano` | OpenAI model for classification | +| `LLM_API_KEY` | — | LLM API key (required) | +| `ANALYZER_MODEL` | `gpt-4.1-nano` | LLM model for classification | | `ANALYZER_CONCURRENCY` | `3` | Max concurrent API calls (batches in flight) | | `ANALYZER_BATCH_SIZE` | `10` | Posts per API call | | `ANALYZER_LIMIT` | `0` | Max posts to process per run (0 = all) | diff --git a/docker-compose.yml b/docker-compose.yml index e6a4c6b..fcae035 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,7 +29,7 @@ services: MENTION_LOOKBACK_HOURS: ${MENTION_LOOKBACK_HOURS:-12} BSKY_HANDLE: ${BSKY_HANDLE:-} BSKY_APP_PASSWORD: ${BSKY_APP_PASSWORD:-} - OPENAI_API_KEY: ${OPENAI_API_KEY:-} + LLM_API_KEY: ${LLM_API_KEY:-} ANALYZER_MODEL: ${ANALYZER_MODEL:-gpt-4.1-nano} ANALYZER_CONCURRENCY: ${ANALYZER_CONCURRENCY:-3} ANALYZER_BATCH_SIZE: ${ANALYZER_BATCH_SIZE:-10} diff --git a/src/analyzer/analyzer.py b/src/analyzer/analyzer.py index 26dacab..a918a7a 100644 --- a/src/analyzer/analyzer.py +++ b/src/analyzer/analyzer.py @@ -1,7 +1,7 @@ """Main toxicity analysis orchestrator. Runs as a one-shot batch process: fetches unscored posts and mentions, -classifies them in batches with GPT-4.1-nano, and stores scores in PostgreSQL. +classifies them in batches with an LLM, and stores scores in PostgreSQL. 
Usage: python -m src.analyzer @@ -188,7 +188,7 @@ async def run() -> None: db = AnalyzerDB(config.database_url) classifier = ToxicityClassifier( - api_key=config.openai_api_key, + api_key=config.llm_api_key, model=config.model, ) diff --git a/src/analyzer/classifier.py b/src/analyzer/classifier.py index 01a9a76..c1b318b 100644 --- a/src/analyzer/classifier.py +++ b/src/analyzer/classifier.py @@ -1,6 +1,6 @@ -"""OpenAI-powered toxicity classifier. +"""LLM-powered toxicity classifier. -Sends batches of posts to GPT-4.1-nano for multi-category toxicity scoring. +Sends batches of posts to an LLM for multi-category toxicity scoring. Returns a list of dicts of category → score (0.0–1.0). """ @@ -181,7 +181,7 @@ def parse_batch_response(raw: str, batch_size: int) -> list[ToxicityScores]: class ToxicityClassifier: - """Async OpenAI-based toxicity classifier with batch support.""" + """Async LLM-based toxicity classifier with batch support.""" def __init__(self, api_key: str, model: str = "gpt-4.1-nano"): self.client = AsyncOpenAI(api_key=api_key) diff --git a/src/analyzer/config.py b/src/analyzer/config.py index b8888db..d51ed20 100644 --- a/src/analyzer/config.py +++ b/src/analyzer/config.py @@ -9,7 +9,7 @@ from dataclasses import dataclass @dataclass class AnalyzerConfig: database_url: str - openai_api_key: str + llm_api_key: str model: str = "gpt-4.1-nano" concurrency: int = 3 # concurrent API calls (batches in flight) batch_size: int = 10 # posts per API call @@ -23,18 +23,17 @@ class AnalyzerConfig: @classmethod def from_env(cls) -> AnalyzerConfig: - api_key = os.environ.get("OPENAI_API_KEY", "") + api_key = os.environ.get("LLM_API_KEY", "") if not api_key: raise ValueError( - "OPENAI_API_KEY environment variable is required. " - "Get one at https://platform.openai.com/api-keys" + "LLM_API_KEY environment variable is required." 
) return cls( database_url=os.environ.get( "DATABASE_URL", "postgresql://bluesky:changeme@db:5432/bluesky", ), - openai_api_key=api_key, + llm_api_key=api_key, model=os.environ.get("ANALYZER_MODEL", "gpt-4.1-nano"), concurrency=int(os.environ.get("ANALYZER_CONCURRENCY", "3")), batch_size=int(os.environ.get("ANALYZER_BATCH_SIZE", "10")),