diff --git a/.claude/settings.local.json b/.claude/settings.local.json index cdcb402..775f3e3 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -3,7 +3,13 @@ "allow": [ "Bash(git push:*)", "Read(//tmp/bluesky-collector/**)", - "Bash(mkdir -p \"/Users/pieter/Nextcloud-Hetzner/PXS Cloud/Projects/26004 HEIO 2/04 Applications/mastodon-collector/app/analyzer\")" + "Bash(mkdir -p \"/Users/pieter/Nextcloud-Hetzner/PXS Cloud/Projects/26004 HEIO 2/04 Applications/mastodon-collector/app/analyzer\")", + "Bash(docker-compose build)", + "Bash(docker compose build)", + "Bash(docker compose up -d)", + "Bash(docker exec mastodon-collector-collector-1 bash -c \"ANALYZER_LIMIT=100 python -m app.analyzer\")", + "Bash(docker compose build collector)", + "Bash(docker compose up -d collector)" ], "deny": [], "ask": [] diff --git a/app/analyzer/db.py b/app/analyzer/db.py index 0112949..eaf6c2b 100644 --- a/app/analyzer/db.py +++ b/app/analyzer/db.py @@ -57,11 +57,11 @@ class AnalyzerDB: Skips boosts (reblogs) and statuses with empty content. """ query = """ - SELECT s.id, s.content, s.account_id + SELECT s.id, s.content, s.account_db_id FROM statuses s LEFT JOIN toxicity_scores ts ON ts.status_id = s.id WHERE ts.status_id IS NULL - AND s.reblog_of_id IS NULL + AND s.status_type != 'reblog' AND s.content IS NOT NULL AND s.content != '' ORDER BY s.created_at DESC diff --git a/app/templates/analysis.html b/app/templates/analysis.html new file mode 100644 index 0000000..1f8da90 --- /dev/null +++ b/app/templates/analysis.html @@ -0,0 +1,734 @@ +{% extends "base.html" %} + +{% block title %}Toxicity Analysis Dashboard{% endblock %} + +{% block extra_css %} + +{% endblock %} + +{% block content %} + + + +
+ +
+
+
Total Scored
+
{{ (stats.total_scored_posts + stats.total_scored_mentions) | format_number }}
+
+
+ {{ stats.total_scored_posts | format_number }} posts + + + {{ stats.total_scored_mentions | format_number }} mentions +
+
+ + +
+
+
Flagged Posts
+
{{ stats.flagged_posts | format_number }}
+
+
+ + {{ "%.2f" | format(100.0 * stats.flagged_posts / (stats.total_scored_posts or 1)) }}% + + of scored posts +
+
+
+
+
+ + +
+
+
Flagged Mentions
+
{{ stats.flagged_mentions | format_number }}
+
+
+ + {{ "%.2f" | format(100.0 * stats.flagged_mentions / (stats.total_scored_mentions or 1)) }}% + + of scored mentions +
+
+
+
+
+ + +
+
+
Average Toxicity
+
{{ "%.1f" | format(100.0 * ((stats.avg_toxicity_posts + stats.avg_toxicity_mentions) / 2.0)) }}%
+
+
+ Posts: {{ "%.2f" | format(100.0 * stats.avg_toxicity_posts) }}% + + Mentions: {{ "%.2f" | format(100.0 * stats.avg_toxicity_mentions) }}% +
+
+
+
+
+
+ + +
+
Toxicity Trends Over Time
+
+ +
+
+ + +
+
Toxicity by Category
+
+ +
+
+ + +
+
Recent Analysis Runs
+ {% if runs %} +
+ + + + + + + + + + + + + + {% for run in runs[:5] %} + + + + + + + + + + {% endfor %} + +
StartedDurationPosts ScoredMentions ScoredErrorsCostStatus
{{ run.started_at | time_ago }}{% if run.duration_secs is not none %}{{ "%.0f" | format(run.duration_secs | float) }}s{% else %}—{% endif %}{{ run.posts_scored | format_number }}{{ run.mentions_scored | format_number }}{{ run.errors }}${{ "%.4f" | format(run.cost_usd | default(0) | float) }} + + {{ run.status }} + +
+
+ {% else %} +
+

No analysis runs yet. Start a new analysis to see results here.

+
+ {% endif %} +
+ + +
+ +
+ + + + +{% endblock %} diff --git a/app/templates/base.html b/app/templates/base.html index d370f5a..462be28 100644 --- a/app/templates/base.html +++ b/app/templates/base.html @@ -242,6 +242,7 @@ Dashboard Accounts Statuses + Analysis Export CSV API diff --git a/app/templates/flagged.html b/app/templates/flagged.html new file mode 100644 index 0000000..ec8ca2c --- /dev/null +++ b/app/templates/flagged.html @@ -0,0 +1,785 @@ +{% extends "base.html" %} + +{% block title %}Flagged Content{% endblock %} + +{% block content %} +
+ + + + +
+
+
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+ + +
+
+ + + {% if items %} + + {% macro sort_header(col, label) %} + {% set new_dir = 'asc' if (sort == col and direction == 'desc') else 'desc' %} + + {{ label }} + {% if sort == col %} + {{ '▼' if direction == 'desc' else '▲' }} + {% endif %} + + {% endmacro %} + +
+ + + + + + + + + + + + + + {% for item in items %} + + + + + + + + + + + + + + + + + + + + + + + {% endfor %} + +
Type{{ sort_header('author_handle', 'Author') }}Content{{ sort_header('overall', 'Score') }}Category{{ sort_header('created_at', 'Created') }}Review
+ + {% if item.item_type == 'post' %} + Post + {% elif item.item_type == 'reply' %} + Reply + {% elif item.item_type == 'mention' %} + Mention + {% endif %} + + + {% if item.author_handle %} + + @{{ item.author_handle }} + + {% else %} + {{ item.author_did[:30] }}… + {% endif %} + {% if item.item_type == 'mention' and item.mentioned_handle %} + + + @{{ item.mentioned_handle }} + + {% endif %} + + {% if item.source_type == 'post' %} + + {{ item.text | truncate_text(200) }} + + {% else %} + {# Convert at://did:plc:xxx/app.bsky.feed.post/yyy to https://bsky.app/profile/handle/post/yyy #} + {% set uri_parts = item.item_id.replace('at://', '').split('/') %} + {% if uri_parts|length >= 3 and item.author_handle %} + + {{ item.text | truncate_text(200) }} + + {% else %} + {{ item.text | truncate_text(200) }} + {% endif %} + {% endif %} + +
+ {% set score_pct = (item.overall * 100) | int %} + {% if item.overall < 0.3 %} + {% set bar_class = 'score-bar-low' %} + {% elif item.overall < 0.6 %} + {% set bar_class = 'score-bar-medium' %} + {% else %} + {% set bar_class = 'score-bar-high' %} + {% endif %} +
+ {{ "%.2f" | format(item.overall) }} +
+
+ {% if item.top_category %} + {{ item.top_category }} + {% else %} + + {% endif %} + + + {{ item.created_at | time_ago }} + + +
+ + + +
+
+
+ + + {% if total_pages > 1 %} + + {% endif %} + + {% else %} + +
+

+

No flagged content found

+

Try adjusting your filters or threshold

+
+ {% endif %} +
+{% endblock %} + +{% block extra_css %} + +{% endblock %} + +{% block extra_js %} + +{% endblock %} diff --git a/app/web.py b/app/web.py index bfc64ee..1ce2182 100644 --- a/app/web.py +++ b/app/web.py @@ -23,6 +23,58 @@ logger = logging.getLogger(__name__) app = Flask(__name__) app.secret_key = os.environ.get("FLASK_SECRET_KEY", "dev-secret-key") + +# Template filters +@app.template_filter('format_number') +def format_number(value): + """Format number with commas.""" + try: + return f"{int(value):,}" + except (ValueError, TypeError): + return value + + +@app.template_filter('time_ago') +def time_ago(dt): + """Convert datetime to time ago string.""" + if not dt: + return "Never" + from datetime import datetime, timezone + now = datetime.now(timezone.utc) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + diff = now - dt + + seconds = diff.total_seconds() + if seconds < 60: + return "just now" + elif seconds < 3600: + mins = int(seconds / 60) + return f"{mins}m ago" + elif seconds < 86400: + hours = int(seconds / 3600) + return f"{hours}h ago" + elif seconds < 604800: + days = int(seconds / 86400) + return f"{days}d ago" + else: + weeks = int(seconds / 604800) + return f"{weeks}w ago" + + +@app.template_filter('truncate_text') +def truncate_text(text, length=200): + """Truncate text to specified length.""" + if not text: + return "" + from bs4 import BeautifulSoup + # Strip HTML tags first + text = BeautifulSoup(text, 'html.parser').get_text() + if len(text) <= length: + return text + return text[:length] + "..." + + # Initialize database on startup with app.app_context(): init_db() diff --git a/docker-compose.yml b/docker-compose.yml index 7c2d970..06674f8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -40,6 +40,7 @@ services: environment: DATABASE_URL: postgresql://collector:${POSTGRES_PASSWORD:-collector_secret}@db:5432/mastodon_collector POLL_INTERVAL_SECONDS: ${POLL_INTERVAL_SECONDS:-14400} + OPENAI_API_KEY: ${OPENAI_API_KEY} volumes: - ./accounts.txt:/app/accounts.txt depends_on: