"""Flask web application for managing monitored accounts and viewing collected data."""

import os
import logging
from datetime import datetime, timezone

from flask import Flask, render_template, request, redirect, url_for, flash, jsonify
from sqlalchemy import func, desc

from app.db import (
    init_db,
    get_session,
    MonitoredAccount,
    Status,
    Mention,
    CollectionLog,
)
from app.mastodon_api import lookup_account, MastodonAPIError

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "dev-secret-key")
if "FLASK_SECRET_KEY" not in os.environ:
    # The fallback key is public knowledge (it is in the source), so anything
    # signed with it (e.g. the flash-message session cookie) can be forged.
    logger.warning("FLASK_SECRET_KEY is not set; using the insecure development key")


# Template filters

@app.template_filter('format_number')
def format_number(value):
    """Format a number with thousands separators (e.g. ``1234567`` -> ``1,234,567``).

    The value is coerced with ``int()``; if that fails (``ValueError`` or
    ``TypeError``) the value is returned unchanged so templates never break.
    """
    try:
        return f"{int(value):,}"
    except (ValueError, TypeError):
        return value


@app.template_filter('time_ago')
def time_ago(dt):
    """Render a datetime as a short relative string such as ``5m ago``.

    Returns ``"Never"`` for a falsy value. Naive datetimes are interpreted as
    UTC. Anything less than 60 seconds old — including datetimes in the
    future, whose age is negative — is rendered as ``"just now"``.
    """
    if not dt:
        return "Never"

    if dt.tzinfo is None:
        # Treat naive timestamps as UTC so the subtraction below is valid.
        dt = dt.replace(tzinfo=timezone.utc)
    seconds = (datetime.now(timezone.utc) - dt).total_seconds()

    if seconds < 60:
        return "just now"
    # (exclusive upper bound in seconds, seconds per unit, suffix)
    for limit, unit_seconds, suffix in (
        (3600, 60, "m"),
        (86400, 3600, "h"),
        (604800, 86400, "d"),
    ):
        if seconds < limit:
            return f"{int(seconds / unit_seconds)}{suffix} ago"
    return f"{int(seconds / 604800)}w ago"


@app.template_filter('truncate_text')
def truncate_text(text, length=200):
    """Strip HTML from ``text`` and truncate it to at most ``length`` characters.

    Returns ``""`` for falsy input. When the tag-stripped text is longer than
    ``length``, the first ``length`` characters are returned followed by
    ``"..."``; otherwise the stripped text is returned as-is.
    """
    if not text:
        return ""
    # Imported lazily so the module can be loaded before bs4 is needed.
    from bs4 import BeautifulSoup

    # Strip HTML tags first so the length limit applies to visible text.
    text = BeautifulSoup(text, 'html.parser').get_text()
    if len(text) <= length:
        return text
    return text[:length] + "..."
@app.template_filter('encode_uri')
def encode_uri(uri):
    """Percent-encode ``uri`` for safe use as a query-parameter value.

    The value is converted with ``str()`` and every reserved character,
    including ``/``, is escaped (``safe=''``).
    """
    from urllib.parse import quote
    return quote(str(uri), safe='')


# Initialize database on startup
with app.app_context():
    init_db()


@app.route("/")
def index():
    """Dashboard overview: global totals, per-account stats and recent logs."""
    session = get_session()
    try:
        accounts = (
            session.query(MonitoredAccount)
            .order_by(MonitoredAccount.instance, MonitoredAccount.username)
            .all()
        )

        total_statuses = session.query(func.count(Status.id)).scalar() or 0

        # One grouped query instead of a separate COUNT per status type.
        type_counts = {
            status_type: count
            for status_type, count in (
                session.query(Status.status_type, func.count(Status.id))
                .group_by(Status.status_type)
            )
        }

        # One grouped query instead of a separate COUNT per account.
        counts_by_account = {
            account_db_id: count
            for account_db_id, count in (
                session.query(Status.account_db_id, func.count(Status.id))
                .group_by(Status.account_db_id)
            )
        }

        # Per-account stats
        account_stats = []
        for acct in accounts:
            last_log = (
                session.query(CollectionLog)
                .filter_by(account_db_id=acct.id)
                .order_by(desc(CollectionLog.started_at))
                .first()
            )
            account_stats.append({
                "account": acct,
                "status_count": counts_by_account.get(acct.id, 0),
                "last_log": last_log,
            })

        # Recent collection logs
        recent_logs = (
            session.query(CollectionLog)
            .order_by(desc(CollectionLog.started_at))
            .limit(20)
            .all()
        )

        return render_template(
            "index.html",
            account_stats=account_stats,
            total_statuses=total_statuses,
            total_posts=type_counts.get("post", 0),
            total_replies=type_counts.get("reply", 0),
            total_mentions=type_counts.get("mention", 0),
            total_reblogs=type_counts.get("reblog", 0),
            recent_logs=recent_logs,
        )
    finally:
        session.close()


@app.route("/accounts")
def accounts_list():
    """List all monitored accounts, ordered by instance then username."""
    session = get_session()
    try:
        accounts = (
            session.query(MonitoredAccount)
            .order_by(MonitoredAccount.instance, MonitoredAccount.username)
            .all()
        )
        return render_template("accounts.html", accounts=accounts)
    finally:
        session.close()


@app.route("/accounts/add", methods=["POST"])
def accounts_add():
    """Add a new account to monitor from the ``handle`` form field.

    The handle must look like ``@user@instance`` (leading ``@`` optional).
    An existing inactive account is re-activated instead of duplicated. If the
    remote lookup fails with ``MastodonAPIError`` the account is still added,
    just without the resolved profile fields. Always redirects back to the
    accounts list with a flash message.
    """
    handle = request.form.get("handle", "").strip().lstrip("@")
    username, separator, instance = handle.partition("@")

    # The instance is handed to the remote lookup, so reject anything that
    # cannot be a bare host name (extra "@", a path, or embedded whitespace).
    is_valid = (
        bool(separator)
        and bool(username)
        and bool(instance)
        and "@" not in instance
        and "/" not in instance
        and not any(ch.isspace() for ch in handle)
    )
    if not is_valid:
        flash("Invalid handle format. Use @user@instance.social", "error")
        return redirect(url_for("accounts_list"))

    session = get_session()
    try:
        existing = (
            session.query(MonitoredAccount)
            .filter_by(username=username, instance=instance)
            .first()
        )
        if existing:
            if not existing.is_active:
                existing.is_active = True
                session.commit()
                flash(f"Re-activated {existing.handle}", "success")
            else:
                flash(f"{existing.handle} is already being monitored", "info")
            return redirect(url_for("accounts_list"))

        # Try to resolve the account first
        try:
            data = lookup_account(instance, username)
            acct = MonitoredAccount(
                username=username,
                instance=instance,
                account_id=data["id"],
                display_name=data.get("display_name", ""),
                avatar_url=data.get("avatar", ""),
                note=data.get("note", ""),
                is_active=True,
            )
        except MastodonAPIError as e:
            # Deliberate best-effort: keep the account even when it cannot be
            # resolved right now.
            logger.warning(
                "Could not resolve account @%s@%s: %s — adding anyway",
                username, instance, e,
            )
            acct = MonitoredAccount(
                username=username,
                instance=instance,
                is_active=True,
            )

        session.add(acct)
        session.commit()
        flash(f"Added {acct.handle} to monitoring list", "success")
        return redirect(url_for("accounts_list"))
    finally:
        session.close()


# The view requires ``account_id``, so the rule must declare it as a URL
# variable; without it Flask would call the view with no arguments.
@app.route("/accounts/<int:account_id>/toggle", methods=["POST"])
def accounts_toggle(account_id):
    """Toggle an account's active status and redirect to the accounts list.

    An unknown ``account_id`` is ignored silently (no flash message).
    """
    session = get_session()
    try:
        acct = session.query(MonitoredAccount).get(account_id)
        if acct:
            acct.is_active = not acct.is_active
            session.commit()
            state = "activated" if acct.is_active else "paused"
            flash(f"{state.capitalize()} monitoring for {acct.handle}", "success")
        return redirect(url_for("accounts_list"))
    finally:
        session.close()
# The view requires ``account_id``, so the rule must declare it as a URL
# variable; without it Flask would call the view with no arguments.
@app.route("/accounts/<int:account_id>/delete", methods=["POST"])
def accounts_delete(account_id):
    """Delete an account and all its collected data, then redirect to the list.

    An unknown ``account_id`` is ignored silently (no flash message).
    """
    session = get_session()
    try:
        acct = session.query(MonitoredAccount).get(account_id)
        if acct:
            handle = acct.handle
            # Delete associated statuses (cascades to mentions, media, tags).
            # NOTE(review): bulk Query.delete() does not run ORM-level
            # cascades, so this presumably relies on ON DELETE CASCADE
            # foreign keys in the schema — confirm against app.db.
            session.query(Status).filter_by(account_db_id=acct.id).delete()
            session.query(CollectionLog).filter_by(account_db_id=acct.id).delete()
            session.delete(acct)
            session.commit()
            flash(f"Deleted {handle} and all collected data", "success")
        return redirect(url_for("accounts_list"))
    finally:
        session.close()


def _pagination_args(default_per_page, max_per_page=500):
    """Read ``page``/``per_page`` from the query string, clamped to sane bounds.

    ``page`` is at least 1 and ``per_page`` lies in ``[1, max_per_page]``, so
    callers never compute a negative offset or divide by zero.
    """
    page = max(1, request.args.get("page", 1, type=int))
    per_page = request.args.get("per_page", default_per_page, type=int)
    per_page = min(max(1, per_page), max_per_page)
    return page, per_page


def _parse_iso_datetime(value):
    """Parse an ISO 8601 datetime string; return ``None`` when it is malformed."""
    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11 on.
    if value.endswith(("Z", "z")):
        value = value[:-1] + "+00:00"
    try:
        return datetime.fromisoformat(value)
    except ValueError:
        return None


@app.route("/statuses")
def statuses_list():
    """Browse collected statuses with filters.

    Query parameters: ``page``, ``per_page`` (default 50, capped at 500),
    ``account_id``, ``type`` (status type) and ``q`` (case-insensitive
    substring search on the text content).
    """
    session = get_session()
    try:
        page, per_page = _pagination_args(default_per_page=50)
        account_id = request.args.get("account_id", type=int)
        status_type = request.args.get("type", "")
        search = request.args.get("q", "").strip()

        query = session.query(Status).join(MonitoredAccount)

        if account_id:
            query = query.filter(Status.account_db_id == account_id)
        if status_type:
            query = query.filter(Status.status_type == status_type)
        if search:
            query = query.filter(Status.text_content.ilike(f"%{search}%"))

        total = query.count()
        statuses = (
            query.order_by(desc(Status.created_at))
            .offset((page - 1) * per_page)
            .limit(per_page)
            .all()
        )

        accounts = session.query(MonitoredAccount).order_by(MonitoredAccount.username).all()
        # Ceiling division; always report at least one page.
        total_pages = max(1, (total + per_page - 1) // per_page)

        return render_template(
            "statuses.html",
            statuses=statuses,
            accounts=accounts,
            page=page,
            per_page=per_page,
            total=total,
            total_pages=total_pages,
            current_account_id=account_id,
            current_type=status_type,
            search=search,
        )
    finally:
        session.close()


# The view requires ``status_db_id``, so the rule must declare it as a URL
# variable.
@app.route("/statuses/<int:status_db_id>")
def status_detail(status_db_id):
    """View a single status with all details.

    Redirects to the status list with an error flash when the id is unknown.
    """
    session = get_session()
    try:
        status = session.query(Status).get(status_db_id)
        if not status:
            flash("Status not found", "error")
            return redirect(url_for("statuses_list"))
        return render_template("status_detail.html", status=status)
    finally:
        session.close()


@app.route("/api/stats")
def api_stats():
    """JSON API endpoint for stats (useful for your analysis pipeline).

    Returns the total status count, counts for the four known status types
    and, for every active account, its handle, status count and last
    collection time (ISO 8601 or ``null``).
    """
    session = get_session()
    try:
        # One grouped query instead of a separate COUNT per status type.
        type_counts = {
            status_type: count
            for status_type, count in (
                session.query(Status.status_type, func.count(Status.id))
                .group_by(Status.status_type)
            )
        }
        # One grouped query instead of a separate COUNT per account.
        counts_by_account = {
            account_db_id: count
            for account_db_id, count in (
                session.query(Status.account_db_id, func.count(Status.id))
                .group_by(Status.account_db_id)
            )
        }

        stats = {
            "total_statuses": session.query(func.count(Status.id)).scalar() or 0,
            # Every known type is always present, even with a zero count.
            "by_type": {
                stype: type_counts.get(stype, 0)
                for stype in ["post", "reply", "mention", "reblog"]
            },
            "accounts": [],
        }

        accounts = session.query(MonitoredAccount).filter_by(is_active=True).all()
        for acct in accounts:
            stats["accounts"].append({
                "handle": acct.handle,
                "status_count": counts_by_account.get(acct.id, 0),
                "last_collected": (
                    acct.last_collected_at.isoformat() if acct.last_collected_at else None
                ),
            })

        return jsonify(stats)
    finally:
        session.close()


@app.route("/api/statuses")
def api_statuses():
    """JSON API endpoint for statuses (for your analysis pipeline).

    Query parameters: ``page``, ``per_page`` (default 100, capped at 500),
    ``account_id``, ``type`` and ``since`` (ISO 8601 datetime; only statuses
    created at or after it are returned). A malformed ``since`` yields a 400
    response instead of being passed to the database.
    """
    since = None
    since_raw = request.args.get("since", "").strip()  # ISO datetime
    if since_raw:
        since = _parse_iso_datetime(since_raw)
        if since is None:
            return jsonify({"error": "Invalid 'since' value; expected an ISO 8601 datetime"}), 400

    session = get_session()
    try:
        page, per_page = _pagination_args(default_per_page=100, max_per_page=500)
        account_id = request.args.get("account_id", type=int)
        status_type = request.args.get("type", "")

        query = session.query(Status)
        if account_id:
            query = query.filter(Status.account_db_id == account_id)
        if status_type:
            query = query.filter(Status.status_type == status_type)
        if since is not None:
            query = query.filter(Status.created_at >= since)

        total = query.count()
        statuses = (
            query.order_by(desc(Status.created_at))
            .offset((page - 1) * per_page)
            .limit(per_page)
            .all()
        )

        return jsonify({
            "total": total,
            "page": page,
            "per_page": per_page,
            "statuses": [
                {
                    "id": s.id,
                    "status_id": s.status_id,
                    "account": s.account.handle,
                    "url": s.url,
                    "content": s.content,
                    "text_content": s.text_content,
                    "visibility": s.visibility,
                    "created_at": s.created_at.isoformat() if s.created_at else None,
                    "language": s.language,
                    "status_type": s.status_type,
                    "in_reply_to_id": s.in_reply_to_id,
                    "replies_count": s.replies_count,
                    "reblogs_count": s.reblogs_count,
                    "favourites_count": s.favourites_count,
                    "mentions": [
                        {"acct": m.mentioned_acct, "url": m.mentioned_url}
                        for m in s.mentions
                    ],
                    "tags": [t.name for t in s.tags],
                }
                for s in statuses
            ],
        })
    finally:
        session.close()


@app.route("/export")
def export_csv():
    """Export statuses as a CSV attachment for analysis.

    Optional query parameters ``account_id`` and ``type`` filter the export.
    The whole result set is built in memory before being sent.
    """
    from io import StringIO
    import csv
    from flask import Response

    session = get_session()
    try:
        account_id = request.args.get("account_id", type=int)
        status_type = request.args.get("type", "")

        query = session.query(Status).join(MonitoredAccount)
        if account_id:
            query = query.filter(Status.account_db_id == account_id)
        if status_type:
            query = query.filter(Status.status_type == status_type)

        statuses = query.order_by(desc(Status.created_at)).all()

        output = StringIO()
        writer = csv.writer(output)
        writer.writerow([
            "id", "account", "status_type", "created_at", "url",
            "text_content", "language", "visibility",
            "in_reply_to_id", "replies_count", "reblogs_count",
            "favourites_count", "mentions", "tags", "sensitive", "spoiler_text",
        ])

        for s in statuses:
            # Multi-valued fields are flattened into "; "-separated strings.
            mentions_str = "; ".join(m.mentioned_acct for m in s.mentions)
            tags_str = "; ".join(t.name for t in s.tags)
            writer.writerow([
                s.status_id,
                s.account.handle,
                s.status_type,
                s.created_at.isoformat() if s.created_at else "",
                s.url,
                s.text_content,
                s.language,
                s.visibility,
                s.in_reply_to_id,
                s.replies_count,
                s.reblogs_count,
                s.favourites_count,
                mentions_str,
                tags_str,
                s.sensitive,
                s.spoiler_text,
            ])

        return Response(
            output.getvalue(),
            mimetype="text/csv",
            headers={"Content-Disposition": "attachment; filename=mastodon_statuses.csv"},
        )
    finally:
        session.close()


@app.route("/analysis")
def analysis_dashboard():
    """Toxicity analysis dashboard.

    Gathers summary stats, a 12-week trend, per-category averages and the
    five most recent analysis runs, and passes the chart data to the template
    as pre-serialized JSON strings.
    """
    from app.analysis_helpers import (
        get_analysis_stats,
        get_toxicity_trend,
        get_category_averages,
        get_recent_analysis_runs,
        TOXICITY_CATEGORIES,
    )
    import json

    session = get_session()
    try:
        stats = get_analysis_stats(session)
        trend = get_toxicity_trend(session, weeks=12)
        categories = get_category_averages(session)
        runs = get_recent_analysis_runs(session, limit=5)

        # Prepare chart data.
        # NOTE(review): the helpers presumably return SQL aggregates, which
        # are NULL/None when there is nothing to aggregate; fall back to 0 so
        # float()/int() cannot raise on an empty data set.
        trend_json = json.dumps([
            {
                "week": r["week"].strftime("%Y-%m-%d") if r["week"] else "",
                "avg_toxicity": round(float(r["avg_toxicity"] or 0), 4),
                "flagged_posts": int(r["flagged_posts"] or 0),
                "flagged_mentions": int(r["flagged_mentions"] or 0),
            }
            for r in trend
        ])
        categories_json = json.dumps(
            {k: round(float(v or 0), 4) for k, v in categories.items()}
        )

        return render_template(
            "analysis.html",
            stats=stats,
            trend_json=trend_json,
            categories_json=categories_json,
            categories=TOXICITY_CATEGORIES,
            runs=runs,
        )
    finally:
        session.close()


@app.route("/analysis/flagged")
def analysis_flagged():
    """View flagged content, filtered and sorted by query-string parameters.

    Empty filter values are normalized to ``None`` before being handed to
    ``get_flagged_content`` and back to ``""`` for the template. Pages are a
    fixed 50 items.
    """
    from app.analysis_helpers import (
        get_flagged_content,
        get_accounts_for_select,
        TOXICITY_CATEGORIES,
    )

    session = get_session()
    try:
        category = request.args.get("category") or None
        account_id = request.args.get("account_id", type=int) or None
        threshold = request.args.get("threshold", 0.5, type=float)
        review_status = request.args.get("review_status") or None
        date_from = request.args.get("date_from") or None
        date_to = request.args.get("date_to") or None
        sort = request.args.get("sort", "overall")
        direction = request.args.get("dir", "desc")
        page = max(1, request.args.get("page", 1, type=int))
        per_page = 50

        items, total = get_flagged_content(
            session,
            category=category,
            account_id=account_id,
            threshold=threshold,
            review_status=review_status,
            date_from=date_from,
            date_to=date_to,
            sort=sort,
            direction=direction,
            limit=per_page,
            offset=(page - 1) * per_page,
        )
        # Ceiling division; always report at least one page.
        total_pages = max(1, (total + per_page - 1) // per_page)
        accounts = get_accounts_for_select(session)

        return render_template(
            "flagged.html",
            items=items,
            total=total,
            page=page,
            total_pages=total_pages,
            accounts=accounts,
            categories=TOXICITY_CATEGORIES,
            category=category or "",
            account_id=account_id or "",
            threshold=threshold,
            review_status=review_status or "",
            date_from=date_from or "",
            date_to=date_to or "",
            sort=sort,
            direction=direction,
        )
    finally:
        session.close()


@app.route("/api/review/submit", methods=["POST"])
def api_review_submit():
    """Submit a human review for a flagged status.

    Expects a JSON object with ``status_id`` and ``review_status`` (one of
    ``correct``, ``incorrect``, ``unsure``).

    Responses:
        200 — the review was stored.
        400 — the body is not a JSON object, or a field is missing/invalid.
        404 — no ``toxicity_scores`` row matched ``status_id``.
        500 — the database update failed (details are logged, not returned).
    """
    from sqlalchemy import text

    # silent=True yields None instead of raising on a missing or malformed
    # body, so every bad request gets the same JSON error shape.
    data = request.get_json(silent=True)
    logger.info("Review submission received: %s", data)
    if not isinstance(data, dict):
        return jsonify({"error": "Missing required fields"}), 400

    status_id = data.get("status_id")
    review_status = data.get("review_status")

    if not all([status_id, review_status]):
        logger.error(
            "Missing fields - status_id: %s, review_status: %s", status_id, review_status
        )
        return jsonify({"error": "Missing required fields"}), 400

    if review_status not in ["correct", "incorrect", "unsure"]:
        logger.error("Invalid review_status: %s", review_status)
        return jsonify({"error": "Invalid review_status"}), 400

    session = get_session()
    try:
        result = session.execute(text("""
            UPDATE toxicity_scores
            SET human_reviewed = true,
                review_status = :review_status,
                reviewed_at = NOW()
            WHERE status_id = :status_id
        """), {"review_status": review_status, "status_id": status_id})
        session.commit()

        logger.info(
            "Review saved for status_id %s: %s (rows affected: %s)",
            status_id, review_status, result.rowcount,
        )
        if result.rowcount == 0:
            # Nothing matched: do not report a no-op as a saved review.
            return jsonify({"error": "No toxicity score found for status_id"}), 404
        return jsonify({"success": True, "message": "Review submitted"}), 200
    except Exception:
        session.rollback()
        # Log the full traceback server-side; do not leak database details
        # to the client.
        logger.exception("Failed to submit review")
        return jsonify({"error": "Failed to submit review"}), 500
    finally:
        session.close()


if __name__ == "__main__":
    # The Werkzeug debugger allows arbitrary code execution, so it must not be
    # enabled by default while listening on all interfaces. Opt in with
    # FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes", "on")
    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)