From c7757efbf678d1e7f06f372bd0d9f4ba17df1e8f Mon Sep 17 00:00:00 2001 From: Keith Edmunds Date: Sat, 21 Jan 2023 23:11:35 +0000 Subject: [PATCH] Untracked hashtags report --- app/config.py | 2 ++ app/urma.py | 98 +++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 97 insertions(+), 3 deletions(-) diff --git a/app/config.py b/app/config.py index 0937481..89808dc 100644 --- a/app/config.py +++ b/app/config.py @@ -21,3 +21,5 @@ class Config(object): MAX_POSTS_TO_FETCH = 200 POINTS_BOOSTED = 1 POINTS_FAVOURITED = 1 + TOP_HASHTAGS_TO_REPORT = 3 + TOP_POSTS_TO_REPORT = 3 diff --git a/app/urma.py b/app/urma.py index 3ac155f..2e98e46 100755 --- a/app/urma.py +++ b/app/urma.py @@ -1,5 +1,6 @@ #! /usr/bin/env python +import argparse import datetime import ipdb import os @@ -7,6 +8,7 @@ import pickle import random import requests import stackprinter +import subprocess import sys from config import Config @@ -28,6 +30,10 @@ from models import ( Posts, PostTags, ) +from sqlalchemy import ( + func, + select, +) from typing import List, Optional, Union @@ -101,7 +107,7 @@ class MastodonAPI: _ = self.mastodon.status_unbookmark(post_id) -def main() -> None: +def update_database() -> None: """ Main loop """ @@ -142,6 +148,31 @@ def get_and_process_favourited(session, mastapi): favourited = mastapi.mastodon.fetch_next(favourited) +def get_database_name(): + """Return database name as string""" + + with Session() as session: + dbname = session.bind.engine.url.database + + return dbname + + +def get_version_string(): + """Return Urma version as string""" + + try: + return str( + subprocess.check_output( + ['git', 'describe'], stderr=subprocess.STDOUT + ) + ).strip('\'b\\n') + except subprocess.CalledProcessError as exc_info: + gitproc = subprocess.Popen(['git', 'rev-parse', 'HEAD'], + stdout=subprocess.PIPE) + (stdout, _) = gitproc.communicate() + return stdout.strip()[:7].decode("utf-8") + + def process_bookmarked_posts(session: Session, posts: List[Posts], me_id: 
int) -> bool: """ @@ -241,6 +272,46 @@ def _process_post(session: Session, post: Posts, me_id) -> Posts: return rec +def report(): + """Print report""" + + print(f"Urma version: {get_version_string()}") + print(f"Database: {get_database_name()}") + print(f"Date: {datetime.datetime.now().strftime('%c')}") + print() + + with Session() as session: + # Find the most popular hashtags that we don't follow + print("Hashtags you don't follow that feature in posts you like") + print("--------------------------------------------------------") + top_unfollowed_tags = ( + session.execute( + select(Hashtags, func.count(Hashtags.name)) + .join(PostTags).join(Posts) + .where(Posts.favourited == 1, Hashtags.followed == 0) + .group_by(Hashtags.name) + .order_by(func.count(Hashtags.name).desc()) + .limit(Config.TOP_HASHTAGS_TO_REPORT)) + .all() + ) + + # How many times was each hashtag in a post we didn't like? + for (hashtag, like) in top_unfollowed_tags: + dislike = ( + session.execute( + select(func.count(Posts.id)) + .join(PostTags).join(Hashtags) + .where(Posts.favourited == 0, Hashtags.id == hashtag.id) + ).scalars() + .all()[0] + ) + + print( + f"Hashtag {hashtag.name} {like=}, {dislike=} " + f"({like * 100 / (like + dislike):.2f}% liked)" + ) + + def update_followed_accounts(session: Session, mastapi: MastodonAPI) -> None: """ Retrieve list of followed accounts and update accounts @@ -313,10 +384,31 @@ if __name__ == "__main__": If command line arguments given, carry out requested function and exit. Otherwise run full application. 
""" - try: Base.metadata.create_all(engine) - sys.exit(main()) + + p = argparse.ArgumentParser() + # Only allow at most one option to be specified + group = p.add_mutually_exclusive_group() + group.add_argument('-u', '--update', + action="store_true", dest="update_database", + default=False, help="Update database from Mastodon") + group.add_argument('-r', '--report', + action="store_true", dest="report", + default=False, help="Report") + args = p.parse_args() + + # Run as required + if args.update_database: + log.debug("Updating database") + update_database() + elif args.report: + log.debug("Report") + report() + else: + # For now, default to updating database + update_database() + except Exception as exc: if os.environ["URMA_ENV"] != "DEVELOPMENT":