diff --git a/app/config.py b/app/config.py index a339023..3e8a0ed 100644 --- a/app/config.py +++ b/app/config.py @@ -7,7 +7,7 @@ class Config(object): # KAEID = 109568725613662482 # DEBUG_FUNCTIONS: List[Optional[str]] = [] # DEBUG_MODULES: List[Optional[str]] = ['dbconfig'] - DISPLAY_SQL = True + DISPLAY_SQL = False # ERRORS_FROM = ['noreply@midnighthax.com'] # ERRORS_TO = ['kae@midnighthax.com'] LOG_LEVEL_STDERR = logging.ERROR diff --git a/app/log.py b/app/log.py index fd0ed5f..6892ddf 100644 --- a/app/log.py +++ b/app/log.py @@ -76,7 +76,7 @@ def log_uncaught_exceptions(_ex_cls, ex, tb): logging.critical(''.join(traceback.format_tb(tb))) print("\033[1;37;40m") print(stackprinter.format(ex, style="darkbg2", add_summary=True)) - if os.environ["MM_ENV"] != "DEVELOPMENT": + if os.environ["URMA_ENV"] != "DEVELOPMENT": msg = stackprinter.format(ex) send_mail(Config.ERRORS_TO, Config.ERRORS_FROM, "Exception from musicmuster", msg) diff --git a/app/models.py b/app/models.py index fc384f8..d9c7ec5 100644 --- a/app/models.py +++ b/app/models.py @@ -10,6 +10,7 @@ from sqlalchemy import ( DateTime, ForeignKey, Integer, + select, String, ) @@ -19,6 +20,9 @@ from sqlalchemy.orm import ( declarative_base, relationship, ) +from sqlalchemy.orm.exc import ( + NoResultFound +) from config import Config from log import log @@ -30,10 +34,10 @@ class Accounts(Base): __tablename__ = 'accounts' id = Column(Integer, primary_key=True, autoincrement=True) - account_id = Column(Integer, index=True, nullable=False) - username = Column(String(256), index=True, nullable=False) - acct = Column(String(256), index=False, nullable=False) - display_name = Column(String(256), index=False, nullable=False) + account_id = Column(String(32), index=True, nullable=False) + username = Column(String(256), index=True, default=None) + acct = Column(String(256), index=False, default=None) + display_name = Column(String(256), index=False, default=None) bot = Column(Boolean, index=False, nullable=False, default=False) url = Column(String(256), index=False) followed = Column(Boolean, index=False, nullable=False, default=False) @@ -45,6 +49,31 @@ class Accounts(Base): f"followed={self.followed}>" ) + def __init__(self, session: Session, account_id: str) -> None: + + self.account_id = account_id + + session.add(self) + session.commit() + + @classmethod + def get_or_create(cls, session: Session, account_id: str) -> "Accounts": + """ + Return any existing account with this id or create a new one + """ + + try: + rec = ( + session.execute( + select(cls) + .where(cls.account_id == account_id) + ).scalar_one() + ) + except NoResultFound: + rec = Accounts(session, account_id) + + return rec + class Attachments(Base): __tablename__ = 'attachments' @@ -70,24 +99,51 @@ class Hashtags(Base): id = Column(Integer, primary_key=True, autoincrement=True) name = Column(String(256), index=True, nullable=False) url = Column(String(256), index=False) - posts = relationship("Posts", secondary="post_tags", backref="hashtags") - followed = Column(Boolean, index=False, nullable=False, default=False) - posttags = relationship("PostTags", back_populates="hashtag") - posts = association_proxy("posttags", "post") + tags_to_posts = relationship("PostTags", back_populates="hashtag") + posts = association_proxy("tags_to_posts", "post") + + followed = Column(Boolean, index=False, nullable=False, default=False) def __repr__(self) -> str: return ( f", followed={self.followed}>" + f"url={self.url}, followed={self.followed}>" ) + def __init__(self, session: Session, name: str, url: str) -> None: + + self.name = name + self.url = url + + session.add(self) + session.commit() + + @classmethod + def get_or_create(cls, session: Session, + name: str, url: str) -> "Hashtags": + """ + Return any existing hashtag with this name or create a new one + """ + + try: + rec = ( + session.execute( + select(cls) + .where(cls.name == name) + ).scalar_one() + ) + except NoResultFound: + rec = Hashtags(session, name, url) + + return rec + class Posts(Base): __tablename__ = 'posts' id = Column(Integer, primary_key=True, autoincrement=True) - post_id = Column(Integer, index=True, nullable=False) + post_id = Column(String(32), index=True, nullable=False) created_at = Column(DateTime, index=True, default=None) uri = Column(String(256), index=False) url = Column(String(256), index=False) @@ -102,20 +158,52 @@ class Posts(Base): nullable=True) media_attachments = relationship("Attachments", back_populates="posts") - posttags = relationship("PostTags", back_populates="post") + posts_to_tags = relationship("PostTags", back_populates="post") + hashtags = association_proxy("posts_to_tags", "hashtag") + rating = Column(Integer, index=True, default=None) def __repr__(self) -> str: return f"" + def __init__(self, session: Session, post_id) -> None: + + self.post_id = post_id + + session.add(self) + session.commit() + + @classmethod + def get_or_create(cls, session: Session, post_id: str) -> "Posts": + """ + Return any existing post with this id or create a new one + """ + + try: + rec = ( + session.execute( + select(cls) + .where(cls.post_id == post_id) + ).scalar_one() + ) + except NoResultFound: + rec = Posts(session, post_id) + + return rec + class PostTags(Base): __tablename__ = 'post_tags' id = Column(Integer, primary_key=True, autoincrement=True) + post_id = Column(Integer, ForeignKey('posts.id'), nullable=False, + index=True) + hashtag_id = Column(Integer, ForeignKey('hashtags.id'), nullable=False, + index=True) - post_id = Column(Integer, ForeignKey('posts.id'), nullable=False) - post = relationship(Posts, back_populates="posttags") + post = relationship(Posts, back_populates="posts_to_tags") + hashtag = relationship("Hashtags") - hashtag_id = Column(Integer, ForeignKey('hashtags.id'), nullable=False) - hashtag = relationship("Hashtags", back_populates="posttags") + def __init__(self, hashtag=None, post=None): + self.post = post + self.hashtag = hashtag diff --git a/app/urma.py b/app/urma.py index 38a67cb..5db8b50 100755 --- a/app/urma.py +++ b/app/urma.py @@ -1,9 +1,11 @@ #! /usr/bin/env python +import ipdb import pickle +import random from config import Config -from dbconfig import engine +from dbconfig import engine, Session, scoped_session from log import log from mastodon import Mastodon from models import ( @@ -12,8 +14,10 @@ from models import ( Base, Hashtags, Posts, + PostTags, ) +MAXINT = 2147483647 TESTDATA = "/home/kae/git/urma/hometl.pickle" # Mastodon.create_app( @@ -50,8 +54,39 @@ Base.metadata.create_all(engine) with open(TESTDATA, "rb") as inp: hometl = pickle.load(inp) -post = Posts() -import ipdb; ipdb.set_trace() +with Session() as session: + for post in hometl: + + rec = Posts.get_or_create(session, str(post.id)) + if rec.account_id is not None: + # We already have this post + continue + + # Create account record if needed + account_rec = Accounts.get_or_create(session, str(post.account.id)) + if account_rec.username is None: + account_rec.username = post.account.username + account_rec.acct = post.account.acct + account_rec.display_name = post.account.display_name + account_rec.bot = post.account.bot + account_rec.url = post.account.url + + # Create hashtag records as needed + for tag in post.tags: + hashtag = Hashtags.get_or_create(session, tag.name, tag.url) + rec.hashtags.append(hashtag) + + rec.account_id = account_rec.id + rec.created_at = post.created_at + rec.uri = post.uri + rec.url = post.url + rec.content = post.content + # reblog FIXME + # media FIXME + # posttags FIXME + # rating TBD +ipdb.set_trace() + # Parse timeline # for post in hometl: diff --git a/pyproject.toml b/pyproject.toml index bc276a6..8a731dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,3 +23,8 @@ build-backend = "poetry.core.masonry.api" [tool.mypy] mypy_path = "/home/kae/.cache/pypoetry/virtualenvs/urma-e3I_sS5U-py3.9:/home/kae/git/urma/app" plugins = "sqlalchemy.ext.mypy.plugin" + +[tool.vulture] +exclude = ["migrations"] +paths = ["app"] +make_whitelist = true