diff --git a/app/models.py b/app/models.py index d9c7ec5..1f2ee16 100644 --- a/app/models.py +++ b/app/models.py @@ -79,19 +79,49 @@ class Attachments(Base): __tablename__ = 'attachments' id = Column(Integer, primary_key=True, autoincrement=True) - media_id = Column(Integer, index=True, nullable=False) - media_type = Column(String(256), index=False) + media_id = Column(String(32), index=True, nullable=False) url = Column(String(256), index=False) preview_url = Column(String(256), index=False) description = Column(String(2048), index=False) - posts = relationship("Posts", back_populates="media_attachments") + post_id = Column(Integer, ForeignKey("posts.id")) + type = Column(String(256), index=False) def __repr__(self) -> str: return ( f"" + f"description={self.description}>" ) + def __init__(self, session: Session, media_id: str, post_id: int) -> None: + + self.media_id = media_id + self.post_id = post_id + + session.add(self) + session.commit() + + @classmethod + def get_or_create(cls, session: Session, media_id: str, + post_id: int) -> "Attachments": + """ + Return any existing Attachment with this id or create a new one + """ + + try: + rec = ( + session.execute( + select(cls) + .where( + cls.media_id == media_id, + cls.post_id == post_id + ) + ).scalar_one() + ) + except NoResultFound: + rec = Attachments(session, media_id, post_id) + + return rec + class Hashtags(Base): __tablename__ = 'hashtags' @@ -112,7 +142,6 @@ class Hashtags(Base): ) def __init__(self, session: Session, name: str, url: str) -> None: - self.name = name self.url = url @@ -151,12 +180,10 @@ class Posts(Base): account_id = Column(Integer, ForeignKey('accounts.id'), nullable=True) account = relationship("Accounts", back_populates="posts") - parent_id = Column(Integer, ForeignKey("posts.id")) reblog = relationship("Posts") + child_id = Column(Integer, ForeignKey("posts.id")) - media_attachments_id = Column(Integer, ForeignKey('attachments.id'), - nullable=True) - media_attachments = relationship("Attachments", back_populates="posts") + media_attachments = relationship("Attachments") posts_to_tags = relationship("PostTags", back_populates="post") hashtags = association_proxy("posts_to_tags", "hashtag") diff --git a/app/urma.py b/app/urma.py index 5db8b50..ae99a2a 100755 --- a/app/urma.py +++ b/app/urma.py @@ -50,44 +50,53 @@ TESTDATA = "/home/kae/git/urma/hometl.pickle" Base.metadata.create_all(engine) # mastodon = Mastodon(access_token=Config.ACCESS_TOKEN) + +def process_post(post): + rec = Posts.get_or_create(session, str(post.id)) + if rec.account_id is not None: + # We already have this post + return + + # Create account record if needed + account_rec = Accounts.get_or_create(session, str(post.account.id)) + if account_rec.username is None: + account_rec.username = post.account.username + account_rec.acct = post.account.acct + account_rec.display_name = post.account.display_name + account_rec.bot = post.account.bot + account_rec.url = post.account.url + rec.account_id = account_rec.id + + # Create hashtag records as needed + for tag in post.tags: + hashtag = Hashtags.get_or_create(session, tag.name, tag.url) + rec.hashtags.append(hashtag) + + # Handle media + for media in post.media_attachments: + media_rec = Attachments.get_or_create(session, str(media.id), rec.id) + if not media_rec.type: + media_rec.type = media.type + media_rec.url = media.url + media_rec.preview_url = media.preview_url + media_rec.description = media.description + + rec.account_id = account_rec.id + rec.created_at = post.created_at + rec.uri = post.uri + rec.url = post.url + rec.content = post.content + + if post.reblog: + rec.child_id = process_post(post.reblog).id + + return rec + + # Data for development with open(TESTDATA, "rb") as inp: hometl = pickle.load(inp) with Session() as session: for post in hometl: - - rec = Posts.get_or_create(session, str(post.id)) - if rec.account_id is not None: - # We already have this post - continue - - # Create account record if needed - account_rec = Accounts.get_or_create(session, str(post.account.id)) - if account_rec.username is None: - account_rec.username = post.account.username - account_rec.acct = post.account.acct - account_rec.display_name = post.account.display_name - account_rec.bot = post.account.bot - account_rec.url = post.account.url - - # Create hashtag records as needed - for tag in post.tags: - hashtag = Hashtags.get_or_create(session, tag.name, tag.url) - rec.hashtags.append(hashtag) - - rec.account_id = account_rec.id - rec.created_at = post.created_at - rec.uri = post.uri - rec.url = post.url - rec.content = post.content - # reblog FIXME - # media FIXME - # posttags FIXME - # rating TBD -ipdb.set_trace() - - -# Parse timeline -# for post in hometl: -# post = Posts() + process_post(post)