Mass loading but overrunning API limits
This commit is contained in:
parent
e6d8f10fe3
commit
b797746229
1
.gitignore
vendored
1
.gitignore
vendored
@ -5,3 +5,4 @@ tags
|
||||
Session.vim
|
||||
.direnv
|
||||
.envrc
|
||||
testdata/
|
||||
|
||||
@ -19,4 +19,6 @@ class Config(object):
|
||||
MAIL_SERVER = os.environ.get('MAIL_SERVER') or "woodlands.midnighthax.com"
|
||||
MAIL_USERNAME = os.environ.get('MAIL_USERNAME')
|
||||
MAIL_USE_TLS = os.environ.get('MAIL_USE_TLS') is not None
|
||||
MAX_CONTENT_LENGTH = 4096
|
||||
MAX_POSTS_TO_FETCH = 2000
|
||||
NORMAL_COLOUR = "#f6f5f4"
|
||||
|
||||
@ -21,13 +21,13 @@ def ask_yes_no(title: str, question: str) -> bool:
|
||||
return button_reply == QMessageBox.Yes
|
||||
|
||||
|
||||
def format_username(account) -> str:
|
||||
def format_display_name(account) -> str:
|
||||
"""
|
||||
Format account username according to whether we follow that account
|
||||
Format account display name according to whether we follow that account
|
||||
or not.
|
||||
"""
|
||||
|
||||
username = account.username
|
||||
username = account.display_name
|
||||
if account.followed:
|
||||
colour = Config.FOLLOWED_COLOUR
|
||||
else:
|
||||
|
||||
@ -11,6 +11,7 @@ from sqlalchemy import (
|
||||
Column,
|
||||
DateTime,
|
||||
ForeignKey,
|
||||
func,
|
||||
Integer,
|
||||
select,
|
||||
String,
|
||||
@ -208,7 +209,8 @@ class Posts(Base):
|
||||
created_at = Column(DateTime, index=True, default=None)
|
||||
uri = Column(String(256), index=False)
|
||||
url = Column(String(256), index=False)
|
||||
content = Column(String(2048), index=False, default="")
|
||||
content = Column(String(Config.MAX_CONTENT_LENGTH), index=False,
|
||||
default="")
|
||||
account_id = Column(Integer, ForeignKey('accounts.id'), nullable=True)
|
||||
account = relationship("Accounts", back_populates="posts")
|
||||
|
||||
@ -232,11 +234,31 @@ class Posts(Base):
|
||||
session.add(self)
|
||||
session.commit()
|
||||
|
||||
@classmethod
def get_unrated_after(cls, session: Session,
                      post_id: int) -> Optional["Posts"]:
    """
    Fetch the unrated post that immediately follows post_id.

    Candidates are rows whose rating is NULL and whose post_id compares
    greater than the passed post_id. The candidate with the lowest
    post_id is returned, or None when nothing matches.
    """

    is_unrated = cls.rating.is_(None)
    is_later = cls.post_id > post_id
    stmt = (
        select(cls)
        .where(is_unrated, is_later)
        .order_by(cls.post_id.asc())
        .limit(1)
    )
    return session.scalars(stmt).first()
|
||||
|
||||
@classmethod
|
||||
def get_unrated_before(cls, session: Session,
|
||||
post_id: int) -> Optional["Posts"]:
|
||||
"""
|
||||
Return latest unrated Posts object before past post_id, or None
|
||||
Return latest unrated Posts object before passed post_id, or None
|
||||
if there isn't one.
|
||||
"""
|
||||
|
||||
@ -259,7 +281,6 @@ class Posts(Base):
|
||||
is not a boosted post, or None if there isn't one.
|
||||
"""
|
||||
|
||||
print("get_unrated_newest")
|
||||
return (
|
||||
session.scalars(
|
||||
select(cls)
|
||||
@ -269,6 +290,36 @@ class Posts(Base):
|
||||
).first()
|
||||
)
|
||||
|
||||
@classmethod
def get_unrated_oldest(cls, session: Session) -> Optional["Posts"]:
    """
    Return the oldest Posts object that has not been rated, or None if
    there isn't one.

    "Oldest" means lowest post_id. Only the rating is tested here:
    boosted posts are NOT filtered out by this query, so a caller that
    wants to skip them has to do so itself.
    """

    return session.scalars(
        select(cls)
        .where(cls.rating.is_(None))
        .order_by(cls.post_id.asc())
        .limit(1)
    ).first()
|
||||
|
||||
@classmethod
def get_by_post_id(cls, session: Session,
                   post_id: str) -> Optional["Posts"]:
    """
    Return the post identified by post_id, or None if no row has that
    post_id.
    """

    return session.scalars(
        select(cls)
        .where(cls.post_id == post_id)
        .limit(1)
    ).first()
|
||||
|
||||
@classmethod
|
||||
def get_or_create(cls, session: Session, post_id: str) -> "Posts":
|
||||
"""
|
||||
@ -287,6 +338,14 @@ class Posts(Base):
|
||||
|
||||
return rec
|
||||
|
||||
@staticmethod
def max_post_id(session):
    """
    Return the largest post_id in the posts table, as computed by the
    database's MAX() aggregate.
    """

    highest_post_id = func.max(Posts.post_id)
    return session.scalars(select(highest_post_id)).first()
|
||||
|
||||
|
||||
class PostTags(Base):
|
||||
__tablename__ = 'post_tags'
|
||||
|
||||
@ -65,7 +65,7 @@ p, li { white-space: pre-wrap; }
|
||||
<property name="minimumSize">
|
||||
<size>
|
||||
<width>0</width>
|
||||
<height>181</height>
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
<property name="frameShape">
|
||||
@ -75,7 +75,7 @@ p, li { white-space: pre-wrap; }
|
||||
<string/>
|
||||
</property>
|
||||
<property name="scaledContents">
|
||||
<bool>true</bool>
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QTextEdit" name="txtHashtags">
|
||||
|
||||
@ -29,10 +29,10 @@ class Ui_MainWindow(object):
|
||||
self.txtPost.setObjectName("txtPost")
|
||||
self.lblPicture = QtWidgets.QLabel(self.centralwidget)
|
||||
self.lblPicture.setGeometry(QtCore.QRect(10, 770, 351, 201))
|
||||
self.lblPicture.setMinimumSize(QtCore.QSize(0, 181))
|
||||
self.lblPicture.setMinimumSize(QtCore.QSize(0, 0))
|
||||
self.lblPicture.setFrameShape(QtWidgets.QFrame.StyledPanel)
|
||||
self.lblPicture.setText("")
|
||||
self.lblPicture.setScaledContents(True)
|
||||
self.lblPicture.setScaledContents(False)
|
||||
self.lblPicture.setObjectName("lblPicture")
|
||||
self.txtHashtags = QtWidgets.QTextEdit(self.centralwidget)
|
||||
self.txtHashtags.setGeometry(QtCore.QRect(370, 90, 331, 871))
|
||||
|
||||
295
app/urma.py
295
app/urma.py
@ -1,5 +1,6 @@
|
||||
#! /usr/bin/env python
|
||||
|
||||
import datetime
|
||||
import ipdb
|
||||
import os
|
||||
import pickle
|
||||
@ -11,7 +12,7 @@ import sys
|
||||
from config import Config
|
||||
from dbconfig import engine, Session, scoped_session
|
||||
from helpers import (
|
||||
format_username,
|
||||
format_display_name,
|
||||
index_ojects_by_parameter,
|
||||
send_mail,
|
||||
)
|
||||
@ -29,6 +30,7 @@ from models import (
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
from PyQt5.QtCore import Qt
|
||||
from PyQt5.QtGui import (
|
||||
QImage,
|
||||
QPixmap,
|
||||
@ -85,50 +87,16 @@ class MastodonAPI:
|
||||
return self.mastodon.fetch_remaining(page1)
|
||||
|
||||
|
||||
class UnratedPosts:
    """
    Return unrated posts one at a time, in either direction.

    The posts are loaded once, at construction, from
    Posts.get_unrated_posts(). self.pointer is the index of the post most
    recently returned, or None when stepping has not started or has run
    off either end of the dataset.
    """

    def __init__(self, session: Session) -> None:
        self.dataset = Posts.get_unrated_posts(session)
        self.pointer: Optional[int] = None

    def next(self) -> Optional[Posts]:
        """
        Return the post after the current one (the first post if we have
        no current position), or None once the end has been passed.
        """

        return self._step(start=0, delta=1)

    def prev(self) -> Optional[Posts]:
        """
        Return the post before the current one (the last post if we have
        no current position), or None once the start has been passed.
        """

        return self._step(start=len(self.dataset) - 1, delta=-1)

    def _step(self, start: int, delta: int) -> Optional[Posts]:
        """
        Move the pointer by delta, or to start if it is unset, and return
        the post it lands on. Falling outside the dataset resets the
        pointer to None and returns None.
        """

        if self.pointer is None:
            self.pointer = start
        else:
            self.pointer += delta

        if 0 <= self.pointer < len(self.dataset):
            return self.dataset[self.pointer]

        # We've run off the end of the dataset; the next call starts again
        self.pointer = None
        return None
|
||||
|
||||
|
||||
class Window(QMainWindow, Ui_MainWindow):
|
||||
def __init__(self, parent=None) -> None:
|
||||
super().__init__(parent)
|
||||
self.setupUi(self)
|
||||
|
||||
# self.mastapi = MastodonAPI(Config.ACCESS_TOKEN)
|
||||
self.mastapi = MastodonAPI(Config.ACCESS_TOKEN)
|
||||
self.update_db()
|
||||
|
||||
self.current_post_id = None
|
||||
self.next_post = self.next
|
||||
|
||||
self.btnDislike.clicked.connect(self.dislike)
|
||||
self.btnFirst.clicked.connect(self.first)
|
||||
@ -165,13 +133,13 @@ class Window(QMainWindow, Ui_MainWindow):
|
||||
# Boosted
|
||||
if boosted_by:
|
||||
self.txtBoosted.setText(
|
||||
"Boosted by: " + format_username(boosted_by))
|
||||
"Boosted by: " + format_display_name(boosted_by))
|
||||
self.txtBoosted.show()
|
||||
else:
|
||||
self.txtBoosted.hide()
|
||||
|
||||
# Username
|
||||
self.txtUsername.setText(format_username(post.account))
|
||||
self.txtUsername.setText(format_display_name(post.account))
|
||||
|
||||
# Debug
|
||||
self.lblDebug.setText(str(post.id))
|
||||
@ -199,21 +167,23 @@ class Window(QMainWindow, Ui_MainWindow):
|
||||
|
||||
# Image
|
||||
if post.media_attachments:
|
||||
image = QImage()
|
||||
# TODO: handle multiple images, not just [0]
|
||||
url_image = post.media_attachments[0].preview_url
|
||||
image.loadFromData(requests.get(url_image).content)
|
||||
self.lblPicture.setPixmap(QPixmap(image))
|
||||
pixmap = QPixmap()
|
||||
pixmap.loadFromData(requests.get(url_image).content)
|
||||
s_pixmap = pixmap.scaled(self.lblPicture.size(),
|
||||
Qt.KeepAspectRatio)
|
||||
self.lblPicture.show()
|
||||
self.lblPicture.setPixmap(s_pixmap)
|
||||
else:
|
||||
self.lblPicture.hide()
|
||||
|
||||
def dislike(self):
    """
    Give the current post a negative rating (-1) by handing it to
    rate_post().
    """

    negative_rating = -1
    self.rate_post(rating=negative_rating)
|
||||
|
||||
def first(self):
|
||||
"""
|
||||
@ -231,10 +201,10 @@ class Window(QMainWindow, Ui_MainWindow):
|
||||
|
||||
def like(self):
    """
    Give the current post a positive rating (+1) by handing it to
    rate_post().
    """

    positive_rating = 1
    self.rate_post(rating=positive_rating)
|
||||
|
||||
def next(self) -> None:
|
||||
"""
|
||||
@ -245,17 +215,20 @@ class Window(QMainWindow, Ui_MainWindow):
|
||||
display newest unrated post.
|
||||
"""
|
||||
|
||||
# Get post to display, but don't process posts that are boosted
|
||||
# as they will be processed by the boosting post
|
||||
# Remember whether we're going forward or backwards through
|
||||
# posts
|
||||
self.next_post = self.next
|
||||
|
||||
# Get post to display
|
||||
with Session() as session:
|
||||
if self.current_post_id is None:
|
||||
post = Posts.get_unrated_newest(session)
|
||||
while post and post.reblogged_by_post:
|
||||
post = Posts.get_unrated_newest(session)
|
||||
else:
|
||||
post = Posts.get_unrated_before(session, self.current_post_id)
|
||||
while post and post.reblogged_by_post:
|
||||
post = Posts.get_unrated_before(session, post.post_id)
|
||||
# Don't process posts that are boosted as they will be
|
||||
# processed by the boosting post
|
||||
while post and post.reblogged_by_post:
|
||||
post = Posts.get_unrated_before(session, post.post_id)
|
||||
if not post:
|
||||
self.current_post_id = None
|
||||
show_OK("All done", "No more posts to process")
|
||||
@ -266,17 +239,169 @@ class Window(QMainWindow, Ui_MainWindow):
|
||||
|
||||
def prev(self):
    """
    Show the previous post. Posts are normally worked through from
    newest to oldest, so the "previous" post is the next NEWER one.

    When self.current_post_id is None there is no current position, and
    the oldest unrated post is fetched and shown instead.
    """

    # Record the direction of travel; rate_post() calls self.next_post()
    self.next_post = self.prev

    with Session() as session:
        start_id = self.current_post_id
        candidate = (
            Posts.get_unrated_oldest(session)
            if start_id is None
            else Posts.get_unrated_after(session, start_id)
        )

        # Step over boosted posts: they are handled via the post that
        # boosted them
        while candidate and candidate.reblogged_by_post:
            candidate = Posts.get_unrated_after(session, candidate.post_id)

        if candidate:
            self.current_post_id = candidate.post_id
            self.display(session, candidate)
        else:
            self.current_post_id = None
            show_OK("All done", "No more posts to process")
|
||||
|
||||
def rate_post(self, rating: int) -> None:
    """
    Add rating to the current post, then move to the next post in the
    direction we were last travelling (self.next_post).

    Does nothing when there is no current post (self.current_post_id is
    None, as it is after the "All done" message) or when the current
    post can't be found in the database.
    """

    if self.current_post_id is None:
        # Nothing is on display, so there is nothing to rate
        return

    with Session() as session:
        post = Posts.get_by_post_id(session, self.current_post_id)
        if post is None:
            # get_by_post_id() found no such post; don't dereference None
            return
        post.rating = rating
        # Commit explicitly so the rating is saved whether or not the
        # Session context manager commits on exit
        # (NOTE(review): its definition is not visible here)
        session.commit()

    self.next_post()
|
||||
|
||||
def unsure(self):
    """
    Give the current post a neutral rating (0) by handing it to
    rate_post().
    """

    neutral_rating = 0
    self.rate_post(rating=neutral_rating)
|
||||
|
||||
def update_db(self) -> None:
    """
    Update database from Mastodon

    Pages through the timeline, newest first, passing each post to
    _process_post(). Stops when a post at or below the highest post_id
    already in the database is met, when Config.MAX_POSTS_TO_FETCH posts
    have been fetched, or when the server has no more pages.

    Save a copy of downloaded data for debugging: each page is pickled
    to its own timestamped file under testdata/.
    """

    def id_key(post_id) -> tuple:
        # Compare IDs by (length, text) rather than as plain strings.
        # For decimal IDs without leading zeros this is numeric order
        # ("9" < "10"), and unlike int() it can't fail on a
        # non-numeric ID.
        text = str(post_id)
        return (len(text), text)

    # Make sure the dump directory exists so open() below can't fail
    os.makedirs("testdata", exist_ok=True)

    with Session() as session:
        minimum_post_id = Posts.max_post_id(session)
        if not minimum_post_id:
            minimum_post_id = "1"
        minimum_key = id_key(minimum_post_id)
        posts_to_get = Config.MAX_POSTS_TO_FETCH
        reached_minimum = False
        hometl = []

        while True:

            # Create a filename to save data, bumping the sequence
            # number until we find one that isn't already in use
            now = datetime.datetime.now()
            seq = 0
            while True:
                fname = (
                    "testdata/" +
                    now.strftime("%Y-%m-%d_%H:%M:%S_") +
                    f"{seq:02d}.pickle"
                )
                if not os.path.isfile(fname):
                    print(f"{fname=}")
                    break
                seq += 1
                print(f"{seq=}")

            # Fetch data
            if not hometl:
                print("Fetching first data...")
                hometl = self.mastapi.mastodon.timeline()
            else:
                print("Fetching next data...")
                hometl = self.mastapi.mastodon.fetch_next(hometl)
            if hometl is None:
                # fetch_next() returns None when there are no further
                # pages; treat that as an empty page, not a crash
                hometl = []
            print(f"Fetched additional {len(hometl)} posts")
            with open(fname, "wb") as f:
                pickle.dump(hometl, f)

            for post in hometl:
                if id_key(post.id) <= minimum_key:
                    # Everything from here on is already in the database
                    reached_minimum = True
                    break
                print(f"Processing {post.id=}")
                self._process_post(session, post)

            posts_to_get -= len(hometl)
            print(f"{posts_to_get=}")
            if posts_to_get <= 0 or reached_minimum or not hometl:
                break
|
||||
|
||||
def _process_post(self, session: Session, post) -> Posts:
    """
    Add the passed post to the database and return its Posts record.

    A record that already has an account_id is treated as already
    processed and returned untouched. Otherwise the post's account,
    hashtags and media attachments are created as needed and linked,
    and the post's own fields are filled in. If the post is a boost,
    the boosted post is processed recursively and its record id is
    stored in boosted_post_id.
    """

    log.debug(f"{post.id=} processing")
    rec = Posts.get_or_create(session, str(post.id))
    if rec.account_id is not None:
        # A linked account means we have handled this post before
        log.debug(f"{post.id=} already in db")
        return rec

    # Account: look up or create, filling in details for a new one
    log.debug(f"{post.id=} processing {post.account.id=}")
    author = Accounts.get_or_create(session, str(post.account.id))
    if author.username is None:
        log.debug(f"{post.id=} populating new account {post.account.id=}")
        author.username = post.account.username
        author.acct = post.account.acct
        author.display_name = post.account.display_name
        author.bot = post.account.bot
        author.url = post.account.url
    rec.account_id = author.id

    # Hashtags: link every tag carried by the post
    for tag in post.tags:
        log.debug(f"{post.id=} processing {tag.name=}")
        tag_rec = Hashtags.get_or_create(session, tag.name, tag.url)
        rec.hashtags.append(tag_rec)

    # Media: populate attachment records we haven't seen before
    if not post.media_attachments:
        log.debug(f"{post.id=} No media attachments")
    else:
        for media in post.media_attachments:
            log.debug(f"{post.id=} processing {media.id=}")
            attachment = Attachments.get_or_create(
                session, str(media.id), rec.id)
            if attachment.type:
                log.debug(f"{post.id=} {media.id=} already exists")
                continue
            log.debug(f"{post.id=} {media.id=} new record")
            attachment.type = media.type
            attachment.url = media.url
            attachment.preview_url = media.preview_url
            attachment.description = media.description

    # NOTE(review): this repeats the account assignment made above. It is
    # kept in case the account's id is only populated by a later flush;
    # confirm before removing.
    rec.account_id = author.id
    rec.created_at = post.created_at
    rec.uri = post.uri
    rec.url = post.url
    # Truncate to the width of the content column
    rec.content = post.content[:Config.MAX_CONTENT_LENGTH]
    log.debug(f"{post.id=} {post.content=}")

    if post.reblog:
        # This post is a boost: store the boosted post too and link to it
        log.debug(f"{post.id=} {post.reblog.id=}")
        boosted = self._process_post(session, post.reblog)
        rec.boosted_post_id = boosted.id
        log.debug(f"{post.id=} {rec.boosted_post_id=}")

    return rec
|
||||
|
||||
def update_followed_accounts(self, session: Session) -> None:
|
||||
"""
|
||||
@ -346,54 +471,6 @@ class Window(QMainWindow, Ui_MainWindow):
|
||||
|
||||
# class HoldingPot:
|
||||
# def process_post(post):
|
||||
# rec = Posts.get_or_create(session, str(post.id))
|
||||
# if rec.account_id is not None:
|
||||
# # We already have this post
|
||||
# return
|
||||
#
|
||||
# # Create account record if needed
|
||||
# account_rec = Accounts.get_or_create(session, str(post.account.id))
|
||||
# if account_rec.username is None:
|
||||
# account_rec.username = post.account.username
|
||||
# account_rec.acct = post.account.acct
|
||||
# account_rec.display_name = post.account.display_name
|
||||
# account_rec.bot = post.account.bot
|
||||
# account_rec.url = post.account.url
|
||||
# rec.account_id = account_rec.id
|
||||
#
|
||||
# # Create hashtag records as needed
|
||||
# for tag in post.tags:
|
||||
# hashtag = Hashtags.get_or_create(session, tag.name, tag.url)
|
||||
# rec.hashtags.append(hashtag)
|
||||
#
|
||||
# # Handle media
|
||||
# for media in post.media_attachments:
|
||||
# media_rec = Attachments.get_or_create(session,
|
||||
# str(media.id), rec.id)
|
||||
# if not media_rec.type:
|
||||
# media_rec.type = media.type
|
||||
# media_rec.url = media.url
|
||||
# media_rec.preview_url = media.preview_url
|
||||
# media_rec.description = media.description
|
||||
#
|
||||
# rec.account_id = account_rec.id
|
||||
# rec.created_at = post.created_at
|
||||
# rec.uri = post.uri
|
||||
# rec.url = post.url
|
||||
# rec.content = post.content
|
||||
#
|
||||
# if post.reblogged_by_post:
|
||||
# rec.boosted_post_id = process_post(post.reblogged_by_post).id
|
||||
#
|
||||
# return rec
|
||||
#
|
||||
# # Data for development
|
||||
# with open(TESTDATA, "rb") as inp:
|
||||
# hometl = pickle.load(inp)
|
||||
#
|
||||
# with Session() as session:
|
||||
# for post in hometl:
|
||||
# process_post(post)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@ -418,3 +495,11 @@ if __name__ == "__main__":
|
||||
print("\033[1;31;47mUnhandled exception starts")
|
||||
stackprinter.show(style="darkbg")
|
||||
print("Unhandled exception ends\033[1;37;40m")
|
||||
|
||||
# # Data for development
|
||||
# with open(TESTDATA, "rb") as inp:
|
||||
# hometl = pickle.load(inp)
|
||||
#
|
||||
# with Session() as session:
|
||||
# for post in hometl:
|
||||
# process_post(post)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user