From bcc6634e348b82a37b47249887983f1dc5ee2ba3 Mon Sep 17 00:00:00 2001 From: Keith Edmunds Date: Wed, 17 Aug 2022 11:28:10 +0100 Subject: [PATCH 1/7] Work on replacing existing music files --- app/models.py | 2 +- app/replace_files.py | 214 +++++++++++++++++++++++++++++++++++++++++++ poetry.lock | 14 ++- pyproject.toml | 1 + 4 files changed, 229 insertions(+), 2 deletions(-) create mode 100755 app/replace_files.py diff --git a/app/models.py b/app/models.py index 43858c5..1717225 100644 --- a/app/models.py +++ b/app/models.py @@ -593,7 +593,7 @@ class Tracks(Base): Config.MILLISECOND_SIGFIGS) * 1000 self.start_gap = leading_silence(audio) session.add(self) - session.flush() + session.commit() # @staticmethod # def remove_by_path(session: Session, path: str) -> None: diff --git a/app/replace_files.py b/app/replace_files.py new file mode 100755 index 0000000..54c25a8 --- /dev/null +++ b/app/replace_files.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python +# +# Script to replace existing files in parent directory. Typical usage: +# the current directory contains a "better" version of the file than the +# parent (eg, better bitrate). 
+# +# Actions: +# +# - check that the same filename is present in the parent directory +# - check that the artist and title tags are the same +# - append ".bak" to the version in the parent directory +# - move file to parent directory +# - normalise file +# - update duration, start_gap, fade_at, silence_at, mtime in database + +import glob +import os +import shutil + +from helpers import ( + fade_point, + get_audio_segment, + get_tags, + leading_silence, + trailing_silence, +) + +from models import Tracks +from dbconfig import Session +from thefuzz import process + +from string import ascii_lowercase as letters +from typing import List + + +def insensitive_glob(pattern): + def either(c): + return '[%s%s]' % (c.lower(), c.upper()) if c.isalpha() else c + return glob.glob(''.join(map(either, pattern))) + + +# Check file of same name exists in parent directory +source_dir = '/home/kae/music/Singles/tmp' # os.getcwd() +parent_dir = os.path.dirname(source_dir) +assert source_dir != parent_dir + +process_multiple_matches = True +do_processing = False +process_no_matches = True + +name_and_tags: List[str] = [] +name_not_tags: List[str] = [] +tags_not_name: List[str] = [] +multiple_similar: List[str] = [] +no_match: List[str] = [] +possibles: List[str] = [] + +print(f"{source_dir=}, {parent_dir=}") + + +def main(): + tracks = os.listdir(parent_dir) + for fname in os.listdir(source_dir): + parent_file = os.path.join(parent_dir, fname) + new_file = os.path.join(source_dir, fname) + us = get_tags(new_file) + us_t = us['title'] + us_a = us['artist'] + + if os.path.exists(parent_file): + # File exists, check tags + p = get_tags(parent_file) + p_t = p['title'] + p_a = p['artist'] + if ( + (str(p_t).lower() != str(us_t).lower()) or + (str(p_a).lower() != str(us_a).lower()) + ): + name_not_tags.append( + f" {fname=}, {p_t} → {us_t}, {p_a} → {us_a}") + process_track(new_file, parent_file, us_t, us_a) + continue + name_and_tags.append(new_file) + process_track(new_file, parent_file, 
us_t, us_a) + continue + + # Try to find a near match + stem = fname.split(".")[0] + matches = insensitive_glob(os.path.join(parent_dir, stem) + '*') + match_count = len(matches) + if match_count == 0: + no_match.append(f"{fname}, {us_t=}, {us_a=}") + print(f"\n file={fname}\n title={us_t}\n artist={us_a}\n") + # Try fuzzy search + d = {} + while True: + for i, match in enumerate( + [a[0] for a in process.extract(fname, tracks, limit=5)] + ): + d[i] = match + for k, v in d.items(): + print(f"{k}: {v}") + data = input("pick one, return to quit: ") + if data == "": + break + try: + key = int(data) + except ValueError: + continue + if key in d: + print("***KAE confirm with tags") + dst = d[key] + process_track(new_file, dst, us_t, us_a) + break + else: + continue + continue # from break after testing for "" in data + continue + if match_count > 1: + multiple_similar.append(fname + "\n " + "\n ".join(matches)) + if match_count <= 26 and process_multiple_matches: + print(f"\n file={fname}\n title={us_t}\n artist={us_a}\n") + d = {} + while True: + for i, match in enumerate(matches): + d[i] = match + for k, v in d.items(): + print(f"{k}: {v}") + data = input("pick one, return to quit: ") + if data == "": + break + try: + key = int(data) + except ValueError: + continue + if key in d: + dst = d[key] + process_track(new_file, dst, us_t, us_a) + break + else: + continue + continue # from break after testing for "" in data + # One match, check tags + sim_name = matches[0] + p = get_tags(sim_name) + p_t = p['title'] + p_a = p['artist'] + if ( + (str(p_t).lower() != str(us_t).lower()) or + (str(p_a).lower() != str(us_a).lower()) + ): + possibles.append( + f"File: {os.path.basename(sim_name)} → {fname}" + f"\n {p_t} → {us_t}\n {p_a} → {us_a}" + ) + process_track(new_file, sim_name, us_t, us_a) + continue + tags_not_name.append(f"Rename {os.path.basename(sim_name)} → {fname}") + process_track(new_file, sim_name, us_t, us_a) + + print(f"Name and tags match 
({len(name_and_tags)}):") + # print(" \n".join(name_and_tags)) + # print() + + print(f"Name but not tags match ({len(name_not_tags)}):") + print(" \n".join(name_not_tags)) + print() + + print(f"Tags but not name match ({len(tags_not_name)}):") + # print(" \n".join(tags_not_name)) + # print() + + print(f"Multiple similar names ({len(multiple_similar)}):") + print(" \n".join(multiple_similar)) + print() + + print(f"Possibles: ({len(possibles)}):") + print(" \n".join(possibles)) + print() + + print(f"No match ({len(no_match)}):") + print(" \n".join(no_match)) + print() + + +def process_track(src, dst, title, artist): + + new_path = os.path.join(os.path.dirname(dst), os.path.basename(src)) + print(f"process_track:\n {src=}\n {new_path=}\n {dst=}\n {title=}, {artist=}\n") + + if not do_processing: + return + + with Session() as session: + track = Tracks.get_by_path(session, dst) + if track: + track.title = title + track.artist = artist + track.path = new_path + session.commit() + + print(f"os.unlink({dst}") + print(f"shutil.move({src}, {new_path}") + + os.unlink(dst) + shutil.move(src, new_path) + track = Tracks.get_by_path(session, new_path) + if track: + track.rescan(session) + else: + print(f"Can't find copied track {src=}, {dst=}") + + +main() diff --git a/poetry.lock b/poetry.lock index 3f0e179..4cd654d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -579,6 +579,17 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "thefuzz" +version = "0.19.0" +description = "Fuzzy string matching in python" +category = "main" +optional = false +python-versions = "*" + +[package.extras] +speedup = ["python-levenshtein (>=0.12)"] + [[package]] name = "tinytag" version = "1.8.1" @@ -644,7 +655,7 @@ python-versions = "*" [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "9b4cf9915bf250afd948596a6ba82794f82abf6a6d4891bc51845409632c15fb" +content-hash = "f56b509dd26d76adcd2c01ee04f9e80ed736fb5189b27bcb5e005fc50ba773c9" [metadata.files] 
alembic = [ @@ -1005,6 +1016,7 @@ stack-data = [ {file = "stack_data-0.2.0.tar.gz", hash = "sha256:45692d41bd633a9503a5195552df22b583caf16f0b27c4e58c98d88c8b648e12"}, ] text-unidecode = [] +thefuzz = [] tinytag = [ {file = "tinytag-1.8.1.tar.gz", hash = "sha256:363ab3107831a5598b68aaa061aba915fb1c7b4254d770232e65d5db8487636d"}, ] diff --git a/pyproject.toml b/pyproject.toml index 0c6b614..525544a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ pydub = "^0.25.1" PyQt5-sip = "^12.9.1" types-psutil = "^5.8.22" python-slugify = "^6.1.2" +thefuzz = "^0.19.0" [tool.poetry.dev-dependencies] ipdb = "^0.13.9" From 503ba36a8804fbfce70d3d68ee688a400051f785 Mon Sep 17 00:00:00 2001 From: Keith Edmunds Date: Wed, 17 Aug 2022 17:09:19 +0100 Subject: [PATCH 2/7] Replacing files fine tuning --- app/replace_files.py | 79 +++++++++++++++++++++++++++----------------- poetry.lock | 20 ++++++++++- pyproject.toml | 2 ++ 3 files changed, 70 insertions(+), 31 deletions(-) diff --git a/app/replace_files.py b/app/replace_files.py index 54c25a8..c0d8a22 100755 --- a/app/replace_files.py +++ b/app/replace_files.py @@ -32,6 +32,12 @@ from thefuzz import process from string import ascii_lowercase as letters from typing import List +# ###################### SETTINGS ######################### +process_multiple_matches = True +do_processing = True +process_no_matches = True +# ######################################################### + def insensitive_glob(pattern): def either(c): @@ -44,10 +50,6 @@ source_dir = '/home/kae/music/Singles/tmp' # os.getcwd() parent_dir = os.path.dirname(source_dir) assert source_dir != parent_dir -process_multiple_matches = True -do_processing = False -process_no_matches = True - name_and_tags: List[str] = [] name_not_tags: List[str] = [] tags_not_name: List[str] = [] @@ -89,33 +91,47 @@ def main(): matches = insensitive_glob(os.path.join(parent_dir, stem) + '*') match_count = len(matches) if match_count == 0: - no_match.append(f"{fname}, {us_t=}, 
{us_a=}") - print(f"\n file={fname}\n title={us_t}\n artist={us_a}\n") - # Try fuzzy search - d = {} - while True: - for i, match in enumerate( + if process_no_matches: + print(f"\n file={fname}\n title={us_t}\n artist={us_a}\n") + # Try fuzzy search + d = {} + while True: + for i, match in enumerate( [a[0] for a in process.extract(fname, tracks, limit=5)] - ): - d[i] = match - for k, v in d.items(): - print(f"{k}: {v}") - data = input("pick one, return to quit: ") - if data == "": - break - try: - key = int(data) - except ValueError: - continue - if key in d: - print("***KAE confirm with tags") - dst = d[key] - process_track(new_file, dst, us_t, us_a) - break - else: + ): + d[i] = match + for k, v in d.items(): + print(f"{k}: {v}") + data = input("pick one, return to quit: ") + if data == "": + no_match.append(f"{fname}, {us_t=}, {us_a=}") + break + try: + key = int(data) + except ValueError: + continue + if key in d: + old_file = os.path.join(parent_dir, d[key]) + oldtags = get_tags(old_file) + old_title = oldtags['title'] + old_artist = oldtags['artist'] + print() + print(f" Title tag will change {old_title} → {us_t}") + print(f" Artist tag will change {old_artist} → {us_a}") + print() + data = input("Go ahead (y to accept)? 
") + if data == "y": + process_track(new_file, old_file, us_t, us_a) + break + else: + no_match.append(f"{fname}, {us_t=}, {us_a=}") + continue + no_match.append(f"{fname}, {us_t=}, {us_a=}") continue - continue # from break after testing for "" in data - continue + else: + no_match.append(f"{fname}, {us_t=}, {us_a=}") + continue + if match_count > 1: multiple_similar.append(fname + "\n " + "\n ".join(matches)) if match_count <= 26 and process_multiple_matches: @@ -204,7 +220,10 @@ def process_track(src, dst, title, artist): os.unlink(dst) shutil.move(src, new_path) - track = Tracks.get_by_path(session, new_path) + try: + track = Tracks.get_by_path(session, new_path) + except: + import ipdb; ipdb.set_trace() if track: track.rescan(session) else: diff --git a/poetry.lock b/poetry.lock index 4cd654d..a0c289a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -366,6 +366,14 @@ category = "dev" optional = false python-versions = ">=3.8,<4.0" +[[package]] +name = "pyfzf" +version = "0.3.1" +description = "Python wrapper for junegunn's fuzzyfinder (fzf)" +category = "main" +optional = false +python-versions = "*" + [[package]] name = "pygments" version = "2.11.2" @@ -481,6 +489,14 @@ pytest = ">=3.0.0" dev = ["pre-commit", "tox"] doc = ["sphinx", "sphinx-rtd-theme"] +[[package]] +name = "python-levenshtein" +version = "0.12.2" +description = "Python extension for computing string edit distances and similarities." 
+category = "main" +optional = false +python-versions = "*" + [[package]] name = "python-slugify" version = "6.1.2" @@ -655,7 +671,7 @@ python-versions = "*" [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "f56b509dd26d76adcd2c01ee04f9e80ed736fb5189b27bcb5e005fc50ba773c9" +content-hash = "b181eb743e8b6c9cb7e03c4db0bcef425fe410d2ec3c4c801ce20e448a26f166" [metadata.files] alembic = [ @@ -928,6 +944,7 @@ pydub = [ {file = "pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f"}, ] pydub-stubs = [] +pyfzf = [] pygments = [ {file = "Pygments-2.11.2-py3-none-any.whl", hash = "sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65"}, {file = "Pygments-2.11.2.tar.gz", hash = "sha256:4e426f72023d88d03b2fa258de560726ce890ff3b630f88c21cbb8b2503b8c6a"}, @@ -997,6 +1014,7 @@ pytest-qt = [ {file = "pytest-qt-4.0.2.tar.gz", hash = "sha256:dfc5240dec7eb43b76bcb5f9a87eecae6ef83592af49f3af5f1d5d093acaa93e"}, {file = "pytest_qt-4.0.2-py2.py3-none-any.whl", hash = "sha256:e03847ac02a890ccaac0fde1748855b9dce425aceba62005c6cfced6cf7d5456"}, ] +python-levenshtein = [] python-slugify = [] python-vlc = [ {file = "python-vlc-3.0.16120.tar.gz", hash = "sha256:92f98fee088f72bd6d063b3b3312d0bd29b37e7ad65ddeb3a7303320300c2807"}, diff --git a/pyproject.toml b/pyproject.toml index 525544a..db24b5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,8 @@ PyQt5-sip = "^12.9.1" types-psutil = "^5.8.22" python-slugify = "^6.1.2" thefuzz = "^0.19.0" +python-Levenshtein = "^0.12.2" +pyfzf = "^0.3.1" [tool.poetry.dev-dependencies] ipdb = "^0.13.9" From 5f8d8572ad59b169ec3b4bb4b201fbba1b997551 Mon Sep 17 00:00:00 2001 From: Keith Edmunds Date: Sun, 21 Aug 2022 19:47:47 +0100 Subject: [PATCH 3/7] Don't allow duplicate track paths --- app/models.py | 2 +- ...b3332_don_t_allow_duplicate_track_paths.py | 28 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 
migrations/versions/fe2e127b3332_don_t_allow_duplicate_track_paths.py diff --git a/app/models.py b/app/models.py index 1717225..4469777 100644 --- a/app/models.py +++ b/app/models.py @@ -508,7 +508,7 @@ class Tracks(Base): start_gap = Column(Integer, index=False) fade_at = Column(Integer, index=False) silence_at = Column(Integer, index=False) - path = Column(String(2048), index=False, nullable=False) + path = Column(String(2048), index=False, nullable=False, unique=True) mtime = Column(Float, index=True) playlistrows = relationship("PlaylistRows", back_populates="track") playlists = association_proxy("playlistrows", "playlist") diff --git a/migrations/versions/fe2e127b3332_don_t_allow_duplicate_track_paths.py b/migrations/versions/fe2e127b3332_don_t_allow_duplicate_track_paths.py new file mode 100644 index 0000000..4f515c3 --- /dev/null +++ b/migrations/versions/fe2e127b3332_don_t_allow_duplicate_track_paths.py @@ -0,0 +1,28 @@ +"""Don't allow duplicate track paths + +Revision ID: fe2e127b3332 +Revises: 0c604bf490f8 +Create Date: 2022-08-21 19:46:35.768659 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'fe2e127b3332' +down_revision = '0c604bf490f8' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_unique_constraint(None, 'tracks', ['path']) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_constraint(None, 'tracks', type_='unique') + # ### end Alembic commands ### From 62c5fa178cda7480c6d8407a5c54da20505b6068 Mon Sep 17 00:00:00 2001 From: Keith Edmunds Date: Mon, 22 Aug 2022 14:39:18 +0100 Subject: [PATCH 4/7] Work around MariaDB bug in replace_files.py --- app/models.py | 15 +++++++++------ app/replace_files.py | 31 ++++++++++++++++++++----------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/app/models.py b/app/models.py index 4469777..118ff21 100644 --- a/app/models.py +++ b/app/models.py @@ -572,12 +572,15 @@ class Tracks(Base): Return track with passed path, or None. """ - return ( - session.execute( - select(Tracks) - .where(Tracks.path == path) - ).scalar_one() - ) + try: + return ( + session.execute( + select(Tracks) + .where(Tracks.path == path) + ).scalar_one() + ) + except NoResultFound: + return None def rescan(self, session: Session) -> None: """ diff --git a/app/replace_files.py b/app/replace_files.py index c0d8a22..b05d9a9 100755 --- a/app/replace_files.py +++ b/app/replace_files.py @@ -27,15 +27,14 @@ from helpers import ( from models import Tracks from dbconfig import Session -from thefuzz import process - -from string import ascii_lowercase as letters +from thefuzz import process # type: ignore +from sqlalchemy.exc import IntegrityError from typing import List # ###################### SETTINGS ######################### -process_multiple_matches = True +process_multiple_matches = False do_processing = True -process_no_matches = True +process_no_matches = False # ######################################################### @@ -202,7 +201,10 @@ def main(): def process_track(src, dst, title, artist): new_path = os.path.join(os.path.dirname(dst), os.path.basename(src)) - print(f"process_track:\n {src=}\n {new_path=}\n {dst=}\n {title=}, {artist=}\n") + print( + f"process_track:\n {src=}\n {new_path=}\n " + f"{dst=}\n {title=}, {artist=}\n" + ) if not do_processing: return @@ -213,17 +215,24 @@ def 
process_track(src, dst, title, artist): track.title = title track.artist = artist track.path = new_path - session.commit() + try: + session.commit() + except IntegrityError: + # https://jira.mariadb.org/browse/MDEV-29345 workaround + session.rollback() + track.title = title + track.artist = artist + track.path = "DUMMY" + session.commit() + track.path = new_path + session.commit() print(f"os.unlink({dst}") print(f"shutil.move({src}, {new_path}") os.unlink(dst) shutil.move(src, new_path) - try: - track = Tracks.get_by_path(session, new_path) - except: - import ipdb; ipdb.set_trace() + track = Tracks.get_by_path(session, new_path) if track: track.rescan(session) else: From 6ce41d3314f4dfa866079a3ba8cf08172f6efd1e Mon Sep 17 00:00:00 2001 From: Keith Edmunds Date: Mon, 22 Aug 2022 16:01:56 +0100 Subject: [PATCH 5/7] Check replace_files is run against production db --- app/replace_files.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/app/replace_files.py b/app/replace_files.py index b05d9a9..573000f 100755 --- a/app/replace_files.py +++ b/app/replace_files.py @@ -16,6 +16,7 @@ import glob import os import shutil +import sys from helpers import ( fade_point, @@ -60,6 +61,10 @@ print(f"{source_dir=}, {parent_dir=}") def main(): + if 'musicmuster_prod' not in os.environ.get('MM_DB'): + response = input("Not on production database - c to continue: ") + if response != "c": + sys.exit(0) tracks = os.listdir(parent_dir) for fname in os.listdir(source_dir): parent_file = os.path.join(parent_dir, fname) From 26358761e5f41874dce26777cb9de4903628daca Mon Sep 17 00:00:00 2001 From: Keith Edmunds Date: Mon, 22 Aug 2022 16:07:44 +0100 Subject: [PATCH 6/7] Default to no processing in replace_files.py --- app/replace_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/replace_files.py b/app/replace_files.py index 573000f..9642917 100755 --- a/app/replace_files.py +++ b/app/replace_files.py @@ -34,7 +34,7 @@ from typing import List # 
###################### SETTINGS ######################### process_multiple_matches = False -do_processing = True +do_processing = False process_no_matches = False # ######################################################### From f851fdcafe2fd86be5a098b4f3936c4bfd97188b Mon Sep 17 00:00:00 2001 From: Keith Edmunds Date: Mon, 22 Aug 2022 16:08:24 +0100 Subject: [PATCH 7/7] First draft of rename_singles.py --- app/rename_singles.py | 54 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100755 app/rename_singles.py diff --git a/app/rename_singles.py b/app/rename_singles.py new file mode 100755 index 0000000..58f116a --- /dev/null +++ b/app/rename_singles.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# +# Script to manage renaming existing files in given directory and +# propagating that change to database. Typical usage: renaming files +# from 'title.mp3' to 'title - artist.mp3' +# +# Actions: +# +# - record all filenames and inode numbers +# - external: rename the files +# - update records with new filenames for each inode number +# - update external database with new paths + +import os +import sqlite3 + +PHASE = 2 + +# Check file of same name exists in parent directory +source_dir = '/home/kae/tmp/Singles' # os.getcwd() +db = "/home/kae/tmp/singles.sqlite" + + +def main(): + with sqlite3.connect(db) as connection: + cursor = connection.cursor() + if PHASE == 1: + cursor.execute( + "CREATE TABLE IF NOT EXISTS mp3s " + "(inode INTEGER, oldname TEXT, newname TEXT)" + ) + + for fname in os.listdir(source_dir): + fullpath = os.path.join(source_dir, fname) + inode = os.stat(fullpath).st_ino + sql = f'INSERT INTO mp3s VALUES ({inode}, "{fname}", "")' + cursor.execute(sql) + + if PHASE == 2: + for fname in os.listdir(source_dir): + fullpath = os.path.join(source_dir, fname) + inode = os.stat(fullpath).st_ino + sql = ( + f'UPDATE mp3s SET newname = "{fname}" WHERE inode={inode}' + ) + try: + cursor.execute(sql) + except 
sqlite3.OperationalError: + print(f"Error with {inode} -> {fname}") + + cursor.close() + + +main()