NOTE: line structure restored from a whitespace-collapsed copy of this
patch. Indentation of the context lines in app/models.py and poetry.lock
is reconstructed (use "git apply --ignore-whitespace" if it does not
match), and the "index" blob ids predate the rework of
app/replace_files.py below.

diff --git a/app/models.py b/app/models.py
index 43858c5..1717225 100644
--- a/app/models.py
+++ b/app/models.py
@@ -593,7 +593,7 @@ class Tracks(Base):
             Config.MILLISECOND_SIGFIGS) * 1000
         self.start_gap = leading_silence(audio)
         session.add(self)
-        session.flush()
+        session.commit()
 
     # @staticmethod
     # def remove_by_path(session: Session, path: str) -> None:
diff --git a/app/replace_files.py b/app/replace_files.py
new file mode 100755
index 0000000..54c25a8
--- /dev/null
+++ b/app/replace_files.py
@@ -0,0 +1,259 @@
+#!/usr/bin/env python
+#
+# Script to replace existing files in parent directory. Typical usage:
+# the current directory contains a "better" version of the file than the
+# parent (eg, better bitrate).
+#
+# Actions:
+#
+# - check that the same filename is present in the parent directory
+# - check that the artist and title tags are the same
+# - delete the version in the parent directory (NOTE: no ".bak" copy
+#   is kept)
+# - move file to parent directory
+# - normalise file (NOTE: not implemented in this script)
+# - update title, artist and path in database, then rescan the track
+#   (intended to refresh duration, start_gap, fade_at, silence_at, mtime)
+
+import glob
+import os
+import shutil
+
+from helpers import (
+    fade_point,
+    get_audio_segment,
+    get_tags,
+    leading_silence,
+    trailing_silence,
+)
+
+from models import Tracks
+from dbconfig import Session
+from thefuzz import process
+
+from string import ascii_lowercase as letters
+from typing import List, Optional
+
+
+def insensitive_glob(pattern: str) -> List[str]:
+    """
+    Return the paths matching pattern, ignoring the case of letters.
+
+    Each letter in pattern is replaced by a two-character class (for
+    example "a" becomes "[aA]"); every other character is handed to
+    glob.glob() unchanged, so wildcards in pattern keep their meaning.
+    """
+
+    def either(c: str) -> str:
+        return '[%s%s]' % (c.lower(), c.upper()) if c.isalpha() else c
+
+    return glob.glob(''.join(map(either, pattern)))
+
+
+# Check file of same name exists in parent directory
+source_dir = '/home/kae/music/Singles/tmp'  # os.getcwd()
+parent_dir = os.path.dirname(source_dir)
+assert source_dir != parent_dir
+
+process_multiple_matches = True
+do_processing = False
+process_no_matches = True
+
+name_and_tags: List[str] = []
+name_not_tags: List[str] = []
+tags_not_name: List[str] = []
+multiple_similar: List[str] = []
+no_match: List[str] = []
+possibles: List[str] = []
+
+print(f"{source_dir=}, {parent_dir=}")
+
+
+def _same_tags(title_a, artist_a, title_b, artist_b) -> bool:
+    """Return True if title and artist both match, ignoring case."""
+
+    return (
+        str(title_a).lower() == str(title_b).lower() and
+        str(artist_a).lower() == str(artist_b).lower()
+    )
+
+
+def _choose(candidates: List[str]) -> Optional[str]:
+    """
+    Print a numbered list of candidates and ask the user to pick one.
+
+    Keeps asking until a listed number is entered. Return the chosen
+    candidate, or None if the user just presses return.
+    """
+
+    while True:
+        for i, candidate in enumerate(candidates):
+            print(f"{i}: {candidate}")
+        data = input("pick one, return to quit: ")
+        if data == "":
+            return None
+        try:
+            key = int(data)
+        except ValueError:
+            continue
+        if 0 <= key < len(candidates):
+            return candidates[key]
+
+
+def main():
+    """
+    Match each file in source_dir against the files in parent_dir,
+    hand every match to process_track() and print a summary.
+    """
+
+    # Candidates for the fuzzy search. Directories are left out so that
+    # one (source_dir itself lives in parent_dir) is never offered as
+    # a file to replace.
+    tracks = [
+        name for name in os.listdir(parent_dir)
+        if os.path.isfile(os.path.join(parent_dir, name))
+    ]
+
+    for fname in os.listdir(source_dir):
+        parent_file = os.path.join(parent_dir, fname)
+        new_file = os.path.join(source_dir, fname)
+        if not os.path.isfile(new_file):
+            # Only files can be replacements; skip subdirectories
+            continue
+        us = get_tags(new_file)
+        us_t = us['title']
+        us_a = us['artist']
+
+        if os.path.exists(parent_file):
+            # File exists, check tags
+            p = get_tags(parent_file)
+            p_t = p['title']
+            p_a = p['artist']
+            if _same_tags(p_t, p_a, us_t, us_a):
+                name_and_tags.append(new_file)
+            else:
+                name_not_tags.append(
+                    f" {fname=}, {p_t} → {us_t}, {p_a} → {us_a}")
+            # The file is replaced whether or not the tags agree
+            process_track(new_file, parent_file, us_t, us_a)
+            continue
+
+        # Try to find a near match. splitext() keeps dots that are part
+        # of the name (split(".") cut "Mr. X.mp3" down to "Mr"), and
+        # glob.escape() makes "[", "?" and "*" in the name match
+        # literally instead of acting as wildcards.
+        stem = os.path.splitext(fname)[0]
+        matches = insensitive_glob(
+            glob.escape(os.path.join(parent_dir, stem)) + '*')
+        match_count = len(matches)
+        if match_count == 0:
+            no_match.append(f"{fname}, {us_t=}, {us_a=}")
+            if process_no_matches:
+                print(f"\n file={fname}\n title={us_t}\n artist={us_a}\n")
+                # Try fuzzy search
+                choice = _choose(
+                    [a[0] for a in process.extract(fname, tracks, limit=5)]
+                )
+                if choice is not None:
+                    print("***KAE confirm with tags")
+                    # tracks holds bare filenames; process_track()
+                    # needs the full path
+                    dst = os.path.join(parent_dir, choice)
+                    process_track(new_file, dst, us_t, us_a)
+            continue
+        if match_count > 1:
+            multiple_similar.append(fname + "\n " + "\n ".join(matches))
+            if match_count <= 26 and process_multiple_matches:
+                print(f"\n file={fname}\n title={us_t}\n artist={us_a}\n")
+                choice = _choose(matches)
+                if choice is not None:
+                    process_track(new_file, choice, us_t, us_a)
+            # Never fall through: the single-match code below would
+            # process matches[0] without asking
+            continue
+
+        # One match, check tags
+        sim_name = matches[0]
+        p = get_tags(sim_name)
+        p_t = p['title']
+        p_a = p['artist']
+        if _same_tags(p_t, p_a, us_t, us_a):
+            tags_not_name.append(
+                f"Rename {os.path.basename(sim_name)} → {fname}")
+        else:
+            possibles.append(
+                f"File: {os.path.basename(sim_name)} → {fname}"
+                f"\n {p_t} → {us_t}\n {p_a} → {us_a}"
+            )
+        process_track(new_file, sim_name, us_t, us_a)
+
+    print(f"Name and tags match ({len(name_and_tags)}):")
+    # print(" \n".join(name_and_tags))
+    # print()
+
+    print(f"Name but not tags match ({len(name_not_tags)}):")
+    print(" \n".join(name_not_tags))
+    print()
+
+    print(f"Tags but not name match ({len(tags_not_name)}):")
+    # print(" \n".join(tags_not_name))
+    # print()
+
+    print(f"Multiple similar names ({len(multiple_similar)}):")
+    print(" \n".join(multiple_similar))
+    print()
+
+    print(f"Possibles: ({len(possibles)}):")
+    print(" \n".join(possibles))
+    print()
+
+    print(f"No match ({len(no_match)}):")
+    print(" \n".join(no_match))
+    print()
+
+
+def process_track(src, dst, title, artist):
+    """
+    Replace the file dst with the file src and update the database.
+
+    src is moved into dst's directory under its own filename. The
+    track stored under dst's path, if there is one, is given the new
+    title, artist and path, and the track is then rescanned. Only the
+    planned action is printed unless do_processing is set.
+    """
+
+    new_path = os.path.join(os.path.dirname(dst), os.path.basename(src))
+    print(
+        f"process_track:\n {src=}\n {new_path=}\n "
+        f"{dst=}\n {title=}, {artist=}\n"
+    )
+
+    if not do_processing:
+        return
+
+    with Session() as session:
+        track = Tracks.get_by_path(session, dst)
+
+        print(f"os.unlink({dst})")
+        print(f"shutil.move({src}, {new_path})")
+
+        # Replace the file before committing anything, so that a
+        # failed file operation leaves the database untouched
+        os.unlink(dst)
+        shutil.move(src, new_path)
+
+        if track:
+            track.title = title
+            track.artist = artist
+            track.path = new_path
+            session.commit()
+
+        track = Tracks.get_by_path(session, new_path)
+        if track:
+            track.rescan(session)
+        else:
+            print(f"Can't find copied track {src=}, {dst=}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/poetry.lock b/poetry.lock
index 3f0e179..4cd654d 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -579,6 +579,17 @@ category = "main"
 optional = false
 python-versions = "*"
 
+[[package]]
+name = "thefuzz"
+version = "0.19.0"
+description = "Fuzzy string matching in python"
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.extras]
+speedup = ["python-levenshtein (>=0.12)"]
+
 [[package]]
 name = "tinytag"
 version = "1.8.1"
@@ -644,7 +655,7 @@ python-versions = "*"
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.9"
-content-hash = "9b4cf9915bf250afd948596a6ba82794f82abf6a6d4891bc51845409632c15fb"
+content-hash = "f56b509dd26d76adcd2c01ee04f9e80ed736fb5189b27bcb5e005fc50ba773c9"
 
 [metadata.files]
 alembic = [
@@ -1005,6 +1016,7 @@ stack-data = [
     {file = "stack_data-0.2.0.tar.gz", hash = "sha256:45692d41bd633a9503a5195552df22b583caf16f0b27c4e58c98d88c8b648e12"},
 ]
 text-unidecode = []
+thefuzz = []
 tinytag = [
     {file = "tinytag-1.8.1.tar.gz", hash = "sha256:363ab3107831a5598b68aaa061aba915fb1c7b4254d770232e65d5db8487636d"},
 ]
diff --git a/pyproject.toml b/pyproject.toml
index 0c6b614..525544a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,7 @@ pydub = "^0.25.1"
 PyQt5-sip = "^12.9.1"
 types-psutil = "^5.8.22"
 python-slugify = "^6.1.2"
+thefuzz = "^0.19.0"
 
 [tool.poetry.dev-dependencies]
 ipdb = "^0.13.9"