musicmuster/app/file_importer.py

721 lines
23 KiB
Python

from __future__ import annotations
from dataclasses import dataclass, field
from fuzzywuzzy import fuzz # type: ignore
import os.path
from typing import Optional, Sequence
import os
import shutil
# PyQt imports
from PyQt6.QtCore import (
pyqtSignal,
QObject,
QThread,
)
from PyQt6.QtWidgets import (
QButtonGroup,
QDialog,
QFileDialog,
QHBoxLayout,
QLabel,
QPushButton,
QRadioButton,
QVBoxLayout,
)
# Third party imports
# App imports
from classes import (
ApplicationError,
MusicMusterSignals,
Tags,
)
from config import Config
from helpers import (
file_is_unreadable,
get_tags,
show_OK,
)
from log import log
from models import db, Tracks
from music_manager import track_sequence
from playlistmodel import PlaylistModel
import helpers
@dataclass
class ThreadData:
    """
    Data structure to hold details of the import thread context
    """

    # Playlist model that imported tracks are handed to; FileImporter calls
    # base_model.update_or_insert() on it after each import
    base_model: PlaylistModel
    # Row number passed to base_model.update_or_insert() alongside the
    # imported track's id
    row_number: int
    # Optional reference to an import worker. Nothing in the code visible
    # here assigns it - TODO confirm whether it is still used
    worker: Optional[DoTrackImport] = None
@dataclass
class TrackFileData:
    """
    Data structure to hold details of file to be imported
    """

    # Tags read from the file (set by FileImporter.validate_file). Built by
    # a factory so each entry gets its own Tags() rather than sharing one
    # instance created when the class is defined.
    tags: Tags = field(default_factory=Tags)
    # Where the imported file will be moved to (set by replace_file or
    # import_as_new)
    destination_path: str = ""
    # Cleared when validation fails or the user declines the import
    import_this_file: bool = True
    # Reason the file will not be imported; reported by inform_user
    error: str = ""
    # Path of the existing track's file when replacing a track
    file_path_to_remove: Optional[str] = None
    # 0 means import as a new track; > 0 is the id of the track to replace
    track_id: int = 0
    # Existing tracks whose titles resemble this file (set by find_similar)
    track_match_data: list[TrackMatchData] = field(default_factory=list)
@dataclass
class TrackMatchData:
    """
    Data structure to hold details of existing files that are similar to
    the file being imported.
    """

    # Artist of the existing track
    artist: str
    # Score from FileImporter._get_match_score comparing the new file's
    # artist with the existing track's artist
    artist_match: float
    # Title of the existing track
    title: str
    # Score from FileImporter._get_match_score comparing the new file's
    # title with the existing track's title
    title_match: float
    # Id of the existing track
    track_id: int
class FileImporter:
    """
    Class to manage the import of new tracks. Sanity checks are carried
    out before processing each track.

    They may replace existing tracks, be imported as new tracks, or the
    import may be skipped altogether. The user decides which of these in
    the UI managed by the PickMatch class.

    The actual import is handled by the DoTrackImport class, which is run
    in a QThread, one file at a time.

    Typical use: construct with the playlist model (and optionally a row
    number), then call do_import().
    """
def __init__(
    self, base_model: PlaylistModel, row_number: Optional[int] = None
) -> None:
    """
    Set up class

    base_model: playlist model that imported tracks are handed to
    row_number: row passed to base_model.update_or_insert() after each
        import; when omitted, base_model.rowCount() is used
    """

    # Create ModelData. Test against None explicitly so that an explicit
    # row 0 is honoured rather than being treated as "not supplied".
    if row_number is None:
        row_number = base_model.rowCount()
    self.model_data = ThreadData(base_model=base_model, row_number=row_number)

    # Place to keep a reference to importer threads
    self.threads: list[QThread] = []

    # Data structure to track files to import. It must exist before it is
    # populated, so create it first and then add one entry per candidate
    # file found in the source directory.
    self.import_files_data: dict[str, TrackFileData] = {}
    source_dir = Config.REPLACE_FILES_DEFAULT_SOURCE
    for filename in os.listdir(source_dir):
        if filename.endswith((".mp3", ".flac")):
            infile = os.path.join(source_dir, filename)
            self.import_files_data[infile] = TrackFileData()

    # Sequence of all existing tracks, searched by id in
    # _get_existing_track
    self.existing_tracks = self._get_existing_tracks()

    self.signals = MusicMusterSignals()
def _get_existing_tracks(self) -> Sequence[Tracks]:
    """
    Fetch every existing Tracks record using a short-lived database
    session and return them
    """

    with db.Session() as session:
        all_tracks = Tracks.get_all(session)
    return all_tracks
def do_import(self) -> None:
    """
    Work through self.import_files_data (one TrackFileData per file) and
    then start the import.

    For each file:
        - validate it
        - find similar existing tracks (sorted when there is more than one)
        - ask the user what to do and record the answer
        - check the resulting data for integrity

    Afterwards, tell the user which files won't be imported and why, and
    import the remaining files one by one.
    """

    files = self.import_files_data
    if not files:
        show_OK(
            "File import",
            f"No files in {Config.REPLACE_FILES_DEFAULT_SOURCE} to import",
            None,
        )
        return

    for path in files:
        self.validate_file(path)
        entry = files[path]

        if entry.import_this_file:
            self.find_similar(path)
            if len(entry.track_match_data) > 1:
                self.sort_track_match_data(path)
            selection = self.get_user_choices(path)
            self.process_selection(path, selection)

        # The user's choice may have cleared the flag, so test it again
        if entry.import_this_file:
            self.validate_file_data(path)

    # Tell user which files won't be imported and why
    self.inform_user()

    # Start the import of all other files
    self.import_next_file()
def validate_file(self, path: str) -> None:
    """
    Validate the single file at `path`:

        - check the file is readable
        - check the file has tags
        - on failure, mark it not to be imported and populate error text

    Only the entry for `path` is touched; the caller (do_import) calls
    this once per file.

    On return, the following TrackFileData fields should be set:

        tags:                   Yes
        destination_path:       No
        import_this_file:       Yes (set by default)
        error:                  No (only set if an error is detected)
        file_path_to_remove:    No
        track_id:               No
        track_match_data:       No
    """

    ifd = self.import_files_data[path]

    if file_is_unreadable(path):
        ifd.import_this_file = False
        ifd.error = f"{path} is unreadable"
        return

    try:
        ifd.tags = get_tags(path)
    except ApplicationError as e:
        ifd.import_this_file = False
        ifd.error = f"Tag errors ({str(e)})"
def find_similar(self, path: str) -> None:
    """
    Collect existing tracks whose title resembles the title of the file
    at `path`.

    Every existing track is scored on title; those scoring at least
    Config.FUZZYMATCH_MINIMUM_LIST are also scored on artist and recorded
    as a TrackMatchData in self.import_files_data[path].track_match_data.

    On return, the following TrackFileData fields should be set:

        tags:                   Yes
        destination_path:       No
        import_this_file:       Yes (set by default)
        error:                  No (only set if an error is detected)
        file_path_to_remove:    No
        track_id:               No
        track_match_data:       YES, IN THIS FUNCTION
    """

    ifd = self.import_files_data[path]
    new_title = ifd.tags.title
    new_artist = ifd.tags.artist

    for candidate in self.existing_tracks:
        title_score = self._get_match_score(new_title, candidate.title)
        if title_score < Config.FUZZYMATCH_MINIMUM_LIST:
            # Title not close enough: no need to score the artist
            continue

        artist_score = self._get_match_score(new_artist, candidate.artist)
        match = TrackMatchData(
            artist=candidate.artist,
            artist_match=artist_score,
            title=candidate.title,
            title_match=title_score,
            track_id=candidate.id,
        )
        ifd.track_match_data.append(match)
def sort_track_match_data(self, path: str) -> None:
"""
Sort matched tracks in artist-similarity order
"""
self.import_files_data[path].track_match_data.sort(
key=lambda x: x.artist_match, reverse=True
)
def _get_match_score(self, str1: str, str2: str) -> float:
    """
    Return a single score for how well str1 matches str2: four fuzz
    ratios (plain, partial, token-sort and token-set), each weighted at
    0.25 and added together.
    """

    scorers = (
        fuzz.ratio,
        fuzz.partial_ratio,
        fuzz.token_sort_ratio,
        fuzz.token_set_ratio,
    )

    # Combine scores
    return sum(scorer(str1, str2) * 0.25 for scorer in scorers)
def get_user_choices(self, path: str) -> int:
    """
    Ask the user, via a PickMatch dialog, whether to import this file as
    a new track, replace an existing track, or not import it at all.

    Return the dialog's selected_track_id when the dialog is accepted
    (0 means import as new, > 0 is the id of the track to replace and a
    negative value means don't import); return -1 if the dialog is
    rejected.
    """

    ifd = self.import_files_data[path]
    matches = ifd.track_match_data

    # New track details
    new_track_description = f"{ifd.tags.title} ({ifd.tags.artist})"

    # Each choice is (description, track_id, track path). The first two
    # are always "don't import" and "import as a new track".
    choices: list[tuple[str, int, str]] = [
        (Config.DO_NOT_IMPORT, -1, ""),
        (Config.IMPORT_AS_NEW, 0, ""),
    ]

    # Pre-select 'import as new' (index 1) unless the top match is good
    # enough on both artist and title, in which case pre-select it
    # (index 2)
    default = 1
    if matches:
        best = matches[0]
        artist_ok = best.artist_match >= Config.FUZZYMATCH_MINIMUM_SELECT_ARTIST
        if artist_ok and best.title_match >= Config.FUZZYMATCH_MINIMUM_SELECT_TITLE:
            default = 2

    # One further choice per similar existing track
    for match in matches:
        description = f"{match.title} ({match.artist})"
        if Config.FUZZYMATCH_SHOW_SCORES:
            description += f" ({match.title_match:.0f}%)"
        existing_path = self._get_existing_track(match.track_id).path
        choices.append((description, match.track_id, existing_path))

    dialog = PickMatch(
        new_track_description=new_track_description,
        choices=choices,
        default=default,
    )
    return dialog.selected_track_id if dialog.exec() else -1
def process_selection(self, path: str, selection: int) -> None:
"""
Process selection from PickMatch
"""
if selection < 0:
# User cancelled
self.import_files_data[path].import_this_file = False
self.import_files_data[path].error = "you asked not to import this file"
elif selection > 0:
# Import and replace track
self.replace_file(path=path, track_id=selection)
else:
# Import as new
self.import_as_new(path=path)
def replace_file(self, path: str, track_id: int) -> None:
    """
    Prepare the entry for `path` so that it replaces existing track
    `track_id`. Raise ApplicationError if track_id is less than 1.

    On return, the following TrackFileData fields should be set:

        tags:                   Yes
        destination_path:       YES, IN THIS FUNCTION
        import_this_file:       Yes (set by default)
        error:                  No (only set if an error is detected)
        file_path_to_remove:    YES, IN THIS FUNCTION
        track_id:               YES, IN THIS FUNCTION
        track_match_data:       Yes
    """

    ifd = self.import_files_data[path]

    if track_id < 1:
        raise ApplicationError(f"No track ID: replace_file({path=}, {track_id=})")

    ifd.track_id = track_id
    old_path = self._get_existing_track(track_id).path
    ifd.file_path_to_remove = old_path

    # A file already living in Config.IMPORT_DESTINATION takes the name of
    # the imported file; a file anywhere else keeps its existing name, so
    # that file names from CDs, etc, are not changed.
    lives_in_import_dir = os.path.dirname(old_path) == Config.IMPORT_DESTINATION
    ifd.destination_path = (
        os.path.join(Config.IMPORT_DESTINATION, os.path.basename(path))
        if lives_in_import_dir
        else old_path
    )
def _get_existing_track(self, track_id: int) -> Tracks:
"""
Lookup in existing track in the local cache and return it
"""
existing_track_records = [a for a in self.existing_tracks if a.id == track_id]
if len(existing_track_records) != 1:
raise ApplicationError(
f"Internal error in _get_existing_track: {existing_track_records=}"
)
return existing_track_records[0]
def import_as_new(self, path: str) -> None:
    """
    Prepare the entry for `path` to be imported as a new track: the file
    keeps its own name and goes into Config.IMPORT_DESTINATION.

    On return, the following TrackFileData fields should be set:

        tags:                   Yes
        destination_path:       YES, IN THIS FUNCTION
        import_this_file:       Yes (set by default)
        error:                  No (only set if an error is detected)
        file_path_to_remove:    No (not needed now)
        track_id:               Yes
        track_match_data:       Yes
    """

    ifd = self.import_files_data[path]
    filename = os.path.basename(path)
    ifd.destination_path = os.path.join(Config.IMPORT_DESTINATION, filename)
def validate_file_data(self, path: str) -> None:
"""
Check the data structures for integrity
"""
ifd = self.import_files_data[path]
# Check import_this_file
if not ifd.import_this_file:
return
# Check tags
if not (ifd.tags.artist and ifd.tags.title):
raise ApplicationError(f"validate_file_data: {ifd.tags=}, {path=}")
# Check file_path_to_remove
if ifd.file_path_to_remove and not os.path.exists(ifd.file_path_to_remove):
# File to remove is missing, but this isn't a major error. We
# may be importing to replace a deleted file.
ifd.file_path_to_remove = ""
# Check destination_path
if not ifd.destination_path:
raise ApplicationError(
f"validate_file_data: no destination path set ({path=})"
)
# If destination path is the same as file_path_to_remove, that's
# OK, otherwise if this is a new import then check check
# destination path doesn't already exists
if ifd.track_id == 0 and ifd.destination_path != ifd.file_path_to_remove:
while os.path.exists(ifd.destination_path):
msg = (
"New import requested but default destination path ({ifd.destination_path}) "
"already exists. Click OK and choose where to save this track"
)
show_OK(title="Desintation path exists", msg=msg, parent=None)
# Get output filename
pathspec = QFileDialog.getSaveFileName(
None,
"Save imported track",
directory=Config.IMPORT_DESTINATION,
)
if pathspec:
ifd.destination_path = pathspec[0]
else:
ifd.import_this_file = False
ifd.error = "destination file already exists"
return
# Check track_id
if ifd.track_id < 0:
raise ApplicationError(f"validate_file_data: track_id < 0, {path=}")
def inform_user(self) -> None:
"""
Tell user about files that won't be imported
"""
msgs: list[str] = []
for path, entry in self.import_files_data.items():
if entry.import_this_file is False:
msgs.append(
f"{os.path.basename(path)} will not be imported because {entry.error}"
)
if msgs:
show_OK("File not imported", "\r\r".join(msgs))
def import_next_file(self) -> None:
    """
    Import the next file sequentially.

    Entries are popped from self.import_files_data until one that is
    marked for import is found; a DoTrackImport worker is then started for
    it in its own QThread. When no entries remain, an "All files imported"
    status message is emitted instead.

    The worker's import_finished signal is connected to
    post_import_processing, which calls this method again, so files are
    imported one at a time.
    """

    while True:
        if not self.import_files_data:
            self.signals.status_message_signal.emit("All files imported", 10000)
            return

        # Get details for next file to import
        path, tfd = self.import_files_data.popitem()
        if tfd.import_this_file:
            break

    # Progress tracing goes to the application log rather than stdout
    log.debug(f"import_next_file {path=}")

    # Create and start a thread for processing
    worker = DoTrackImport(
        import_file_path=path,
        tags=tfd.tags,
        destination_path=tfd.destination_path,
        track_id=tfd.track_id,
    )
    thread = QThread()
    self.threads.append(thread)

    # Move worker to thread
    worker.moveToThread(thread)

    # Connect signals and slots
    thread.started.connect(worker.run)
    thread.started.connect(lambda: log.debug(f"Thread starting for {path=}"))

    worker.import_finished.connect(self.post_import_processing)
    worker.import_finished.connect(thread.quit)
    worker.import_finished.connect(lambda: log.debug(f"Worker ended for {path=}"))

    # Ensure cleanup only after thread is fully stopped
    thread.finished.connect(lambda: self.cleanup_thread(thread, worker))
    thread.finished.connect(lambda: log.debug(f"Thread ended for {path=}"))

    # Start the thread
    log.debug(f"Calling thread.start() for {path=}")
    thread.start()
def cleanup_thread(self, thread, worker):
"""
Remove references to finished threads/workers to prevent leaks.
"""
worker.deleteLater()
thread.deleteLater()
if thread in self.threads:
self.threads.remove(thread)
def post_import_processing(self, track_id: int) -> None:
    """
    Called when a worker reports that track `track_id` has been imported.

    Hand the track to the playlist model's update_or_insert() along with
    the stored row number, then move on to the next file.
    """

    log.debug(f"post_import_processing({track_id=})")

    model_data = self.model_data
    if model_data and model_data.base_model:
        model_data.base_model.update_or_insert(track_id, model_data.row_number)

    # Process next file
    self.import_next_file()
class DoTrackImport(QObject):
    """
    Class to manage the actual import of tracks in a thread.

    FileImporter.import_next_file creates one instance per file, moves it
    to a QThread and connects the thread's started signal to run().
    """

    # Emitted at the end of run() with the id of the imported track
    import_finished = pyqtSignal(int)
def __init__(
    self,
    import_file_path: str,
    tags: Tags,
    destination_path: str,
    track_id: int,
) -> None:
    """
    Record what is to be imported; no work is done until run() is called.

    import_file_path: file to import
    tags: tags to apply to the track record
    destination_path: where the file is to be moved to
    track_id: 0 to create a new track, otherwise the id of the track
        to update
    """

    super().__init__()

    self.signals = MusicMusterSignals()

    # Source and target of the file move
    self.import_file_path = import_file_path
    self.destination_track_path = destination_path

    # Details for the track record
    self.track_id = track_id
    self.tags = tags
def run(self) -> None:
    """
    Import one file: move it into place, then either create a new Tracks
    record (track_id == 0) or update the existing one.

    On success the destination file is passed to helpers.normalise_track,
    status messages are emitted and import_finished is emitted with the
    track's id.

    If the track record cannot be created, or the track to update cannot
    be found, a warning is emitted via show_warning_signal and the method
    returns without emitting import_finished.

    If moving the file into place fails, any existing destination file
    that was moved aside is put back before the error is re-raised.
    """

    temp_file: Optional[str] = None

    # Get audio metadata in this thread rather than calling function to save interactive time
    self.audio_metadata = helpers.get_audio_metadata(self.import_file_path)

    # If destination exists, move it out of the way
    if os.path.exists(self.destination_track_path):
        temp_file = self.destination_track_path + ".TMP"
        shutil.move(self.destination_track_path, temp_file)

    # Move file to destination
    try:
        shutil.move(self.import_file_path, self.destination_track_path)
    except OSError:
        # Don't leave the existing track's file stranded under its
        # temporary name if the new file could not be moved into place
        if temp_file and os.path.exists(temp_file):
            shutil.move(temp_file, self.destination_track_path)
        raise

    # Clean up
    if temp_file and os.path.exists(temp_file):
        os.unlink(temp_file)

    with db.Session() as session:
        self.signals.status_message_signal.emit(
            f"Importing {os.path.basename(self.import_file_path)}", 5000
        )

        if self.track_id == 0:
            # Import new track
            try:
                track = Tracks(
                    session,
                    path=self.destination_track_path,
                    **self.tags._asdict(),
                    **self.audio_metadata._asdict(),
                )
            except Exception as e:
                self.signals.show_warning_signal.emit(
                    "Error importing track", str(e)
                )
                # NOTE(review): import_finished is not emitted on this
                # path, so FileImporter.post_import_processing (and hence
                # the next file's import) is not triggered - confirm that
                # stopping here is intended.
                return
        else:
            # Update existing track
            track = session.get(Tracks, self.track_id)
            if not track:
                # Without this check the final emit would fail on
                # track.id; report the problem the same way as a failed
                # new import instead.
                self.signals.show_warning_signal.emit(
                    "Error importing track",
                    f"Track {self.track_id} not found",
                )
                return

            # Copy across only those values the track record has a
            # matching attribute for
            for key, value in self.tags._asdict().items():
                if hasattr(track, key):
                    setattr(track, key, value)
            for key, value in self.audio_metadata._asdict().items():
                if hasattr(track, key):
                    setattr(track, key, value)
            track.path = self.destination_track_path

        session.commit()

        helpers.normalise_track(self.destination_track_path)

        self.signals.status_message_signal.emit(
            f"{os.path.basename(self.import_file_path)} imported", 10000
        )
        self.import_finished.emit(track.id)
class PickMatch(QDialog):
    """
    Dialog for user to select which existing track to replace or to
    import to a new track

    After the dialog is accepted, selected_track_id holds the id of the
    checked radio button; it is -1 until OK is clicked.
    """
def __init__(
    self,
    new_track_description: str,
    choices: list[tuple[str, int, str]],
    default: int,
) -> None:
    """
    Build the dialog.

    new_track_description: text describing the track being imported
    choices: one (description, track_id, track path) tuple per option
    default: index into choices of the option to pre-select
    """

    super().__init__()

    # Result of the dialog; stays at -1 unless OK is clicked
    self.selected_track_id = -1

    self.default = default
    self.new_track_description = new_track_description
    self.init_ui(choices)
def init_ui(self, choices: list[tuple[str, int, str]]) -> None:
    """
    Lay out the dialog: an instruction label, one radio button per entry
    in `choices`, and OK / Cancel buttons.

    The radio button whose position in `choices` equals self.default is
    checked initially. A choice that refers to the current track in
    track_sequence is shown disabled so that it cannot be replaced.
    """

    self.setWindowTitle("New or replace")

    layout = QVBoxLayout()

    # Add instructions
    instructions = (
        f"Importing {self.new_track_description}.\n"
        "Import as a new track or replace existing track?"
    )
    instructions_label = QLabel(instructions)
    layout.addWidget(instructions_label)

    # Create a button group for radio buttons
    self.button_group = QButtonGroup()

    # Add radio buttons for each item
    for idx, (track_description, track_id, track_path) in enumerate(choices):
        is_current_track = (
            track_sequence.current
            and track_id
            and track_sequence.current.track_id == track_id
        )
        if is_current_track:
            # Don't allow current track to be replaced
            radio_button = QRadioButton("(Currently playing) " + track_description)
            radio_button.setDisabled(True)
            button_id = -1
        else:
            radio_button = QRadioButton(track_description)
            radio_button.setToolTip(track_path)
            button_id = track_id

        # NOTE(review): Qt documents an id of -1 as "assign an id
        # automatically" (a negative one) - confirm callers rely only on
        # the id being negative for "do not import".
        self.button_group.addButton(radio_button, button_id)
        layout.addWidget(radio_button)

        # Pre-select the requested default item
        if idx == self.default:
            radio_button.setChecked(True)

    # Add OK and Cancel buttons
    ok_button = QPushButton("OK")
    cancel_button = QPushButton("Cancel")
    button_layout = QHBoxLayout()
    for button in (ok_button, cancel_button):
        button_layout.addWidget(button)
    layout.addLayout(button_layout)

    self.setLayout(layout)

    # Connect buttons to actions
    ok_button.clicked.connect(self.on_ok)
    cancel_button.clicked.connect(self.reject)
def on_ok(self):
    """
    Record the id of the checked radio button in selected_track_id and
    accept (close) the dialog
    """

    checked_id = self.button_group.checkedId()
    self.selected_track_id = checked_id
    self.accept()