Compare commits

...

15 Commits

Author SHA1 Message Date
875ed7a668 dedupe before transcoding 2025-07-12 14:24:14 +01:00
599e76d44b implement transcoder as layer
- copy worker over

note: originally removed in commit 641abcdd90
2025-07-12 14:23:50 +01:00
d2d3e9c591 return file objects instead of paths
- don't call property as method
2025-07-12 14:16:09 +01:00
502e3a08da retrieve files from cascading subdirs 2025-07-12 14:05:22 +01:00
7fdb28d965 implement basic pruning based on dir name 2025-07-12 13:44:24 +01:00
a44d6f034a allow retrieval and pruning of contents by name 2025-07-12 13:43:51 +01:00
1c42269f92 iterate contents directly
- provide list of names
2025-07-12 13:21:10 +01:00
63f6f99caa don't create new log 2025-07-12 13:14:44 +01:00
8889f24166 obtain directory name from path 2025-07-12 13:14:11 +01:00
641abcdd90 remove transcode package 2025-07-12 12:45:50 +01:00
46332e1921 create dedupe stub 2025-07-12 12:37:16 +01:00
57069c2b69 create log 2025-07-12 12:37:06 +01:00
54944244ae create layer package and abstract class 2025-07-12 12:26:41 +01:00
92f9eae9e6 warn more descriptively 2025-07-02 20:17:21 +01:00
af908f51ca create dir package
- create classes for common directory types
2025-07-02 20:10:59 +01:00
17 changed files with 257 additions and 72 deletions

4
src/dir/__init__.py Normal file
View File

@ -0,0 +1,4 @@
from .directory import Directory
from .root import Root
from .artist import Artist
from .album import Album

22
src/dir/album.py Normal file
View File

@ -0,0 +1,22 @@
from .directory import Directory
from pathlib import Path
from log import Log
class Album(Directory):
    """Directory whose contents are the files found directly inside it.

    Sub-directories are not descended into; each one only produces a
    warning on this directory's log category ('ALB').
    """

    def __init__(self, path: Path, log: Log):
        super().__init__(path, log, 'ALB')

    @property
    def all_files(self) -> list:
        """Return the file objects of this album (the ``contents`` list itself)."""
        # todo: handle unexpected dirs
        return self.contents

    def populate(self, log: Log) -> list:
        """Build the contents list: one file object per regular file in ``self.path``.

        ``log`` is accepted to satisfy the ``Directory.populate`` signature;
        warnings go through ``self.log``.
        """
        found = []
        for e in self.path.iterdir():
            if e.is_file():
                found.append(self.create_file(e))
                continue
            if e.is_dir():
                self.log.warning('POP', f"Directory {e} ignored.")
        return found

19
src/dir/artist.py Normal file
View File

@ -0,0 +1,19 @@
from log import Log
from .directory import Directory
from .album import Album
from pathlib import Path
class Artist(Directory):
    """Directory whose contents are ``Album`` objects, one per sub-directory."""

    def __init__(self, path: Path, log: Log):
        super().__init__(path, log, 'ART')

    def populate(self, log: Log) -> list:
        """Wrap every sub-directory of ``self.path`` in an ``Album``.

        Plain files at this level are skipped with a warning. ``log`` is
        forwarded to each ``Album`` so it can set up its own log category.
        """
        albums = []
        for e in self.path.iterdir():
            if e.is_file():
                self.log.warning("POP", f"Warning, skipping non-dir '{e}' found in artist '{self.path.parts[-1]}'")
                continue
            if e.is_dir():
                albums.append(Album(e, log))
        return albums

61
src/dir/directory.py Normal file
View File

@ -0,0 +1,61 @@
from pathlib import Path
from .file import File, Track, Art, MiscFile
from abc import ABC, abstractmethod
from log import Log, LogCat
class Directory(ABC):
    """Abstract node of the directory tree.

    A directory has a ``path``, a category-bound logger (``log``) and a list
    of ``contents`` produced by the subclass-specific ``populate``. Entries
    of ``contents`` are looked up by their ``name`` attribute.
    """

    def __init__(self, path: Path, log: Log, logcat: str):
        """Store the path, bind a ``LogCat`` to ``log.queue`` and populate.

        ``populate`` runs last so that subclasses can already use
        ``self.path`` and ``self.log`` inside it.
        """
        self.path = path
        self.log = LogCat(log.queue, logcat)
        self.contents = self.populate(log)

    def __iter__(self):
        """Iterate over the entries of ``contents``."""
        return iter(self.contents)

    def __getitem__(self, name):
        """Return the first entry whose ``name`` equals ``name``.

        Raises:
            KeyError: if no entry has that name (the name is the argument).
        """
        for e in self:
            if e.name == name:
                return e
        raise KeyError(name)

    def prune(self, name):
        """Remove the first entry whose ``name`` equals ``name``.

        Raises:
            KeyError: if no entry has that name (the name is the argument).
        """
        for index, e in enumerate(self.contents):
            if e.name == name:
                # Delete by position so exactly the matched entry goes away.
                del self.contents[index]
                return
        raise KeyError(name)

    def __str__(self):
        return self.name

    @property
    def all_files(self) -> list:
        """Return a new list concatenating ``all_files`` of every entry."""
        files = []
        for c in self:
            # todo: handle unexpected files
            files.extend(c.all_files)
        return files

    @property
    def name(self):
        """Final component of ``path``."""
        return self.path.name

    def by_name(self):
        """Return the names of all entries, in ``contents`` order."""
        return [e.name for e in self.contents]

    @abstractmethod
    def populate(self, log: Log) -> list:
        """Build and return the list that becomes ``contents``."""
        raise NotImplementedError

    @staticmethod
    def create_file(file: Path) -> File:
        """Wrap ``file`` in the ``File`` subclass selected by its suffix.

        '.flac' gives ``Track``; '.jpg', '.jpeg' and '.png' give ``Art``;
        anything else gives ``MiscFile``. The comparison is case-sensitive.
        """
        suffix = file.suffix
        if suffix in ['.flac']:
            return Track(file)
        elif suffix in ['.jpg', '.jpeg', '.png']:
            return Art(file)
        else:
            return MiscFile(file)

4
src/dir/file/__init__.py Normal file
View File

@ -0,0 +1,4 @@
from .file import File
from .track import Track
from .miscfile import MiscFile
from .art import Art

7
src/dir/file/art.py Normal file
View File

@ -0,0 +1,7 @@
from . import File
from pathlib import Path
class Art(File):
    """``File`` subclass for image files (cover art)."""

    def __init__(self, location: Path):
        # No art-specific state; initialisation is left entirely to File.
        super().__init__(location)

View File

@ -1,10 +1,11 @@
from pathlib import Path from pathlib import Path
from abc import ABC
audio_extensions = ['.flac'] audio_extensions = ['.flac']
art_extensions = ['.jpg', '.jpeg', '.png'] art_extensions = ['.jpg', '.jpeg', '.png']
class File: class File(ABC):
def __init__(self, location: Path): def __init__(self, location: Path):
self.path = location self.path = location

7
src/dir/file/miscfile.py Normal file
View File

@ -0,0 +1,7 @@
from . import File
from pathlib import Path
class MiscFile(File):
    """``File`` subclass for files that are neither tracks nor art."""

    def __init__(self, location: Path):
        # No extra state; initialisation is left entirely to File.
        super().__init__(location)

7
src/dir/file/track.py Normal file
View File

@ -0,0 +1,7 @@
from . import File
from pathlib import Path
class Track(File):
    """``File`` subclass for audio tracks."""

    def __init__(self, location: Path):
        # No track-specific state; initialisation is left entirely to File.
        super().__init__(location)

18
src/dir/root.py Normal file
View File

@ -0,0 +1,18 @@
from .directory import Directory
from .artist import Artist
from pathlib import Path
from log import Log
class Root(Directory):
    """Top-level directory whose contents are ``Artist`` objects."""

    def __init__(self, path: Path, log: Log):
        super().__init__(path, log, 'ROOT')

    def populate(self, log: Log) -> list:
        """Wrap every sub-directory of ``self.path`` in an ``Artist``.

        Plain files at this level are skipped with a warning. ``log`` is
        forwarded to each ``Artist`` so it can set up its own log category.
        """
        artists = []
        for e in self.path.iterdir():
            if e.is_file():
                self.log.warning("POP", f"Warning, skipping non-dir '{e}' found in root")
                continue
            if e.is_dir():
                artists.append(Artist(e, log))
        return artists

View File

@ -1,3 +1,4 @@
from .file import File from .layer import Layer
from .worker import Worker from .worker import Worker
from .transcoder import Transcoder from .transcoder import Transcoder
from .dedupe import Dedupe

31
src/layers/dedupe.py Normal file
View File

@ -0,0 +1,31 @@
from log import Log
from dir import Root
from . import Layer
from dir import Artist
class Dedupe(Layer):
    """Layer that prunes from a tree whatever already exists in another tree.

    Matching is by exact name: an album is pruned when the other tree has an
    artist of the same name containing an album of the same name. An artist
    left with no albums is pruned as well.
    """

    def __init__(self, other: Root, log: Log):
        """Remember ``other`` as the tree to compare against."""
        # NOTE(review): "TCD" is the same category the Transcoder uses;
        # looks like a copy/paste leftover - confirm the intended category.
        super().__init__(log, "TCD")
        self.other = other

    def _process(self, left: Root):
        """Prune from ``left`` every album, then every emptied artist, found in ``self.other``."""
        right = self.other
        existing_artists = set(right.by_name())
        # Iterate over a snapshot: prune() removes from left.contents, and
        # removing from a list while iterating it skips the next entry.
        for artist in list(left):
            artist_name = artist.name
            if artist_name not in existing_artists:
                continue  # todo: fuzzy matching
            self.prune_artist(artist, right[artist_name])
            if not artist.contents:
                left.prune(artist_name)
                self.log.info('PRN', f"Pruned artist: {artist_name}")

    def prune_artist(self, left: Artist, right: Artist):
        """Prune from ``left`` every album whose name also appears in ``right``."""
        existing_albums = set(right.by_name())
        # Snapshot for the same reason as in _process: pruning mutates the
        # list being walked.
        for album in list(left):
            album_name = album.name
            if album_name in existing_albums:
                left.prune(album_name)
                self.log.info('PRN', f"Pruned album: {album_name}")

14
src/layers/layer.py Normal file
View File

@ -0,0 +1,14 @@
from abc import ABC, abstractmethod
from dir import Root
from log import Log, LogCat
class Layer(ABC):
    """Abstract processing step that is applied to a ``Root`` tree."""

    def __init__(self, log: Log, log_category: str):
        """Bind a ``LogCat`` for ``log_category`` to the queue of ``log``."""
        self.log = LogCat(log.queue, log_category)

    def process(self, root: Root):
        """Public entry point; delegates to ``_process`` and returns nothing."""
        self._process(root)

    @abstractmethod
    def _process(self, root: Root):
        """Do the actual work on ``root``; must be provided by subclasses."""
        raise NotImplementedError

45
src/layers/transcoder.py Normal file
View File

@ -0,0 +1,45 @@
from pathlib import Path
from multiprocessing import Pool, Manager, set_start_method
from log import Log, LogCat
from .worker import Worker
from dir import Root
from .layer import Layer
class Transcoder(Layer):
    """Layer that hands every file of a tree to a ``Worker`` for transcoding."""

    def __init__(self, encoder: Path, extension: str, output_root: Path, log: Log, log_path: Path):
        """Store the encoder path, output extension, output root and log path.

        ``log`` is only used to set up this layer's own log category;
        the transcoding runs create their own ``Log`` from ``log_path``.
        """
        super().__init__(log, 'TCD')
        self.encoder = encoder
        self.extension = extension
        self.output_root = output_root
        self.log_path = log_path

    def _process(self, root: Root):
        """Transcode every file reported by ``root.all_files``."""
        transcode_list = root.all_files
        self._transcode(transcode_list, self.encoder)

    def _transcode(self, transcode_list: list, encoder: Path, workers=16):
        """Run ``worker`` over ``transcode_list`` in a pool of ``workers`` processes.

        The run's ``Log`` is always stopped, even when the pool or one of
        the workers raises.
        """
        manager = Manager()
        # Queue created through the Manager - presumably so the LogCat that
        # is passed to the pool workers can be sent across processes; confirm.
        queue = manager.Queue()
        log = Log(self.log_path, queue)
        try:
            logcat = LogCat(log.queue, "TCD")
            args = [(str(self.output_root), self.extension, track, encoder, logcat) for track in transcode_list]
            with Pool(workers) as pool:
                pool.starmap(self.worker, args)
                # Close and join explicitly: leaving the with-block only
                # terminates the pool.
                pool.close()
                pool.join()
        finally:
            log.stop()

    def _transcode_single_thread(self, transcode_list: list, encoder: Path):
        """Run ``worker`` over ``transcode_list`` sequentially in this process.

        The run's ``Log`` is always stopped, even when a worker raises.
        """
        log = Log(self.log_path)
        try:
            logcat = LogCat(log.queue, "TCD")
            for track in transcode_list:
                self.worker(str(self.output_root), self.extension, track, encoder, logcat)
        finally:
            log.stop()

    @staticmethod
    def worker(output_root, extension, track, encoder, log):
        """Create a ``Worker`` for the output location and process one ``track``."""
        w = Worker(output_root, extension)
        w.transcode_worker(track, encoder, log)

View File

@ -1,15 +1,14 @@
import shutil import shutil
import subprocess import subprocess
from . import File
from pathlib import Path from pathlib import Path
from dir.file import File
class Worker: class Worker:
def __init__(self, output_root, extension): def __init__(self, output_root, extension):
self.output_root = Path(output_root) self.output_root = Path(output_root)
self.extension = extension self.extension = extension
def transcode_worker(self, track, encoder, log): def transcode_worker(self, track: File, encoder, log):
track = File(track)
if track.is_art: if track.is_art:
return self.copy_album_art(track, log) return self.copy_album_art(track, log)
elif track.is_audio: elif track.is_audio:

View File

@ -1,8 +1,9 @@
import argparse import argparse
from os.path import realpath from os.path import realpath
from pathlib import Path from pathlib import Path
from transcode import Transcoder from dir import Root
from log import Log from log import Log
from layers import Dedupe, Transcoder
def get_args(): def get_args():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
@ -17,9 +18,17 @@ def main(input_dir: Path, output_dir: Path, encoder: Path, out_extension: str =
log_path = wd / "logs" log_path = wd / "logs"
if encoder.parts[-1] == "qaac64.exe": if encoder.parts[-1] == "qaac64.exe":
out_extension = "m4a" out_extension = "m4a"
transcoder = Transcoder(encoder, out_extension, input_dir, output_dir, log_path) log = Log(log_path)
transcoder.transcode() input_root = Root(input_dir, log)
output_root = Root(output_dir, log)
dedupe = Dedupe(output_root, log)
dedupe.process(input_root)
transcoder = Transcoder(encoder, out_extension, output_dir, log, log_path)
transcoder.process(input_root)
log.stop()
if __name__ == '__main__': if __name__ == '__main__':
args = get_args() args = get_args()

View File

@ -1,64 +0,0 @@
from pathlib import Path
from multiprocessing import Pool, Manager, set_start_method
from log import Log, LogCat
from . import Worker
class Transcoder:
    """Collects files from an artist/album directory layout and transcodes them."""

    def __init__(self, encoder: Path, extension: str, input_root: Path, output_root: Path, log_path: Path):
        """Store the configuration and open the log used while scanning."""
        self.encoder = encoder
        self.extension = extension
        self.input_root = input_root
        self.output_root = output_root
        self.log_path = log_path
        self.__log = Log(log_path)
        self.log = LogCat(self.__log.queue, "TCD")

    def transcode(self):
        """Scan ``input_root`` two levels deep and transcode the files found.

        Non-directories at artist or album level are skipped with a warning.
        Inside an album, a file named "DONE" stops the scan of that album;
        files collected before it is reached are kept. The scanning log is
        stopped in all cases.
        """
        pending = []
        try:
            for artist in self.input_root.iterdir():
                if not artist.is_dir():
                    self.log.warning("TRK", f"Warning, skipping non-dir '{artist}' found in root")
                    continue
                for album in artist.iterdir():
                    if not album.is_dir():
                        self.log.warning("TRK", f"Warning, skipping non-dir '{album}' found in artist '{artist.parts[-1]}'")
                        continue
                    for file in album.iterdir():
                        if not file.is_file():
                            continue
                        if file.name == "DONE":
                            break
                        pending.append(file)
            self._transcode(pending, self.encoder)
        finally:
            self.__log.stop()

    def _transcode(self, transcode_list: list, encoder: Path, workers=16):
        """Run ``worker`` over ``transcode_list`` in a pool of ``workers`` processes."""
        # Keep the manager referenced for the whole run; the queue is created through it.
        manager = Manager()
        run_log = Log(self.log_path, manager.Queue())
        category = LogCat(run_log.queue, "TCD")
        out_root = str(self.output_root)
        jobs = []
        for track in transcode_list:
            jobs.append((out_root, self.extension, track, encoder, category))
        with Pool(workers) as pool:
            pool.starmap(self.worker, jobs)
            pool.close()
            pool.join()
        run_log.stop()

    def _transcode_single_thread(self, transcode_list: list, encoder: Path):
        """Run ``worker`` over ``transcode_list`` sequentially in this process."""
        run_log = Log(self.log_path)
        category = LogCat(run_log.queue, "TCD")
        for track in list(transcode_list):
            self.worker(str(self.output_root), self.extension, track, encoder, category)
        run_log.stop()

    @staticmethod
    def worker(output_root, extension, track, encoder, log):
        """Create a ``Worker`` for the output location and process one ``track``."""
        Worker(output_root, extension).transcode_worker(track, encoder, log)