Compare commits

...

15 Commits

Author SHA1 Message Date
875ed7a668 dedupe before transcoding 2025-07-12 14:24:14 +01:00
599e76d44b implement transcoder as layer
- copy worker over

note: originally removed in commit 641abcdd90
2025-07-12 14:23:50 +01:00
d2d3e9c591 return file objects instead of paths
- don't call property as method
2025-07-12 14:16:09 +01:00
502e3a08da retrieve files from cascading subdirs 2025-07-12 14:05:22 +01:00
7fdb28d965 implement basic pruning based on dir name 2025-07-12 13:44:24 +01:00
a44d6f034a allow retrieval and pruning of contents by name 2025-07-12 13:43:51 +01:00
1c42269f92 iterate contents directly
- provide list of names
2025-07-12 13:21:10 +01:00
63f6f99caa don't create new log 2025-07-12 13:14:44 +01:00
8889f24166 obtain directory name from path 2025-07-12 13:14:11 +01:00
641abcdd90 remove transcode package 2025-07-12 12:45:50 +01:00
46332e1921 create dedupe stub 2025-07-12 12:37:16 +01:00
57069c2b69 create log 2025-07-12 12:37:06 +01:00
54944244ae create layer package and abstract class 2025-07-12 12:26:41 +01:00
92f9eae9e6 warn more descriptively 2025-07-02 20:17:21 +01:00
af908f51ca create dir package
- create classes for common directory types
2025-07-02 20:10:59 +01:00
17 changed files with 257 additions and 72 deletions

4
src/dir/__init__.py Normal file
View File

@ -0,0 +1,4 @@
from .directory import Directory
from .root import Root
from .artist import Artist
from .album import Album

22
src/dir/album.py Normal file
View File

@ -0,0 +1,22 @@
from .directory import Directory
from pathlib import Path
from log import Log
class Album(Directory):
    """Directory node for one album; its contents are the files directly inside it."""

    def __init__(self, path: Path, log: Log):
        # 'ALB' is handed to Directory as this node's log category.
        super().__init__(path, log, 'ALB')

    @property
    def all_files(self) -> list:
        """Return this album's file objects (the ``contents`` list itself, not a copy)."""
        # todo: handle unexpected dirs
        return self.contents

    def populate(self, log: Log) -> list:
        """Scan the album directory and build its contents list.

        Regular files are wrapped via ``create_file``; sub-directories are
        reported with a 'POP' warning and left out. ``log`` is not used here.
        """
        files = []
        for entry in self.path.iterdir():
            if entry.is_file():
                files.append(self.create_file(entry))
                continue
            if entry.is_dir():
                self.log.warning('POP', f"Directory {entry} ignored.")
        return files

19
src/dir/artist.py Normal file
View File

@ -0,0 +1,19 @@
from log import Log
from .directory import Directory
from .album import Album
from pathlib import Path
class Artist(Directory):
    """Directory node for one artist; its contents are Album nodes."""

    def __init__(self, path: Path, log: Log):
        # 'ART' is handed to Directory as this node's log category.
        super().__init__(path, log, 'ART')

    def populate(self, log: Log) -> list:
        """Scan the artist directory and build one Album per sub-directory.

        Plain files at this level are reported with a 'POP' warning and
        skipped. ``log`` is forwarded to each Album that gets created.
        """
        albums = []
        for entry in self.path.iterdir():
            if entry.is_file():
                self.log.warning("POP", f"Warning, skipping non-dir '{entry}' found in artist '{self.path.parts[-1]}'")
                continue
            if entry.is_dir():
                albums.append(Album(entry, log))
        return albums

61
src/dir/directory.py Normal file
View File

@ -0,0 +1,61 @@
from pathlib import Path
from .file import File, Track, Art, MiscFile
from abc import ABC, abstractmethod
from log import Log, LogCat
class Directory(ABC):
    """Abstract node of the directory tree.

    A node stores its filesystem ``path``, a category-bound logger and a
    ``contents`` list produced by the subclass' ``populate``. Entries in
    ``contents`` are looked up and removed by their ``name`` attribute.
    """

    def __init__(self, path: Path, log: "Log", logcat: str):
        self.path = path
        self.log = LogCat(log.queue, logcat)
        # populate() runs last so implementations can already use self.path and self.log.
        self.contents = self.populate(log)

    def __iter__(self):
        """Iterate over the entries in ``contents``."""
        return iter(self.contents)

    def __getitem__(self, name):
        """Return the first entry whose ``name`` equals *name*.

        Raises:
            KeyError: no entry has that name (the name is carried in the exception).
        """
        for entry in self:
            if entry.name == name:
                return entry
        raise KeyError(name)

    def prune(self, name):
        """Remove the first entry whose ``name`` equals *name* from ``contents``.

        Raises:
            KeyError: no entry has that name (the name is carried in the exception).
        """
        for entry in self:
            if entry.name == name:
                # Returning right after the removal keeps mutating the list
                # during iteration safe.
                self.contents.remove(entry)
                return
        raise KeyError(name)

    def __str__(self):
        return self.name

    @property
    def all_files(self) -> list:
        """Return a new list concatenating ``all_files`` of every entry."""
        files = []
        for child in self:
            # todo: handle unexpected files
            files += child.all_files
        return files

    @property
    def name(self):
        """Final component of this node's path."""
        return self.path.name

    def by_name(self):
        """Return the names of all entries, in ``contents`` order."""
        return [entry.name for entry in self.contents]

    @abstractmethod
    def populate(self, log: "Log") -> list:
        """Build and return the list that becomes ``contents``."""
        raise NotImplementedError

    @staticmethod
    def create_file(file: Path) -> "File":
        """Wrap *file* in Track, Art or MiscFile depending on its suffix.

        The suffix comparison is case-sensitive.
        """
        suffix = file.suffix
        if suffix in ['.flac']:
            return Track(file)
        elif suffix in ['.jpg', '.jpeg', '.png']:
            return Art(file)
        else:
            return MiscFile(file)

4
src/dir/file/__init__.py Normal file
View File

@ -0,0 +1,4 @@
from .file import File
from .track import Track
from .miscfile import MiscFile
from .art import Art

7
src/dir/file/art.py Normal file
View File

@ -0,0 +1,7 @@
from . import File
from pathlib import Path
class Art(File):
    """File subtype that ``Directory.create_file`` returns for .jpg/.jpeg/.png paths."""

    def __init__(self, location: Path):
        # No art-specific state; construction is delegated entirely to File.
        super().__init__(location)

View File

@ -1,10 +1,11 @@
from pathlib import Path
from abc import ABC
audio_extensions = ['.flac']
art_extensions = ['.jpg', '.jpeg', '.png']
class File:
class File(ABC):
def __init__(self, location: Path):
self.path = location

7
src/dir/file/miscfile.py Normal file
View File

@ -0,0 +1,7 @@
from . import File
from pathlib import Path
class MiscFile(File):
    """File subtype that ``Directory.create_file`` returns when no other suffix rule matches."""

    def __init__(self, location: Path):
        # No extra state; construction is delegated entirely to File.
        super().__init__(location)

7
src/dir/file/track.py Normal file
View File

@ -0,0 +1,7 @@
from . import File
from pathlib import Path
class Track(File):
    """File subtype that ``Directory.create_file`` returns for .flac paths."""

    def __init__(self, location: Path):
        # No track-specific state; construction is delegated entirely to File.
        super().__init__(location)

18
src/dir/root.py Normal file
View File

@ -0,0 +1,18 @@
from .directory import Directory
from .artist import Artist
from pathlib import Path
from log import Log
class Root(Directory):
    """Top-level directory node; its contents are Artist nodes."""

    def __init__(self, path: Path, log: Log):
        # 'ROOT' is handed to Directory as this node's log category.
        super().__init__(path, log, 'ROOT')

    def populate(self, log: Log) -> list:
        """Scan the root directory and build one Artist per sub-directory.

        Plain files at this level are reported with a 'POP' warning and
        skipped. ``log`` is forwarded to each Artist that gets created.
        """
        artists = []
        for entry in self.path.iterdir():
            if entry.is_file():
                self.log.warning("POP", f"Warning, skipping non-dir '{entry}' found in root")
                continue
            if entry.is_dir():
                artists.append(Artist(entry, log))
        return artists

View File

@ -1,3 +1,4 @@
from .file import File
from .layer import Layer
from .worker import Worker
from .transcoder import Transcoder
from .dedupe import Dedupe

31
src/layers/dedupe.py Normal file
View File

@ -0,0 +1,31 @@
from log import Log
from dir import Root
from . import Layer
from dir import Artist
class Dedupe(Layer):
    """Layer that prunes entries from a tree when they already exist in another tree.

    Matching is by exact name: an album is removed from the processed tree
    when the same-named artist in ``other`` has an album with the same name,
    and an artist is removed once it has no albums left.
    """

    def __init__(self, other: Root, log: Log):
        # NOTE(review): "TCD" is also the category used by the Transcoder
        # layer — confirm this is intended for dedupe log lines.
        super().__init__(log, "TCD")
        self.other = other

    def _process(self, left: Root):
        """Prune from *left* everything already present in ``self.other``."""
        right = self.other
        existing_artists = set(right.by_name())
        # Iterate over a snapshot: left.prune() removes from the list that
        # iterating `left` walks, which would otherwise skip the next artist.
        for artist in list(left):
            artist_name = artist.name
            if artist_name not in existing_artists:
                continue  # todo: fuzzy matching
            self.prune_artist(artist, right[artist_name])
            if not artist.contents:
                left.prune(artist_name)
                self.log.info('PRN', f"Pruned artist: {artist_name}")

    def prune_artist(self, left: Artist, right: Artist):
        """Remove from *left* every album whose name also exists in *right*."""
        existing_albums = set(right.by_name())
        # Snapshot for the same reason as in _process: prune() mutates the
        # list being iterated.
        for album in list(left):
            album_name = album.name
            if album_name in existing_albums:
                left.prune(album_name)
                self.log.info('PRN', f"Pruned album: {album_name}")

14
src/layers/layer.py Normal file
View File

@ -0,0 +1,14 @@
from abc import ABC, abstractmethod
from dir import Root
from log import Log, LogCat
class Layer(ABC):
    """Abstract processing step applied to a Root tree.

    Subclasses implement ``_process``; callers invoke ``process``.
    """

    def __init__(self, log: "Log", log_category: str):
        # Bind a category-specific logger onto the shared log queue.
        self.log = LogCat(log.queue, log_category)

    def process(self, root: "Root"):
        """Run this layer on *root*; the result of ``_process`` is not returned."""
        self._process(root)

    @abstractmethod
    def _process(self, root: "Root"):
        """Layer-specific work, to be provided by subclasses."""
        raise NotImplementedError

45
src/layers/transcoder.py Normal file
View File

@ -0,0 +1,45 @@
from pathlib import Path
from multiprocessing import Pool, Manager, set_start_method
from log import Log, LogCat
from .worker import Worker
from dir import Root
from .layer import Layer
class Transcoder(Layer):
    """Layer that passes every file of a Root tree to ``Worker.transcode_worker``.

    Args:
        encoder: path of the encoder, forwarded to each worker call.
        extension: output extension, forwarded to each Worker.
        output_root: output directory, forwarded (as a string) to each Worker.
        log: log whose queue backs this layer's own 'TCD' logger.
        log_path: location used for the Log objects created per transcode run.
    """

    def __init__(self, encoder: Path, extension: str, output_root: Path, log: Log, log_path: Path):
        super().__init__(log, 'TCD')
        self.encoder = encoder
        self.extension = extension
        self.output_root = output_root
        self.log_path = log_path

    def _process(self, root: Root):
        """Collect all files below *root* and transcode them in a process pool."""
        transcode_list = root.all_files
        self._transcode(transcode_list, self.encoder)

    def _transcode(self, transcode_list: list, encoder: Path, workers=16):
        """Run ``worker`` for every entry of *transcode_list* in a pool of *workers* processes."""
        manager = Manager()
        # A manager queue is used so the logger handed to the pool's worker
        # processes is backed by a queue they can share.
        queue = manager.Queue()
        log = Log(self.log_path, queue)
        try:
            logcat = LogCat(log.queue, "TCD")
            args = [(str(self.output_root), self.extension, track, encoder, logcat) for track in transcode_list]
            with Pool(workers) as pool:
                pool.starmap(self.worker, args)
                pool.close()
                pool.join()
        finally:
            # Stop the log even when a worker raises, so it is never left running.
            log.stop()

    def _transcode_single_thread(self, transcode_list: list, encoder: Path):
        """Run ``worker`` for every entry of *transcode_list* sequentially in this process."""
        log = Log(self.log_path)
        try:
            logcat = LogCat(log.queue, "TCD")
            for track in transcode_list:
                self.worker(str(self.output_root), self.extension, track, encoder, logcat)
        finally:
            log.stop()

    @staticmethod
    def worker(output_root, extension, track, encoder, log):
        """Pool entry point: build a Worker and let it handle a single *track*."""
        w = Worker(output_root, extension)
        w.transcode_worker(track, encoder, log)

View File

@ -1,15 +1,14 @@
import shutil
import subprocess
from . import File
from pathlib import Path
from dir.file import File
class Worker:
def __init__(self, output_root, extension):
self.output_root = Path(output_root)
self.extension = extension
def transcode_worker(self, track, encoder, log):
track = File(track)
def transcode_worker(self, track: File, encoder, log):
if track.is_art:
return self.copy_album_art(track, log)
elif track.is_audio:

View File

@ -1,8 +1,9 @@
import argparse
from os.path import realpath
from pathlib import Path
from transcode import Transcoder
from dir import Root
from log import Log
from layers import Dedupe, Transcoder
def get_args():
parser = argparse.ArgumentParser()
@ -17,9 +18,17 @@ def main(input_dir: Path, output_dir: Path, encoder: Path, out_extension: str =
log_path = wd / "logs"
if encoder.parts[-1] == "qaac64.exe":
out_extension = "m4a"
transcoder = Transcoder(encoder, out_extension, input_dir, output_dir, log_path)
transcoder.transcode()
log = Log(log_path)
input_root = Root(input_dir, log)
output_root = Root(output_dir, log)
dedupe = Dedupe(output_root, log)
dedupe.process(input_root)
transcoder = Transcoder(encoder, out_extension, output_dir, log, log_path)
transcoder.process(input_root)
log.stop()
if __name__ == '__main__':
args = get_args()

View File

@ -1,64 +0,0 @@
from pathlib import Path
from multiprocessing import Pool, Manager, set_start_method
from log import Log, LogCat
from . import Worker
class Transcoder:
    """Walks ``input_root`` (artist/album/file) and passes the files found to Worker.

    Args:
        encoder: path of the encoder, forwarded to each worker call.
        extension: output extension, forwarded to each Worker.
        input_root: directory whose sub-directories are treated as artists.
        output_root: output directory, forwarded (as a string) to each Worker.
        log_path: location used for every Log object this class creates.
    """

    def __init__(self, encoder: Path, extension: str, input_root: Path, output_root: Path, log_path: Path):
        self.encoder = encoder
        self.extension = extension
        self.input_root = input_root
        self.output_root = output_root
        self.log_path = log_path
        self.__log = Log(log_path)
        self.log = LogCat(self.__log.queue, "TCD")

    def transcode(self):
        """Collect files from every album directory and transcode them.

        Non-directory entries at artist or album level are logged and
        skipped. Within an album, iteration stops at a file named "DONE";
        files yielded before it are still collected.
        """
        transcode_list = []
        try:
            for artist in self.input_root.iterdir():
                if not artist.is_dir():
                    self.log.warning("TRK", f"Warning, skipping non-dir '{artist}' found in root")
                    continue
                for album in artist.iterdir():
                    if not album.is_dir():
                        self.log.warning("TRK", f"Warning, skipping non-dir '{album}' found in artist '{artist.parts[-1]}'")
                        continue
                    for file in album.iterdir():
                        if not file.is_file():
                            continue
                        if file.name == "DONE":
                            break
                        transcode_list.append(file)
            self._transcode(transcode_list, self.encoder)
        finally:
            self.__log.stop()

    def _transcode(self, transcode_list: list, encoder: Path, workers=16):
        """Run ``worker`` for every entry of *transcode_list* in a pool of *workers* processes."""
        manager = Manager()
        queue = manager.Queue()
        log = Log(self.log_path, queue)
        try:
            logcat = LogCat(log.queue, "TCD")
            args = [(str(self.output_root), self.extension, track, encoder, logcat) for track in transcode_list]
            with Pool(workers) as pool:
                pool.starmap(self.worker, args)
                pool.close()
                pool.join()
        finally:
            # Stop the log even when a worker raises, so it is never left running.
            log.stop()

    def _transcode_single_thread(self, transcode_list: list, encoder: Path):
        """Run ``worker`` for every entry of *transcode_list* sequentially in this process."""
        log = Log(self.log_path)
        try:
            logcat = LogCat(log.queue, "TCD")
            for track in transcode_list:
                self.worker(str(self.output_root), self.extension, track, encoder, logcat)
        finally:
            log.stop()

    @staticmethod
    def worker(output_root, extension, track, encoder, log):
        """Pool entry point: build a Worker and let it handle a single *track*."""
        w = Worker(output_root, extension)
        w.transcode_worker(track, encoder, log)