diff --git a/config/enhancer_stage1.yaml b/config/enhancer_stage1.yaml index 8689c39..43fc23f 100644 --- a/config/enhancer_stage1.yaml +++ b/config/enhancer_stage1.yaml @@ -1,5 +1,4 @@ lcfm_training_mode: ae load_fg_only: true batch_size_per_gpu: 16 -# Uncomment this if you are training your model -# denoiser_run_dir: runs/denoiser +denoiser_run_dir: runs/denoiser diff --git a/config/enhancer_stage2.yaml b/config/enhancer_stage2.yaml index 71eafd1..e9747fc 100644 --- a/config/enhancer_stage2.yaml +++ b/config/enhancer_stage2.yaml @@ -4,6 +4,5 @@ training_seconds: 3.0 gan_training_start_step: null lcfm_z_scale: 6 praat_augment_prob: 0.2 -# Uncomment this if you are training your model -# enhancer_stage1_run_dir: runs/enhancer_stage1 -# denoiser_run_dir: runs/denoiser +denoiser_run_dir: runs/denoiser +enhancer_stage1_run_dir: runs/enhancer_stage1 diff --git a/resemble_enhance/data/distorter/custom.py b/resemble_enhance/data/distorter/custom.py index 999632c..28428f7 100644 --- a/resemble_enhance/data/distorter/custom.py +++ b/resemble_enhance/data/distorter/custom.py @@ -42,6 +42,8 @@ def _sample_rir(self): return rir def apply(self, wav, sr): + # ref: https://github.com/haoheliu/voicefixer_main/blob/b06e07c945ac1d309b8a57ddcd599ca376b98cd9/dataloaders/augmentation/magical_effects.py#L158 + if len(self.rir_paths) == 0: return wav diff --git a/resemble_enhance/enhancer/__main__.py b/resemble_enhance/enhancer/__main__.py index 44f8132..43efc65 100644 --- a/resemble_enhance/enhancer/__main__.py +++ b/resemble_enhance/enhancer/__main__.py @@ -1,5 +1,6 @@ import argparse import random +import time from pathlib import Path import torch @@ -70,6 +71,8 @@ def main(): args = parser.parse_args() + start_time = time.perf_counter() + run_dir = args.run_dir paths = sorted(args.in_dir.glob(f"**/*{args.suffix}")) @@ -111,6 +114,10 @@ def main(): out_path.parent.mkdir(parents=True, exist_ok=True) torchaudio.save(out_path, hwav[None], sr) + # Cool emoji effect saying the job is done + elapsed_time = time.perf_counter() - start_time + print(f"🌟 Enhancement done! {len(paths)} files processed in {elapsed_time:.2f}s") + if __name__ == "__main__": main() diff --git a/setup.py b/setup.py index 73c530c..ea7caa0 100644 --- a/setup.py +++ b/setup.py @@ -10,8 +10,8 @@ def shell(*args): return out.decode("ascii").strip() -def write_version(version_core, dev=True): - if dev: +def write_version(version_core, pre_release=True): + if pre_release: last_commit_time = shell("git", "log", "-1", "--format=%cd", "--date=iso-strict") last_commit_time = datetime.strptime(last_commit_time, "%Y-%m-%dT%H:%M:%S%z") last_commit_time = last_commit_time.astimezone(timezone.utc) @@ -36,7 +36,7 @@ def write_version(version_core, dev=True): setup( name="resemble-enhance", python_requires=">=3.10", - version=write_version("0.0.1"), + version=write_version("0.0.1", pre_release=False), description="Speech denoising and enhancement with deep learning", long_description=long_description, long_description_content_type="text/markdown",