# Copyright (c) 2021 Chanjung Kim. All rights reserved.
# Licensed under the MIT License.
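#
# Separates the audio of a YouTube video into stems using a trained Conv-TasNet
# model. Example invocation (flags are defined below):
#     python predict.py --checkpoint=<checkpoint_dir> --video_id=<video_id> [--interpolate]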

import librosa
import numpy as np
import soundfile as sf
import tensorflow as tf
import youtube_dl

from absl import app
from absl import flags
from pathlib import Path
from os import path, listdir

from conv_tasnet import ConvTasNetParam, ConvTasNet
from dataset import Dataset

FLAGS = flags.FLAGS

flags.DEFINE_string("checkpoint", None,
                    "Directory containing saved weights", required=True)
flags.DEFINE_string("video_id", None, "YouTube video ID", required=True)
flags.DEFINE_bool("interpolate", False,
                  "Interpolate the overlapping part of each row")


# Progress hook passed to youtube_dl: prints a notice once the download finishes.
def youtube_dl_hook(d):
    if d["status"] == "finished":
        print("Done downloading...")


def main(argv):
    checkpoint_dir = FLAGS.checkpoint
    if not path.exists(checkpoint_dir):
        raise ValueError(f"'{checkpoint_dir}' does not exist")
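
    # Use the lexicographically last checkpoint file found in the directory.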
    checkpoints = [name for name in listdir(checkpoint_dir) if "ckpt" in name]
    if not checkpoints:
        raise ValueError("No checkpoint exists")
    checkpoints.sort()
    checkpoint_name = checkpoints[-1].split(".")[0]
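
    # Rebuild the model from its saved configuration and restore the weights.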
    param = ConvTasNetParam.load(f"{checkpoint_dir}/config.txt")
    model = ConvTasNet.make(param)
    model.load_weights(f"{checkpoint_dir}/{checkpoint_name}.ckpt")

    video_id = FLAGS.video_id
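
    # Download the video's audio track and convert it to a 44.1 kHz WAV file
    # named after the video title.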
    ydl_opts = {
        "format": "bestaudio/best",
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
            "preferredquality": "44100",
        }],
        "outtmpl": "%(title)s.wav",
        "progress_hooks": [youtube_dl_hook],
    }

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(video_id, download=False)
        status = ydl.download([video_id])

    title = info.get("title", None)
    filename = title + ".wav"
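
    # Load the downloaded audio as a mono 44.1 kHz waveform and compute how many
    # full portions (That segments of L samples, overlapping by `overlap`) fit.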
    audio, sr = librosa.load(filename, sr=44100, mono=True)

    num_samples = audio.shape[0]
    num_portions = (num_samples - param.overlap) // (param.That *
                                                     (param.L - param.overlap))
    num_samples_output = num_portions * param.That * (param.L - param.overlap)
    num_samples = num_samples_output + param.overlap
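
    # With --interpolate, build a trapezoidal window over each length-L segment:
    # it ramps up across the first `overlap` samples, stays at 1, then ramps down
    # across the last `overlap` samples, so neighbouring segments can be
    # cross-faded rather than simply concatenated.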
    if FLAGS.interpolate:
        def filter_gen(n):
            if n < param.overlap:
                return n / param.overlap
            elif n > param.L - param.overlap:
                return (param.L - n) / param.overlap
            else:
                return 1

        output_filter = np.array([filter_gen(n) for n in range(param.L)])

    print("predicting...")

    audio = audio[:num_samples]
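
    # Slice the trimmed waveform into a (num_portions, That, L) tensor of
    # overlapping segments; consecutive segments share `overlap` samples.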
    model_input = np.zeros((num_portions, param.That, param.L))
    for i in range(num_portions):
        for j in range(param.That):
            begin = (i * param.That + j) * (param.L - param.overlap)
            end = begin + param.L
            model_input[i][j] = audio[begin:end]
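
    # Separate all portions in one batch; the transpose moves the stem axis to
    # the front, giving an array of shape (C, num_portions, That, L).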
    separated = model.predict(model_input)
    separated = np.transpose(separated, (1, 0, 2, 3))
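
    # With --interpolate, apply the window and set aside the windowed trailing
    # overlap of each segment so it can be added back onto the stitched output
    # further below.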
    if FLAGS.interpolate:
        separated = output_filter * separated
        overlapped = separated[:, :, :, (param.L - param.overlap):]
        overlapped = np.pad(
            overlapped,
            pad_width=((0, 0), (0, 0), (0, 0),
                       (0, param.L - 2 * param.overlap)),
            mode="constant",
            constant_values=0)
        overlapped = np.reshape(overlapped, (param.C, num_samples_output))
        overlapped[:, 1:] = overlapped[:, :-1]
        overlapped[:, 0] = 0
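
    # Drop the trailing overlap of every segment and stitch the remaining samples
    # into a single (C, num_samples_output) array, one row per stem.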
    separated = separated[:, :, :, :(param.L - param.overlap)]
    separated = np.reshape(separated, (param.C, num_samples_output))

    if FLAGS.interpolate:
        separated += overlapped
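
    # Write one WAV file per separated stem.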
    print("saving...")

    for idx, stem in enumerate(Dataset.STEMS):
        sf.write(f"{title}_{stem}.wav", separated[idx], sr)


if __name__ == '__main__':
    app.run(main)