-
Notifications
You must be signed in to change notification settings - Fork 91
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Inference_only * Add new endpoints * Add demo * Fix endpoint name * Update gradio demo * Getting language feedback first * Update readme * Update to latest TTS * Update docs
- Loading branch information
1 parent
818a108
commit 7eb6bc2
Showing
6 changed files
with
175 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
demo_outputs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
import gradio as gr | ||
import requests | ||
import base64 | ||
import tempfile | ||
import json | ||
import os | ||
|
||
|
||
# Base URL of the TTS server this demo sends its requests to.
SERVER_URL = 'http://localhost:8000'
# Root directory for everything the demo writes to disk.
OUTPUT = "./demo_outputs"
# Maps a cloned speaker's name to the embeddings loaded from / saved to its
# JSON file under OUTPUT/cloned_speakers.
cloned_speakers = {}

print("Preparing file structure...")
_cloned_speakers_dir = os.path.join(OUTPUT, "cloned_speakers")
# Only a directory that already existed before this run can contain speakers
# worth loading, so record that before creating anything.
_had_cloned_speakers_dir = os.path.isdir(_cloned_speakers_dir)
# makedirs(exist_ok=True) also repairs a partially created tree (OUTPUT present
# but a sub-directory missing), which would otherwise make later writes fail.
os.makedirs(_cloned_speakers_dir, exist_ok=True)
os.makedirs(os.path.join(OUTPUT, "generated_audios"), exist_ok=True)

if _had_cloned_speakers_dir:
    print("Loading existing cloned speakers...")
    for file in os.listdir(_cloned_speakers_dir):
        if file.endswith(".json"):
            with open(os.path.join(_cloned_speakers_dir, file), "r") as fp:
                # Strip the ".json" suffix: the speaker name is the file stem.
                cloned_speakers[file[:-5]] = json.load(fp)
    print("Available cloned speakers:", ", ".join(cloned_speakers.keys()))
|
||
# Fetch the supported languages and the built-in ("studio") speakers from the
# server once at start-up; the UI is built from these two values.
# NOTE: the misspelled name LANUGAGES is kept on purpose because the UI code
# in this file refers to it under that spelling.
try:
    print("Getting metadata from server ...")
    _languages_response = requests.get(SERVER_URL + "/languages")
    # Turn an HTTP error status into an exception instead of parsing its body.
    _languages_response.raise_for_status()
    LANUGAGES = _languages_response.json()
    print("Available languages:", ", ".join(LANUGAGES))
    _speakers_response = requests.get(SERVER_URL + "/studio_speakers")
    _speakers_response.raise_for_status()
    STUDIO_SPEAKERS = _speakers_response.json()
    print("Available studio speakers:", ", ".join(STUDIO_SPEAKERS.keys()))
except (requests.RequestException, ValueError) as err:
    # RequestException covers connection and HTTP failures; ValueError covers
    # a body that is not valid JSON. Chain the cause so it stays visible.
    raise Exception("Please make sure the server is running first.") from err
|
||
|
||
def clone_speaker(upload_file, clone_speaker_name, cloned_speaker_names):
    """Clone a voice from a reference recording and register it under a name.

    Posts the recording to the server's ``/clone_speaker`` endpoint, writes the
    returned embeddings to ``<name>.json`` under ``OUTPUT/cloned_speakers`` and
    stores them in the module-level ``cloned_speakers`` mapping.

    Args:
        upload_file: Path of the reference audio file to upload.
        clone_speaker_name: Name under which the new speaker is stored.
        cloned_speaker_names: List of cloned speaker names; updated in place.

    Returns:
        A 4-tuple ``(upload_file, clone_speaker_name, cloned_speaker_names,
        dropdown_update)`` where the last item refreshes the dropdown choices.

    Raises:
        gr.Error: If no audio was provided or the name is not a plain file name.
        requests.HTTPError: If the server answers with an error status.
    """
    if not upload_file:
        raise gr.Error("Please upload a reference audio file first.")
    # The name is used as a file name, so refuse anything that is empty or
    # that would resolve outside the cloned_speakers directory.
    if (
        not clone_speaker_name
        or clone_speaker_name in (".", "..")
        or os.path.basename(clone_speaker_name) != clone_speaker_name
    ):
        raise gr.Error("Please provide a valid speaker name (no path separators).")

    # Keep the upload inside a context manager so the handle is always closed.
    with open(upload_file, "rb") as wav_fp:
        response = requests.post(
            SERVER_URL + "/clone_speaker",
            files={"wav_file": ("reference.wav", wav_fp)},
        )
    # Fail loudly on an error status instead of saving an error payload.
    response.raise_for_status()
    embeddings = response.json()

    with open(os.path.join(OUTPUT, "cloned_speakers", clone_speaker_name + ".json"), "w") as fp:
        json.dump(embeddings, fp)
    cloned_speakers[clone_speaker_name] = embeddings
    # Re-cloning an existing name overwrites it; do not list it twice.
    if clone_speaker_name not in cloned_speaker_names:
        cloned_speaker_names.append(clone_speaker_name)
    return upload_file, clone_speaker_name, cloned_speaker_names, gr.Dropdown.update(choices=cloned_speaker_names)
|
||
def tts(text, speaker_type, speaker_name_studio, speaker_name_custom, lang):
    """Synthesize ``text`` on the server and save the audio to a ``.wav`` file.

    Looks up the selected speaker's embeddings, posts them with the text and
    language to the server's ``/tts`` endpoint, base64-decodes the response
    body and writes it to a new file under ``OUTPUT/generated_audios``.

    Args:
        text: Text to synthesize.
        speaker_type: ``'Studio'`` selects a studio speaker; any other value
            selects a cloned speaker.
        speaker_name_studio: Key into ``STUDIO_SPEAKERS``.
        speaker_name_custom: Key into ``cloned_speakers``.
        lang: Language value sent to the server.

    Returns:
        Path of the written ``.wav`` file.

    Raises:
        gr.Error: If the selected speaker is unknown (e.g. nothing selected).
        requests.HTTPError: If the server answers with an error status.
    """
    if speaker_type == 'Studio':
        embeddings = STUDIO_SPEAKERS.get(speaker_name_studio)
    else:
        embeddings = cloned_speakers.get(speaker_name_custom)
    if embeddings is None:
        raise gr.Error("Please select an available speaker first.")

    response = requests.post(
        SERVER_URL + "/tts",
        json={
            "text": text,
            "language": lang,
            "speaker_embedding": embeddings["speaker_embedding"],
            "gpt_cond_latent": embeddings["gpt_cond_latent"],
        },
    )
    # Do not try to base64-decode the body of an error response.
    response.raise_for_status()

    # Public tempfile API for a unique name; delete=False keeps the file so it
    # can be served after this function returns.
    with tempfile.NamedTemporaryFile(
        dir=os.path.join(OUTPUT, "generated_audios"),
        suffix=".wav",
        delete=False,
    ) as fp:
        fp.write(base64.b64decode(response.content))
    return fp.name
|
||
# Gradio UI: one tab for synthesis and one for cloning a new speaker.
# NOTE(review): the container nesting below was reconstructed from a listing
# whose indentation had been stripped; confirm it against the intended layout.
with gr.Blocks() as demo:
    # Per-session list of cloned speaker names, seeded from what is on disk.
    cloned_speaker_names = gr.State(list(cloned_speakers.keys()))
    with gr.Tab("TTS"):
        with gr.Column():
            with gr.Row():
                speaker_name_studio = gr.Dropdown(
                    label="Studio speaker",
                    # A plain list rather than a dict view.
                    choices=list(STUDIO_SPEAKERS),
                    value="Asya Anara" if "Asya Anara" in STUDIO_SPEAKERS else None,
                )
                speaker_name_custom = gr.Dropdown(
                    label="Cloned speaker",
                    choices=cloned_speaker_names.value,
                    value=cloned_speaker_names.value[0] if cloned_speaker_names.value else None,
                )
                speaker_type = gr.Dropdown(label="Speaker type", choices=["Studio", "Cloned"], value="Studio")
            with gr.Column():
                lang = gr.Dropdown(
                    label="Language",
                    choices=LANUGAGES,
                    # Only preselect "en" when the server actually offers it.
                    value="en" if "en" in LANUGAGES else None,
                )
                text = gr.Textbox(label="text", value="A quick brown fox jumps over the lazy dog.")
                tts_button = gr.Button(value="TTS")
            with gr.Column():
                generated_audio = gr.Audio(label="Generated audio", autoplay=True)
    with gr.Tab("Clone a new speaker"):
        with gr.Column():
            upload_file = gr.Audio(label="Upload reference audio", type="filepath")
            clone_speaker_name = gr.Textbox(label="Speaker name", value="default_speaker")
            clone_button = gr.Button(value="Clone speaker")

    # Cloning refreshes the name list and the "Cloned speaker" dropdown.
    clone_button.click(
        fn=clone_speaker,
        inputs=[upload_file, clone_speaker_name, cloned_speaker_names],
        outputs=[upload_file, clone_speaker_name, cloned_speaker_names, speaker_name_custom],
    )

    # Synthesis returns a file path that the audio component loads.
    tts_button.click(
        fn=tts,
        inputs=[text, speaker_type, speaker_name_studio, speaker_name_custom, lang],
        outputs=[generated_audio],
    )
|
||
if __name__ == "__main__":
    # Start the web UI only when the file is run as a script: port 3009,
    # host "0.0.0.0", with share and debug both switched off.
    launch_options = {
        "share": False,
        "debug": False,
        "server_port": 3009,
        "server_name": "0.0.0.0",
    }
    demo.launch(**launch_options)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
requests==2.31.0 | ||
gradio==3.50.2 |