Add threading.Lock() to 'support' concurrent requests #22

Open · wants to merge 5 commits into base: main
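This PR serializes access to the XTTS model by wrapping each endpoint body in a single process-wide `threading.Lock`, so overlapping requests are handled one at a time instead of contending for the model. A minimal sketch of that pattern follows (the endpoint, schema, and `run_model` helper are illustrative placeholders, not the repository's exact code):

```python
import threading

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()
lock = threading.Lock()  # one process-wide lock shared by all request handlers


class SynthesisRequest(BaseModel):
    # Illustrative input schema, not the PR's exact StreamingInputs/TTSInputs models.
    text: str
    language: str


def run_model(text: str, language: str) -> dict:
    # Stand-in for the actual XTTS inference call.
    return {"text": text, "language": language}


@app.post("/tts")
def synthesize(req: SynthesisRequest):
    # FastAPI runs sync ("def") endpoints in a thread pool, so two requests can
    # reach the model at the same time; holding the lock makes them run serially.
    with lock:
        return run_model(req.text, req.language)
```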
54 changes: 36 additions & 18 deletions .github/workflows/build-and-push-to-ghcr.yml
@@ -7,6 +7,12 @@ jobs:
build-and-push-to-ghcr-cuda118:
runs-on: ubuntu-22.04
steps:
-
name: Set owner name to lower case
run: |
echo "OWNER_LC=${OWNER,,}" >>${GITHUB_ENV}
env:
OWNER: '${{ github.repository_owner }}'
-
name: Checkout
uses: actions/checkout@v3
@@ -21,7 +27,7 @@ jobs:
docker login --username ${{ github.actor }} --password ${{ secrets.GITHUB_TOKEN }} ghcr.io
- name: 'Remove cache'
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf "/usr/local/share/boost"
@@ -34,8 +40,8 @@ jobs:
context: "{{defaultContext}}:server"
file: Dockerfile
push: false # Do not push image for PR
cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest; type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-${{ github.event.number }}
cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-${{ github.event.number }}
cache-from: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-latest; type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-pr-${{ github.event.number }}
cache-to: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-pr-${{ github.event.number }}

- name: Build and Push image Cuda 11.8
if: github.ref == 'refs/heads/main'
@@ -44,14 +50,20 @@ jobs:
context: "{{defaultContext}}:server"
file: Dockerfile
push: true # Push if merged
cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest
cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest
tags: ghcr.io/coqui-ai/xtts-streaming-server:latest, ghcr.io/coqui-ai/xtts-streaming-server:main-${{ github.sha }}
cache-from: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-latest
cache-to: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-latest
tags: ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:latest, ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:main-${{ github.sha }}
#build-args:

build-and-push-to-ghcr-cuda121:
runs-on: ubuntu-22.04
steps:
-
name: Set owner name to lower case
run: |
echo "OWNER_LC=${OWNER,,}" >>${GITHUB_ENV}
env:
OWNER: '${{ github.repository_owner }}'
-
name: Checkout
uses: actions/checkout@v3
@@ -66,7 +78,7 @@ jobs:
docker login --username ${{ github.actor }} --password ${{ secrets.GITHUB_TOKEN }} ghcr.io
- name: 'Remove cache'
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf "/usr/local/share/boost"
@@ -79,8 +91,8 @@ jobs:
context: "{{defaultContext}}:server"
file: Dockerfile.cuda121
push: false # Do not push image for PR
cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda121; type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }}
cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }}
cache-from: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-latest-cuda121; type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }}
cache-to: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }}

- name: Build and Push image cuda 12.1
if: github.ref == 'refs/heads/main'
@@ -89,13 +101,19 @@ jobs:
context: "{{defaultContext}}:server"
file: Dockerfile.cuda121
push: true # Push if merged
cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda121
cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda121
tags: ghcr.io/coqui-ai/xtts-streaming-server:latest-cuda121, ghcr.io/coqui-ai/xtts-streaming-server:main-cuda121-${{ github.sha }}
cache-from: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-latest-cuda121
cache-to: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-latest-cuda121
tags: ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:latest-cuda121, ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:main-cuda121-${{ github.sha }}
#build-args:
build-and-push-to-ghcr-cpu:
runs-on: ubuntu-22.04
steps:
-
name: Set owner name to lower case
run: |
echo "OWNER_LC=${OWNER,,}" >>${GITHUB_ENV}
env:
OWNER: '${{ github.repository_owner }}'
-
name: Checkout
uses: actions/checkout@v3
@@ -110,7 +128,7 @@ jobs:
docker login --username ${{ github.actor }} --password ${{ secrets.GITHUB_TOKEN }} ghcr.io
- name: 'Remove cache'
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf "/usr/local/share/boost"
@@ -123,8 +141,8 @@ jobs:
context: "{{defaultContext}}:server"
file: Dockerfile.cpu
push: false # Do not push image for PR
cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cpu; type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }}
cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cpu-${{ github.event.number }}
cache-from: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-latest-cpu; type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }}
cache-to: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-pr-cpu-${{ github.event.number }}

- name: Build and Push image CPU
if: github.ref == 'refs/heads/main'
@@ -133,7 +151,7 @@ jobs:
context: "{{defaultContext}}:server"
file: Dockerfile.cpu
push: true # Push if merged
cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cpu
cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cpu
tags: ghcr.io/coqui-ai/xtts-streaming-server:latest-cpu, ghcr.io/coqui-ai/xtts-streaming-server:main-cpu-${{ github.sha }}
cache-from: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-latest-cpu
cache-to: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-latest-cpu
tags: ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:latest-cpu, ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:main-cpu-${{ github.sha }}
#build-args:
5 changes: 4 additions & 1 deletion README.md
@@ -1,5 +1,5 @@
# XTTS streaming server
*Warning: XTTS-streaming-server doesn't support concurrent streaming requests, it's a demo server, not meant for production.*
*Warning: XTTS-streaming-server is a demo server, not meant for production.*

https://github.com/coqui-ai/xtts-streaming-server/assets/17219561/7220442a-e88a-4288-8a73-608c4b39d06c

@@ -81,3 +81,6 @@ $ cd xtts-streaming-server/test
$ python -m pip install -r requirements.txt
$ python test_streaming.py
```

### Forked Repos
If you fork this repo, the GitHub Action will automatically build and push a Docker image to your container registry, so the image will be available at ghcr.io/yourusername/xtts-streaming-server.
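GHCR image names must be all lowercase, which is why each job in the workflow above first lowercases the repository owner (`${OWNER,,}`) before composing the image reference. A small Python illustration of that naming rule, mirroring the workflow's tag layout (the helper itself is not part of the repo):

```python
def ghcr_image(owner: str, repo: str = "xtts-streaming-server", tag: str = "latest") -> str:
    # GHCR rejects uppercase characters in image names, so the owner is lowercased,
    # just as the "Set owner name to lower case" step does with ${OWNER,,}.
    return f"ghcr.io/{owner.lower()}/{repo}:{tag}"


print(ghcr_image("YourUserName"))  # -> ghcr.io/yourusername/xtts-streaming-server:latest
```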
90 changes: 49 additions & 41 deletions server/main.py
@@ -5,6 +5,7 @@
import wave
import torch
import numpy as np
import threading
from typing import List
from pydantic import BaseModel

@@ -19,7 +20,7 @@
torch.set_num_threads(int(os.environ.get("NUM_THREADS", os.cpu_count())))
device = torch.device("cuda" if os.environ.get("USE_CPU", "0") == "0" else "cpu")
if not torch.cuda.is_available() and device == "cuda":
raise RuntimeError("CUDA device unavailable, please use Dockerfile.cpu instead.")
raise RuntimeError("CUDA device unavailable, please use Dockerfile.cpu instead.")

custom_model_path = os.environ.get("CUSTOM_MODEL_PATH", "/app/tts_models")

@@ -44,6 +45,9 @@

print("Running XTTS Server ...", flush=True)

lock = threading.Lock() # Create a lock object
print("Establishing lock ...", flush=True)

##### Run fastapi #####
app = FastAPI(
title="XTTS Streaming server",
@@ -52,20 +56,20 @@
docs_url="/",
)


@app.post("/clone_speaker")
def predict_speaker(wav_file: UploadFile):
"""Compute conditioning inputs from reference audio file."""
temp_audio_name = next(tempfile._get_candidate_names())
with open(temp_audio_name, "wb") as temp, torch.inference_mode():
temp.write(io.BytesIO(wav_file.file.read()).getbuffer())
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
temp_audio_name
)
return {
"gpt_cond_latent": gpt_cond_latent.cpu().squeeze().half().tolist(),
"speaker_embedding": speaker_embedding.cpu().squeeze().half().tolist(),
}
with lock:
"""Compute conditioning inputs from reference audio file."""
temp_audio_name = next(tempfile._get_candidate_names())
with open(temp_audio_name, "wb") as temp, torch.inference_mode():
temp.write(io.BytesIO(wav_file.file.read()).getbuffer())
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
temp_audio_name
)
return {
"gpt_cond_latent": gpt_cond_latent.cpu().squeeze().half().tolist(),
"speaker_embedding": speaker_embedding.cpu().squeeze().half().tolist(),
}


def postprocess(wav):
@@ -137,10 +141,11 @@ def predict_streaming_generator(parsed_input: dict = Body(...)):

@app.post("/tts_stream")
def predict_streaming_endpoint(parsed_input: StreamingInputs):
return StreamingResponse(
predict_streaming_generator(parsed_input),
media_type="audio/wav",
)
with lock:
return StreamingResponse(
predict_streaming_generator(parsed_input),
media_type="audio/wav",
)

class TTSInputs(BaseModel):
speaker_embedding: List[float]
@@ -150,36 +155,39 @@ class TTSInputs(BaseModel):

@app.post("/tts")
def predict_speech(parsed_input: TTSInputs):
speaker_embedding = torch.tensor(parsed_input.speaker_embedding).unsqueeze(0).unsqueeze(-1)
gpt_cond_latent = torch.tensor(parsed_input.gpt_cond_latent).reshape((-1, 1024)).unsqueeze(0)
text = parsed_input.text
language = parsed_input.language

out = model.inference(
text,
language,
gpt_cond_latent,
speaker_embedding,
)
with lock:
speaker_embedding = torch.tensor(parsed_input.speaker_embedding).unsqueeze(0).unsqueeze(-1)
gpt_cond_latent = torch.tensor(parsed_input.gpt_cond_latent).reshape((-1, 1024)).unsqueeze(0)
text = parsed_input.text
language = parsed_input.language

out = model.inference(
text,
language,
gpt_cond_latent,
speaker_embedding,
)

wav = postprocess(torch.tensor(out["wav"]))
wav = postprocess(torch.tensor(out["wav"]))

return encode_audio_common(wav.tobytes())
return encode_audio_common(wav.tobytes())


@app.get("/studio_speakers")
def get_speakers():
if hasattr(model, "speaker_manager") and hasattr(model.speaker_manager, "speakers"):
return {
speaker: {
"speaker_embedding": model.speaker_manager.speakers[speaker]["speaker_embedding"].cpu().squeeze().half().tolist(),
"gpt_cond_latent": model.speaker_manager.speakers[speaker]["gpt_cond_latent"].cpu().squeeze().half().tolist(),
with lock:
if hasattr(model, "speaker_manager") and hasattr(model.speaker_manager, "speakers"):
return {
speaker: {
"speaker_embedding": model.speaker_manager.speakers[speaker]["speaker_embedding"].cpu().squeeze().half().tolist(),
"gpt_cond_latent": model.speaker_manager.speakers[speaker]["gpt_cond_latent"].cpu().squeeze().half().tolist(),
}
for speaker in model.speaker_manager.speakers.keys()
}
for speaker in model.speaker_manager.speakers.keys()
}
else:
return {}

else:
return {}

@app.get("/languages")
def get_languages():
return config.languages
with lock:
return config.languages
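With the lock in place, overlapping calls to any of the endpoints above are served one after another rather than concurrently. A rough client-side way to observe this, assuming the server is reachable at http://localhost:8000 (an assumption, not part of this diff) and the `requests` package is installed:

```python
import threading
import time

import requests  # assumed to be available, e.g. via the test requirements

BASE_URL = "http://localhost:8000"  # assumed address; adjust to wherever the server runs


def timed_get(path: str) -> None:
    start = time.perf_counter()
    resp = requests.get(f"{BASE_URL}{path}")
    print(f"{path}: HTTP {resp.status_code} after {time.perf_counter() - start:.2f}s")


# Fire two requests at once; because every endpoint acquires the same module-level
# lock, the second request waits for the first instead of overlapping with it.
threads = [threading.Thread(target=timed_get, args=("/languages",)) for _ in range(2)]
for t in threads:
    t.start()
for t in threads:
    t.join()
```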