Skip to content

Commit

Permalink
Speed up the ASR, without transcode.
Browse files Browse the repository at this point in the history
  • Loading branch information
winlinvip committed Jan 27, 2024
1 parent 3a540cc commit 353b90f
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 13 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,5 +213,10 @@ The changelog:
* Allow the user to retry on error. v1.0.34
* Refine bad-case handling for user input. v1.0.35
* Fix bug for setting window for robot. [v1.0.36](https://github.com/ossrs/ai-talk/releases/tag/v1.0.36)
* Support setting up the API proxy and key for ASR, Chat, and TTS. v1.0.37
* Support Tencent Speech to speed up. v1.0.37
* Support share logging text mode. v1.0.38
* Fix some bad cases in sentence detection. [v1.0.39](https://github.com/ossrs/ai-talk/releases/tag/v1.0.39)
* Speed up the ASR, without transcode. v1.0.40

Winlin, 2023.12
24 changes: 11 additions & 13 deletions backend/openai.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,23 +81,21 @@ func NewOpenAIASRService(opts ...func(service *openaiASRService)) ASRService {

func (v *openaiASRService) RequestASR(ctx context.Context, inputFile, language, prompt string, onBeforeRequest func()) (*ASRResult, error) {
outputFile := fmt.Sprintf("%v.m4a", inputFile)

// Transcode input audio in opus or aac, to aac in m4a format.
if os.Getenv("AIT_KEEP_FILES") != "true" {
defer os.Remove(outputFile)
}
if true {
err := exec.CommandContext(ctx, "ffmpeg",
"-i", inputFile,
"-vn", "-c:a", "aac", "-ac", "1", "-ar", "16000", "-ab", "50k",
outputFile,
).Run()

if err != nil {
return nil, errors.Errorf("Error converting the file")
}
logger.Tf(ctx, "Convert audio %v to %v ok", inputFile, outputFile)

// Remux the input audio (opus or aac) into an m4a container without re-encoding.
// If re-encoding to aac is needed, use:
// "-c:a", "aac", "-ac", "1", "-ar", "16000", "-ab", "30k",
if err := exec.CommandContext(ctx, "ffmpeg",
"-i", inputFile,
"-vn", "-c:a", "copy",
outputFile,
).Run(); err != nil {
return nil, errors.Errorf("Error converting the file")
}
logger.Tf(ctx, "Convert audio %v to %v ok", inputFile, outputFile)

duration, _, err := ffprobeAudio(ctx, outputFile)
if err != nil {
Expand Down

0 comments on commit 353b90f

Please sign in to comment.