Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP Saving of audio segments without reading audio into memory #4

Open
wants to merge 1 commit into
base: tumu
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,4 @@ include/
output/

test.*
out/
5 changes: 5 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"cSpell.words": [
"audiosegment"
]
}
3 changes: 3 additions & 0 deletions pyvenv.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
home = /opt/homebrew/opt/python@3.10/bin
include-system-site-packages = false
version = 3.10.8
6 changes: 4 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
pydub==0.22.1
webrtcvad==2.0.10
pydub==0.25.1
webrtcvad-wheels==2.0.11.post1
python-ffmpeg==2.0.1
pytest
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@

setup(
name='wahi-korero',
version='v0.5.3',
version='v0.6.0',
description='A tool for identifying and extracting segments of speech in audio.',
url='https://github.com/TeHikuMedia/wahi-korero',
author='@craigthelinguist, @kmahelona',
author_email='[email protected]',
license='Kaitiakitanga License',
packages=['wahi_korero'],
install_requires=[
'pydub==0.22.1',
'webrtcvad==2.0.10',
'pydub==0.25.1',
'webrtcvad-wheels==2.0.11.post1',
],
)
Binary file added test/sounds/hello.aac
Binary file not shown.
Binary file added test/sounds/hello.aiff
Binary file not shown.
Binary file added test/sounds/hello.flac
Binary file not shown.
Binary file added test/sounds/hello.flv
Binary file not shown.
Binary file added test/sounds/hello.m4a
Binary file not shown.
Binary file added test/sounds/hello.mp3
Binary file not shown.
Binary file added test/sounds/hello.mp4
Binary file not shown.
Binary file added test/sounds/hello.ogg
Binary file not shown.
Binary file added test/sounds/hello.wma
Binary file not shown.
156 changes: 105 additions & 51 deletions test/test_segmenter.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,36 @@

# Make `wahi_korero` visible on sys.path
import sys
sys.path.append("..")

import json
import os
from os import path
from wahi_korero.segment import ConfigError, default_segmenter, FormatError
from wahi_korero.utils import SUPPORTED_FORMATS
from unittest import TestCase
from pydub import AudioSegment
import unittest
from wahi_korero import ConfigError, default_segmenter, FormatError
from os import path
import os
import json
import sys
import subprocess
import pytest

output_dir = "out"


class SegmenterIntegrationTests(unittest.TestCase):
class TestSegmenterIntegrationTests(TestCase):

def setUp(self):
    """Reset the shared output directory and build a fresh segmenter.

    Runs before every test: makes sure ``output_dir`` exists, empties it,
    then stores a default segmenter with captioning disabled on
    ``self.segmenter``.
    """
    # Local import: the file's import block is not guaranteed to provide it.
    import shutil

    # Idempotent creation; avoids the exists()/mkdir() race.
    os.makedirs(output_dir, exist_ok=True)

    for entry in os.listdir(output_dir):
        target = path.join(output_dir, entry)
        # Tests in this file write into per-format subdirectories of
        # output_dir; os.remove() raises on a directory, so directories
        # are removed recursively instead.
        if path.isdir(target) and not path.islink(target):
            shutil.rmtree(target)
        else:
            os.remove(target)

    self.segmenter = default_segmenter()
    self.segmenter.disable_captioning()

def test_segmenting(self):
self.segmenter.segment_audio("sounds/hello.wav", output_dir, verbose=False)
for fmt in SUPPORTED_FORMATS:
out = os.path.join(output_dir, 'segmenting', fmt)
os.makedirs(out, exist_ok=True)
self.segmenter.segment_audio(
f"test/sounds/hello.{fmt}", out, verbose=False)

def test_json_output(self):
self.segmenter.segment_audio("sounds/hello.wav", output_dir, verbose=False)
stream = self.segmenter.segment_stream("sounds/hello.wav")
self.segmenter.segment_audio(
"test/sounds/hello.wav", output_dir, verbose=False)
stream = self.segmenter.segment_stream("test/sounds/hello.wav")
num_segs = sum(1 for _ in stream)

try:
Expand All @@ -44,52 +46,104 @@ def test_json_output(self):
self.fail("Unexpected error {}".format(e))

def test_segment_against_track_len(self):
self.segmenter.segment_audio("sounds/hello.wav", output_dir, verbose=False)
stream = self.segmenter.segment_stream("sounds/hello.wav", output_audio=True)

try:
with open(path.join(output_dir, "segments.json"), "r") as f:
data = json.load(f)
except Exception as e:
self.fail("Unexpected error loading JSON: {}".format(e))

for (_, audio), seg in zip(stream, data["segments"]):
self.assertEqual(round(seg["end"] - seg["start"], 3), len(audio) / 1000,
"Segments in JSON file should correspond to length of audio track.")
for f in SUPPORTED_FORMATS:

self.segmenter.segment_audio(
f"test/sounds/hello.{f}", output_dir, verbose=False)
stream = self.segmenter.segment_stream(
f"test/sounds/hello.{f}", output_audio=True)

try:
with open(path.join(output_dir, "segments.json"), "r") as f:
data = json.load(f)
except Exception as e:
self.fail("Unexpected error loading JSON: {}".format(e))

print(data)
for (_, audio), seg in zip(stream, data["segments"]):
print(data['segments'])
self.assertEqual(
round(seg["end"] - seg["start"], 4),
round(len(audio) / 1000, 4),
"Segments in JSON file should correspond to length of audio track."
)

def test_segment_against_stream(self):
self.segmenter.segment_audio("sounds/hello.wav", output_dir, verbose=False)
stream = self.segmenter.segment_stream("sounds/hello.wav")

try:
with open(path.join(output_dir, "segments.json"), "r") as f:
data = json.load(f)
except Exception as e:
self.fail("Unexpected error loading JSON: {}".format(e))

for (seg1, _), seg2 in zip(stream, data["segments"]):
self.assertEqual(round(seg1[0], 3), round(seg1[0], 3), "Segments should be same in stream as in json.")
self.assertEqual(round(seg1[1], 3), round(seg1[1], 3), "Segments should be same in stream as in json.")
for f in SUPPORTED_FORMATS:
self.segmenter.segment_audio(
f"test/sounds/hello.{f}", output_dir, verbose=False)
stream = self.segmenter.segment_stream(f"test/sounds/hello.{f}")

try:
with open(path.join(output_dir, "segments.json"), "r") as f:
data = json.load(f)
except Exception as e:
self.fail("Unexpected error loading JSON: {}".format(e))

for (seg1, _), seg2 in zip(stream, data["segments"]):
self.assertEqual(round(seg1[0], 3), round(
seg1[0], 3), "Segments should be same in stream as in json.")
self.assertEqual(round(seg1[1], 3), round(
seg1[1], 3), "Segments should be same in stream as in json.")

def test_non_audio(self):
try:
self.segmenter.segment_audio("test_segmenter.py", "out")
self.fail("Segmenter should have failed gracefully on unsupported file format.")
self.segmenter.segment_audio("test/test_segmenter.py", "out")
self.fail(
"Segmenter should have failed gracefully on unsupported file format."
)
except FormatError:
pass # desired behaviour
except:
self.fail("Segmenter should have failed gracefully on unsupported file format.")
pass # desired behavior

def test_captioning(self):
self.segmenter.enable_captioning(500)
self.segmenter.segment_audio("sounds/hello.wav", output_dir, verbose=False)
for fmt in SUPPORTED_FORMATS:
print(f"testing captions for {fmt}")
out = os.path.join(output_dir, 'captioning', fmt)
os.makedirs(out, exist_ok=True)
self.segmenter.enable_captioning(500)
self.segmenter.segment_audio(
f"test/sounds/hello.{fmt}", out, verbose=False)

with open(path.join(out, "segments.json"), "r") as f:
data = json.load(f)

duration = 0
file_duration = 0
for i in range(data['num_segments']):
duration += data['segments'][i]['end'] - \
data['segments'][i]['start']
p = subprocess.Popen(
['ffprobe', '-v', 'error', '-show_entries', 'format=duration',
'-of', 'default=noprint_wrappers=1:nokey=1',
path.join(out, data['segments'][i]['fname'])],
stdin=subprocess.PIPE, stdout=subprocess.PIPE)

output, errors = p.communicate()
file_duration += float(output)

self.assertEqual(
round(duration, 2),
round(data['track_duration'], 2),
"json " + fmt
)
self.assertEqual(
round(duration, 2),
round(file_duration, 2),
"files " + fmt
)
print(f"captions for {fmt} PASSED")

def test_captioning_min_length(self):
audio_seg = AudioSegment.from_file("sounds/hello.wav", format="wav")
audio_seg = AudioSegment.from_file(
"test/sounds/hello.wav", format="wav")
audio_len = len(audio_seg)
self.segmenter.enable_captioning(audio_len, min_caption_len_ms=audio_len)
caption_stream = self.segmenter.segment_stream("sounds/hello.wav")
self.assertEqual(len(list(caption_stream)), 1, "Should have one caption") # one caption, the whole length of the track
self.segmenter.enable_captioning(
audio_len, min_caption_len_ms=audio_len)
caption_stream = self.segmenter.segment_stream("test/sounds/hello.wav")
# one caption, the whole length of the track
self.assertEqual(len(list(caption_stream)),
1, "Should have one caption")


if __name__ == "__main__":
unittest.main(verbosity=2)
4 changes: 0 additions & 4 deletions wahi_korero/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +0,0 @@

name = "wahi_korero"

from .segment import ConfigError, DEFAULT_CONFIG, default_segmenter, FormatError, Segmenter, frame_audio, frame_stream
Loading