Merge pull request #24 from nicolasperez19/random-audio-errors
feat: Fixed random audio errors with `soundcard` and `pydub`
nicolasperez19 authored Nov 11, 2024
2 parents 9a18c81 + b1c2525 commit 281bed7
Showing 9 changed files with 198 additions and 49 deletions.
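In short, the commit captures with `soundcard` at 44.1 kHz, scrubs NaNs out of the recorded float32 frames, and uses `pydub` to resample the mono stream down to the 24 kHz PCM16 audio the OpenAI websocket consumes. A minimal sketch of that conversion path, assuming float32 input in [-1.0, 1.0]; the helper names and standalone structure are illustrative and not code from this commit:

```python
import numpy as np
from pydub import AudioSegment

def float32_to_pcm16(frames: np.ndarray) -> bytes:
    # soundcard records float32 samples in [-1.0, 1.0]; replace NaNs with 0.0
    # and scale to signed 16-bit before handing the bytes to pydub
    clean = np.nan_to_num(np.clip(frames, -1.0, 1.0))
    return (clean * 32767.0).astype(np.int16).tobytes()

def resample_to_24k_mono(pcm16: bytes, src_rate: int = 44100, src_channels: int = 1) -> bytes:
    # Wrap the headerless PCM16 bytes in an AudioSegment, then convert to
    # 24 kHz, mono, 16-bit and return the raw bytes for the websocket
    segment = AudioSegment(data=pcm16, sample_width=2, frame_rate=src_rate, channels=src_channels)
    return segment.set_frame_rate(24000).set_channels(1).set_sample_width(2).raw_data
```

The commit's `resample_stream` builds its `AudioSegment` with `AudioSegment.from_file(BytesIO(stream))` rather than the raw constructor; in both cases the goal is the `.raw_data` PCM16 payload that `client.send_audio` forwards.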
--- changed file ---
@@ -1,6 +1,6 @@
from typing import List, Iterator, Optional, Tuple
from passive_sound_localization.models.configs.localization import LocalizationConfig
# from models.configs.localization import LocalizationConfig # Only needed to run with `realtime_audio.py`
# from passive_sound_localization.models.configs.localization import LocalizationConfig
from models.configs.localization import LocalizationConfig # Only needed to run with `realtime_audio.py`
from dataclasses import dataclass
import numpy as np
import logging
--- changed file ---
@@ -1,30 +1,30 @@
from passive_sound_localization.models.configs.localization import (
LocalizationConfig,
)
from passive_sound_localization.models.configs.logging import (
LoggingConfig,
)
from passive_sound_localization.models.configs.feature_flags import (
FeatureFlagsConfig,
)

from passive_sound_localization.models.configs.openai_websocket import (
OpenAIWebsocketConfig,
)

from passive_sound_localization.models.configs.realtime_streamer import RealtimeAudioStreamerConfig

# from models.configs.localization import (
# from passive_sound_localization.models.configs.localization import (
# LocalizationConfig,
# ) # Need import paths like this to test audio streaming with `realtime_audio.py`
# from models.configs.logging import (
# )
# from passive_sound_localization.models.configs.logging import (
# LoggingConfig,
# ) # Need import paths like this to test audio streaming with `realtime_audio.py`
# from models.configs.feature_flags import (
# )
# from passive_sound_localization.models.configs.feature_flags import (
# FeatureFlagsConfig,
# ) # Need import paths like this to test audio streaming with `realtime_audio.py`
# from models.configs.realtime_streamer import RealtimeAudioStreamerConfig # Need import paths like this to test audio streaming with `realtime_audio.py`
# from models.configs.openai_websocket import OpenAIWebsocketConfig # Need import paths like this to test audio streaming with `realtime_audio.py`
# )

# from passive_sound_localization.models.configs.openai_websocket import (
# OpenAIWebsocketConfig,
# )

# from passive_sound_localization.models.configs.realtime_streamer import RealtimeAudioStreamerConfig

from models.configs.localization import (
LocalizationConfig,
) # Need import paths like this to test audio streaming with `realtime_audio.py`
from models.configs.logging import (
LoggingConfig,
) # Need import paths like this to test audio streaming with `realtime_audio.py`
from models.configs.feature_flags import (
FeatureFlagsConfig,
) # Need import paths like this to test audio streaming with `realtime_audio.py`
from models.configs.realtime_streamer import RealtimeAudioStreamerConfig # Need import paths like this to test audio streaming with `realtime_audio.py`
from models.configs.openai_websocket import OpenAIWebsocketConfig # Need import paths like this to test audio streaming with `realtime_audio.py`


from dataclasses import dataclass, field
--- changed file ---
@@ -5,7 +5,7 @@
@dataclass(frozen=True)
class LocalizationConfig:
speed_of_sound: float = 343.0 # Speed of sound in m/s
sample_rate: int = 24000 # Sample rate of the audio in Hz
sample_rate: int = 44100 # Sample rate of the audio in Hz
fft_size: int = 1024 # Size of FFT to use

mic_positions: List[List[float]] = field(
--- changed file ---
@@ -3,9 +3,10 @@

@dataclass(frozen=True)
class RealtimeAudioStreamerConfig:
sample_rate: int = 24000
sample_rate: int = 44100
channels: int = 1
chunk: int = 1024
device_indices: List[int] = field(default_factory=lambda: [2, 3, 4, 5]) # Lab configuration
# device_indices: List[int] = field(default_factory=lambda: [2, 3, 4, 5]) # Lab configuration
device_indices: List[int] = field(default_factory=lambda: [1, 2, 3, 4]) # Nico's laptop (configuration with soundcard)
# device_indices: List[int] = field(default_factory=lambda: [4, 6, 8, 10]) # Nico's laptop (configuration 1)
# device_indices: List[int] = field(default_factory=lambda: [2, 4, 6, 8]) # Nico's laptop (configuration 2)
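The `device_indices` above are positions in the list returned by `sc.all_microphones()` (that list is indexed directly in `record_audio` further down), so the right values differ from machine to machine, as the alternative configurations in the comments suggest. A small sketch for enumerating them:

```python
import soundcard as sc

# Print every capture device with its index so device_indices can be
# filled in for the machine at hand (order matches sc.all_microphones()).
for index, mic in enumerate(sc.all_microphones()):
    print(index, mic.name)
```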
--- changed file ---
@@ -25,11 +25,13 @@
def send_audio_continuously(client, single_channel_generator):
print("Threading...")
for single_channel_audio in single_channel_generator:
if single_channel_audio is None:
continue
client.send_audio(single_channel_audio)


def receive_text_messages(client, logger):
logger.info("OpanAI: Listening to audio stream")
logger.info("OpenAI: Listening to audio stream")
while True:
try:
command = client.receive_text_response()
@@ -44,13 +46,16 @@ def receive_text_messages(client, logger):
# commands.append(command)
except Exception as e:
print(f"Error receiving response: {e}")
break # Exit loop if server disconnects

def realtime_localization(multi_channel_stream, localizer, logger):
logger.info("Localization: Listening to audio stream")
try:
did_get = True
for audio_data in multi_channel_stream:
# Skip calculation if there are any issues with the audio data
if audio_data is None:
continue

# Stream audio data and pass it to the localizer
localization_stream = localizer.localize_stream(
[audio_data[k] for k in audio_data.keys()]
@@ -92,11 +97,8 @@ def command_executor(publisher, logger):

def publish_results(localization_results):
print(localization_results)



def main():
print("Hello world")
audio_streamer_config = RealtimeAudioStreamerConfig()
localizer_config = LocalizationConfig()
websocket_config = OpenAIWebsocketConfig()
--- changed file ---
@@ -3,6 +3,9 @@
import soundcard as sc
import numpy as np
import threading
from pydub import AudioSegment
from io import BytesIO


# from passive_sound_localization.models.configs.realtime_streamer import (
# RealtimeAudioStreamerConfig,
@@ -27,7 +30,7 @@ def __init__(self, config: RealtimeAudioStreamerConfig):
print(self.device_indices)

def __enter__(self):
microphones: List[sc._Microphone] = sc.all_microphones()
microphones = sc.all_microphones()
self.streams = {
device_index: np.zeros((self.chunk, self.channels), dtype=np.float32)
for device_index in self.device_indices
@@ -42,7 +45,7 @@ def __enter__(self):

return self

def record_audio(self, microphones: List[sc._Microphone]):
def record_audio(self, microphones):
while self.streaming:
for device_index in self.device_indices:
self.streams[device_index] = microphones[device_index].record(
@@ -58,18 +61,22 @@ def __exit__(self, *args):
def get_stream(self, device_index: int) -> Optional[bytes]:
"""Retrieve the audio stream for a specific device index."""
if device_index in self.device_indices:
return self.streams[device_index].tobytes()
return np.nan_to_num(self.streams[device_index]).tobytes()
else:
print(f"Device index {device_index} not found.")
return None

def multi_channel_gen(self) -> Generator[Dict[int, bytes], None, None]:
def multi_channel_gen(self) -> Generator[Optional[Dict[int, bytes]], None, None]:
try:
while self.streaming:
audio_arrays = []
for device_index in self.device_indices:
audio_arrays.append(self.get_stream(device_index))

# Yield None (and skip this chunk) if any stream is missing or empty
if any(audio == b"" or audio is None for audio in audio_arrays):
yield None
continue

yield {
device_index: audio
for device_index, audio in zip(self.device_indices, audio_arrays)
@@ -80,10 +87,31 @@ def multi_channel_gen(self) -> Generator[Dict[int, bytes], None, None]:

def merge_streams(self, streams: List[np.ndarray]) -> np.ndarray:
return np.sum(streams, axis=0) / len(streams)

def resample_stream(self, stream: bytes, target_sample_rate: int = 24000, sample_width: int=2) -> bytes:
try:
audio = AudioSegment.from_file(BytesIO(stream))

# Resample to 24kHz mono pcm16
return audio.set_frame_rate(target_sample_rate).set_channels(self.channels).set_sample_width(sample_width).raw_data

except Exception as e:
print(f"Error in resample_stream: {e}")
return b""


def single_channel_gen(self) -> Generator[bytes, None, None]:
def single_channel_gen(self) -> Generator[Optional[bytes], None, None]:
try:
while self.streaming:
yield self.merge_streams(list(self.streams.values())).tobytes()
stream = self.get_stream(self.device_indices[0])
if stream == b"" or stream is None:
yield None

resampled_stream = self.resample_stream(stream)

if resampled_stream != b"":
yield resampled_stream
else:
yield None
except Exception as e:
print(f"Error in single_channel_gen: {e}")
print(f"Error in single_channel_gen: {e}")
--- changed file ---
@@ -5,8 +5,8 @@
import logging
from typing import Optional

from passive_sound_localization.models.configs.openai_websocket import OpenAIWebsocketConfig
# from models.configs.openai_websocket import OpenAIWebsocketConfig # Only needed to run with `realtime_audio.py`
# from passive_sound_localization.models.configs.openai_websocket import OpenAIWebsocketConfig
from models.configs.openai_websocket import OpenAIWebsocketConfig # Only needed to run with `realtime_audio.py`

logger = logging.getLogger(__name__)

@@ -111,24 +111,24 @@ def send_audio(self, audio_chunk: bytes) -> None:
"audio": audio_b64
}))

def receive_text_response(self, timeout:float=0.3) -> str:
def receive_text_response(self, timeout:float=5.0) -> str:
try:
# Tries to receive the next message (in a blocking manner) from the OpenAI websocket
# If the message doesn't arrive within `timeout` seconds, then it raises a TimeoutError
message = json.loads(self.ws.recv(timeout=timeout))
except TimeoutError:
return OpenAITimeoutError(timeout=timeout)
raise OpenAITimeoutError(timeout=timeout)

# Print message just to see what we're receiving
print(message)
# print(message)

# Checks to see any general errors
if message["type"] == "error":
return OpenAIWebsocketError(error_code=message["error"]["code"], error_message=["error"]["message"])
raise OpenAIWebsocketError(error_code=message["error"]["code"], error_message=message["error"]["message"])

# Checks to see whether OpenAI is specifically rate limiting our responses
if message["type"] == "response.done" and message["response"]["status_details"]["error"]["code"] == "rate_limit_exceeded":
return OpenAIRateLimitError()
raise OpenAIRateLimitError()

# Checks to see if an actual text response was sent, and returns the text
if message["type"] == "response.text.done":