improves voice detector
This commit is contained in:
@@ -184,26 +184,26 @@ impl Audio {
|
||||
let stream = raw_mic_input
|
||||
.possibly_disconnected_channels_to_mono()
|
||||
.constant_samplerate(SAMPLE_RATE)
|
||||
.limit(LimitSettings::live_performance())
|
||||
.process_buffer::<BUFFER_SIZE, _>(move |buffer| {
|
||||
let mut int_buffer: [i16; _] = buffer.map(|s| s.to_sample());
|
||||
if voip_parts
|
||||
.echo_canceller
|
||||
.lock()
|
||||
.process_stream(
|
||||
&mut int_buffer,
|
||||
SAMPLE_RATE.get() as i32,
|
||||
CHANNEL_COUNT.get() as i32,
|
||||
)
|
||||
.context("livekit audio processor error")
|
||||
.log_err()
|
||||
.is_some()
|
||||
{
|
||||
for (sample, processed) in buffer.iter_mut().zip(&int_buffer) {
|
||||
*sample = (*processed).to_sample();
|
||||
}
|
||||
}
|
||||
});
|
||||
.limit(LimitSettings::live_performance());
|
||||
// .process_buffer::<BUFFER_SIZE, _>(move |buffer| {
|
||||
// let mut int_buffer: [i16; _] = buffer.map(|s| s.to_sample());
|
||||
// if voip_parts
|
||||
// .echo_canceller
|
||||
// .lock()
|
||||
// .process_stream(
|
||||
// &mut int_buffer,
|
||||
// SAMPLE_RATE.get() as i32,
|
||||
// CHANNEL_COUNT.get() as i32,
|
||||
// )
|
||||
// .context("livekit audio processor error")
|
||||
// .log_err()
|
||||
// .is_some()
|
||||
// {
|
||||
// for (sample, processed) in buffer.iter_mut().zip(&int_buffer) {
|
||||
// *sample = (*processed).to_sample();
|
||||
// }
|
||||
// }
|
||||
// });
|
||||
// .denoise()
|
||||
// .context("Could not set up denoiser")?
|
||||
// .automatic_gain_control(automatic_gain_control_settings())
|
||||
|
||||
@@ -114,7 +114,7 @@ pub struct ConstantChannelCount<S: Source> {
|
||||
}
|
||||
|
||||
impl<S: Source> ConstantChannelCount<S> {
|
||||
fn new(source: S, target_channels: ChannelCount) -> Self {
|
||||
pub fn new(source: S, target_channels: ChannelCount) -> Self {
|
||||
let input_channels = source.channels();
|
||||
let sample_rate = source.sample_rate();
|
||||
let inner = ChannelCountConverter::new(source, input_channels, target_channels);
|
||||
|
||||
@@ -142,7 +142,7 @@ mod tests {
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::{
|
||||
test::{recording_of_davids_voice, sine},
|
||||
test::{recording_of_voice, sine},
|
||||
RodioExt,
|
||||
};
|
||||
use itertools::Itertools;
|
||||
@@ -214,7 +214,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn constant_samplerate_preserves_length() {
|
||||
let test_signal = recording_of_davids_voice(nz!(3), nz!(48_000));
|
||||
let test_signal = recording_of_voice(nz!(3), nz!(48_000));
|
||||
let resampled = test_signal.clone().constant_samplerate(nz!(16_000));
|
||||
|
||||
let diff_in_length = test_signal
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
use std::env::current_dir;
|
||||
use std::io::Cursor;
|
||||
use std::iter;
|
||||
use std::ops::Range;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::time::Duration;
|
||||
|
||||
@@ -21,10 +22,14 @@ use crate::audio_settings::LIVE_SETTINGS;
|
||||
use crate::test::detector::BasicVoiceDetector;
|
||||
use crate::{Audio, LEGACY_CHANNEL_COUNT, LEGACY_SAMPLE_RATE, RodioExt, VoipParts};
|
||||
|
||||
mod detector;
|
||||
// in hz
|
||||
const HUMAN_SPEECH_RANGE: Range<f32> = 90.0..260.0;
|
||||
|
||||
#[gpui::test]
|
||||
fn test_input_pipeline(cx: &mut gpui::TestAppContext) {
|
||||
// strange params to invite bugs to show themselves
|
||||
let test_signal = recording_of_davids_voice(nz!(3), nz!(48_000));
|
||||
let test_signal = recording_of_voice(nz!(3), nz!(48_000));
|
||||
let test_signal_duration = test_signal
|
||||
.total_duration()
|
||||
.expect("recordings have a length");
|
||||
@@ -38,15 +43,20 @@ fn test_input_pipeline(cx: &mut gpui::TestAppContext) {
|
||||
.into_samples_buffer();
|
||||
|
||||
let expected_output =
|
||||
recording_of_davids_voice(input_pipeline.channels(), input_pipeline.sample_rate());
|
||||
rodio::wav_to_file(input_pipeline.clone(), "input_pipeline_output.wav").unwrap();
|
||||
recording_of_voice(input_pipeline.channels(), input_pipeline.sample_rate());
|
||||
|
||||
rodio::wav_to_file(
|
||||
BasicVoiceDetector::add_voice_activity_as_channel(input_pipeline.clone()),
|
||||
"input_pipeline_output.wav",
|
||||
)
|
||||
.unwrap();
|
||||
rodio::wav_to_file(expected_output.clone(), "input_pipeline_expect.wav").unwrap();
|
||||
assert_similar_voice_spectra(expected_output, input_pipeline);
|
||||
}
|
||||
|
||||
#[gpui::test]
|
||||
fn test_output_pipeline(cx: &mut gpui::TestAppContext) {
|
||||
let test_signal = recording_of_davids_voice(LEGACY_CHANNEL_COUNT, LEGACY_SAMPLE_RATE);
|
||||
let test_signal = recording_of_voice(LEGACY_CHANNEL_COUNT, LEGACY_SAMPLE_RATE);
|
||||
let test_signal_duration = test_signal
|
||||
.total_duration()
|
||||
.expect("recordings have a length");
|
||||
@@ -65,7 +75,7 @@ fn test_output_pipeline(cx: &mut gpui::TestAppContext) {
|
||||
// don't care about the channel count and sample rate, as long as the voice
|
||||
// signal matches
|
||||
let expected_output =
|
||||
recording_of_davids_voice(output_pipeline.channels(), output_pipeline.sample_rate());
|
||||
recording_of_voice(output_pipeline.channels(), output_pipeline.sample_rate());
|
||||
rodio::wav_to_file(output_pipeline.clone(), "output_pipeline_output.wav").unwrap();
|
||||
rodio::wav_to_file(expected_output.clone(), "output_pipeline_expect.wav").unwrap();
|
||||
assert_similar_voice_spectra(expected_output, output_pipeline);
|
||||
@@ -74,7 +84,7 @@ fn test_output_pipeline(cx: &mut gpui::TestAppContext) {
|
||||
// TODO make a perf variant
|
||||
#[gpui::test]
|
||||
fn test_full_audio_pipeline(cx: &mut gpui::TestAppContext) {
|
||||
let test_signal = recording_of_davids_voice(nz!(3), nz!(44_100));
|
||||
let test_signal = recording_of_voice(nz!(3), nz!(44_100));
|
||||
let test_signal_duration = test_signal
|
||||
.total_duration()
|
||||
.expect("recordings have a length");
|
||||
@@ -92,15 +102,21 @@ fn test_full_audio_pipeline(cx: &mut gpui::TestAppContext) {
|
||||
|
||||
// don't care about the channel count and sample rate, as long as the voice
|
||||
// signal matches
|
||||
let expected_output =
|
||||
recording_of_davids_voice(full_pipeline.channels(), full_pipeline.sample_rate());
|
||||
let expected_output = recording_of_voice(full_pipeline.channels(), full_pipeline.sample_rate());
|
||||
rodio::wav_to_file(full_pipeline.clone(), "full_pipeline_output.wav").unwrap();
|
||||
rodio::wav_to_file(expected_output.clone(), "full_pipeline_expected.wav").unwrap();
|
||||
rodio::wav_to_file(expected_output.clone(), "full_pipeline_expect.wav").unwrap();
|
||||
assert_similar_voice_spectra(expected_output, full_pipeline);
|
||||
}
|
||||
|
||||
fn energy_of_spectrum(spectrum: &FrequencySpectrum) -> f32 {
|
||||
spectrum.max().1.val()
|
||||
fn human_perceivable_energy(spectrum: &FrequencySpectrum) -> f32 {
|
||||
spectrum
|
||||
.data()
|
||||
.iter()
|
||||
.filter(|(freq, _)| HUMAN_SPEECH_RANGE.contains(&freq.val()))
|
||||
.max_by_key(|(_, energy)| energy)
|
||||
.unwrap()
|
||||
.1
|
||||
.val()
|
||||
}
|
||||
|
||||
fn energy_of_chunk(chunk: &[rodio::Sample], sample_rate: SampleRate) -> f32 {
|
||||
@@ -112,7 +128,7 @@ fn energy_of_chunk(chunk: &[rodio::Sample], sample_rate: SampleRate) -> f32 {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
energy_of_spectrum(&spectrum)
|
||||
human_perceivable_energy(&spectrum)
|
||||
}
|
||||
|
||||
fn maximum_energy(mut a: impl rodio::Source) -> f32 {
|
||||
@@ -127,10 +143,6 @@ fn maximum_energy(mut a: impl rodio::Source) -> f32 {
|
||||
.fold(0f32, |max, energy| max.max(energy))
|
||||
}
|
||||
|
||||
const CHUNK_DURATION: Duration = Duration::from_millis(100);
|
||||
|
||||
mod detector;
|
||||
|
||||
// Test signals should be at least 50% voice
|
||||
fn assert_similar_voice_spectra(
|
||||
expected: impl rodio::Source + Clone,
|
||||
@@ -211,11 +223,17 @@ fn assert_similar_voice_spectra(
|
||||
);
|
||||
}
|
||||
|
||||
fn spectra_chunk_size(source: &impl Source) -> usize {
|
||||
((CHUNK_DURATION.as_secs_f64() * source.sample_rate().get() as f64).ceil() as usize)
|
||||
fn spectra_chunk_size(source: &impl Source, minimum_duration: Duration) -> usize {
|
||||
((minimum_duration.as_secs_f64() * source.sample_rate().get() as f64) as usize)
|
||||
.next_power_of_two()
|
||||
}
|
||||
|
||||
fn spectrum_duration(source: &impl Source, minimum_duration: Duration) -> Duration {
|
||||
Duration::from_secs_f64(
|
||||
spectra_chunk_size(source, minimum_duration) as f64 / source.sample_rate().get() as f64,
|
||||
)
|
||||
}
|
||||
|
||||
fn assert_same_voice_signal(
|
||||
(chunk_start, (expected, pipeline)): (Duration, (FrequencySpectrum, FrequencySpectrum)),
|
||||
) -> Option<bool> {
|
||||
@@ -232,7 +250,7 @@ fn assert_same_voice_signal(
|
||||
panic!(
|
||||
"Could not find fundamental voice freq in output while there is one in the input at {voice_freq_expected}Hz.\nLoudest 5 frequencies in output:\n{}\n\n{}",
|
||||
display_loudest_5_frequencies(&pipeline),
|
||||
plot_spectra(&expected, &pipeline),
|
||||
plot_spectra(&[(&expected, "expected"), (&pipeline, "pipeline")]),
|
||||
);
|
||||
}
|
||||
(Some(voice_freq_expected), Some(voice_freq_pipeline)) => {
|
||||
@@ -243,7 +261,7 @@ fn assert_same_voice_signal(
|
||||
assert!(
|
||||
less_than_10percent_diff((voice_freq_expected, voice_freq_pipeline)),
|
||||
"expected: {voice_freq_expected}, pipeline: {voice_freq_pipeline}, at: {chunk_start:?}\n\n{}",
|
||||
plot_spectra(&expected, &pipeline)
|
||||
plot_spectra(&[(&expected, "expected"), (&pipeline, "pipeline")])
|
||||
);
|
||||
|
||||
// Guards against voice distortion
|
||||
@@ -256,14 +274,12 @@ fn assert_same_voice_signal(
|
||||
}
|
||||
|
||||
fn fundamental_voice_freq(spectrum: &FrequencySpectrum) -> Option<f32> {
|
||||
let human_speech_range = 90.0..260.0;
|
||||
let spectrum: Vec<_> = spectrum.data().iter().collect();
|
||||
spectrum
|
||||
.data()
|
||||
.iter()
|
||||
.filter(|(freq, _)| human_speech_range.contains(&freq.val()))
|
||||
// .inspect(|(freq, ampl)| println!("{freq},{ampl}"))
|
||||
.max_by(|(_, a_ampl), (_, b_ampl)| a_ampl.val().total_cmp(&b_ampl.val()))
|
||||
.map(|(freq, _ampl)| freq.val())
|
||||
.filter(|(freq, _)| HUMAN_SPEECH_RANGE.contains(&freq.val()))
|
||||
.max_by_key(|(_, energy)| energy)
|
||||
.map(|(freq, _)| freq.val())
|
||||
}
|
||||
|
||||
fn same_ratio_between_harmonics(
|
||||
@@ -313,7 +329,7 @@ fn display_loudest_5_frequencies(spectrum: &FrequencySpectrum) -> String {
|
||||
}
|
||||
|
||||
// Returns ascii encoding a link to open the plot
|
||||
fn plot_spectra(expected: &FrequencySpectrum, pipeline: &FrequencySpectrum) -> String {
|
||||
pub fn plot_spectra(spectra: &[(&FrequencySpectrum, &str)]) -> String {
|
||||
use plotly::{Bar, Plot};
|
||||
|
||||
let mut plot = Plot::new();
|
||||
@@ -322,29 +338,16 @@ fn plot_spectra(expected: &FrequencySpectrum, pipeline: &FrequencySpectrum) -> S
|
||||
// .y_axis(Axis::new().type_(plotly::layout::AxisType::Log));
|
||||
plot.set_layout(layout);
|
||||
|
||||
let (x, y): (Vec<_>, Vec<_>) = expected
|
||||
.data()
|
||||
.iter()
|
||||
.map(|(freq, amplitude)| (freq.val(), amplitude.val()))
|
||||
.filter(|(freq, _)| *freq > 85.0)
|
||||
.unzip();
|
||||
let trace = Bar::new(x, y)
|
||||
.name("expected")
|
||||
.show_legend(true)
|
||||
.opacity(0.5);
|
||||
plot.add_trace(trace);
|
||||
|
||||
let (x, y): (Vec<_>, Vec<_>) = pipeline
|
||||
.data()
|
||||
.iter()
|
||||
.map(|(freq, amplitude)| (freq.val(), amplitude.val()))
|
||||
.filter(|(freq, _)| *freq > 85.0)
|
||||
.unzip();
|
||||
let trace = Bar::new(x, y)
|
||||
.name("pipeline")
|
||||
.show_legend(true)
|
||||
.opacity(0.5);
|
||||
plot.add_trace(trace);
|
||||
for (spectrum, label) in spectra {
|
||||
let (x, y): (Vec<_>, Vec<_>) = spectrum
|
||||
.data()
|
||||
.iter()
|
||||
.map(|(freq, amplitude)| (freq.val(), amplitude.val()))
|
||||
.filter(|(freq, _)| *freq > 85.0)
|
||||
.unzip();
|
||||
let trace = Bar::new(x, y).name(label).show_legend(true).opacity(0.5);
|
||||
plot.add_trace(trace);
|
||||
}
|
||||
|
||||
let path = current_dir().unwrap().join("plot.html");
|
||||
plot.write_html(&path);
|
||||
@@ -378,7 +381,7 @@ pub(crate) fn sine(channels: ChannelCount, sample_rate: SampleRate) -> impl Sour
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn recording_of_davids_voice(
|
||||
pub(crate) fn recording_of_voice(
|
||||
channels: ChannelCount,
|
||||
sample_rate: SampleRate,
|
||||
) -> impl Source + Clone {
|
||||
@@ -407,7 +410,7 @@ pub(crate) fn recording_of_davids_voice(
|
||||
#[should_panic]
|
||||
fn test_rejects_pitch_shift() {
|
||||
// also known as 'robot/chipmunk voice'
|
||||
let original = recording_of_davids_voice(nz!(1), nz!(44100));
|
||||
let original = recording_of_voice(nz!(1), nz!(44100));
|
||||
let pitch_shifted = original
|
||||
.clone()
|
||||
.speed(1.2) // effectively increases the pitch by 20%
|
||||
@@ -422,7 +425,7 @@ fn test_rejects_pitch_shift() {
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_rejects_large_amounts_of_noise() {
|
||||
let original = recording_of_davids_voice(nz!(1), nz!(44100));
|
||||
let original = recording_of_voice(nz!(1), nz!(44100));
|
||||
let with_noise = add_noise(&original, 0.5);
|
||||
|
||||
assert_similar_voice_spectra(original, with_noise);
|
||||
@@ -430,7 +433,7 @@ fn test_rejects_large_amounts_of_noise() {
|
||||
|
||||
#[test]
|
||||
fn test_ignores_volume() {
|
||||
let original = recording_of_davids_voice(nz!(1), nz!(44100));
|
||||
let original = recording_of_voice(nz!(1), nz!(44100));
|
||||
let amplified = original.clone().amplify(1.42);
|
||||
|
||||
assert_similar_voice_spectra(original, amplified);
|
||||
@@ -438,7 +441,7 @@ fn test_ignores_volume() {
|
||||
|
||||
#[test]
|
||||
fn test_ignore_low_volume_noise() {
|
||||
let original = recording_of_davids_voice(nz!(1), nz!(44100));
|
||||
let original = recording_of_voice(nz!(1), nz!(44100));
|
||||
// 5% noise is quite audible as the noise is across all frequencies so is
|
||||
// perceived as far more intense than a voice
|
||||
let with_noise = add_noise(&original, 0.05);
|
||||
@@ -469,7 +472,7 @@ fn add_noise(original: &(impl Source + Clone + Send + 'static), amount: f32) ->
|
||||
|
||||
#[test]
|
||||
fn test_ignores_small_shifts() {
|
||||
let original = recording_of_davids_voice(nz!(1), nz!(44100));
|
||||
let original = recording_of_voice(nz!(1), nz!(44100));
|
||||
let shifted = iter::repeat(0f32).take(10).chain(original.clone());
|
||||
let shifted = SamplesBuffer::new(
|
||||
original.channels(),
|
||||
|
||||
196
crates/audio/src/test/detector.rs
Normal file
196
crates/audio/src/test/detector.rs
Normal file
@@ -0,0 +1,196 @@
|
||||
use crate::RodioExt;
|
||||
use crate::rodio_ext::ConstantChannelCount;
|
||||
use crate::test::sine;
|
||||
use crate::test::spectrum_duration;
|
||||
|
||||
use super::human_perceivable_energy;
|
||||
|
||||
use rodio::buffer::SamplesBuffer;
|
||||
use rodio::nz;
|
||||
use spectrum_analyzer::FrequencyLimit;
|
||||
use spectrum_analyzer::FrequencySpectrum;
|
||||
use spectrum_analyzer::scaling::divide_by_N_sqrt;
|
||||
use spectrum_analyzer::windows::hann_window;
|
||||
|
||||
use super::maximum_energy;
|
||||
|
||||
use rodio::Source;
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
/// A span of time, measured from the start of a recording.
#[derive(Debug, Clone)]
pub struct VoiceSegment {
    /// Offset at which the segment begins.
    pub start: Duration,
    /// Offset at which the segment ends.
    pub end: Duration,
}

impl VoiceSegment {
    /// Empty segment located at the very start of a recording.
    const ZERO: Self = Self {
        start: Duration::ZERO,
        end: Duration::ZERO,
    };

    /// Time between `start` and `end`.
    ///
    /// # Panics
    /// Panics if `end` lies before `start`.
    fn length(&self) -> Duration {
        self.end - self.start
    }

    /// Gap between the end of `self` and the start of `other`.
    ///
    /// Segments that touch (`self.end == other.start`) have a gap of zero.
    ///
    /// # Panics
    /// Panics if `other` starts before `self` ends.
    fn until(&self, other: &Self) -> Duration {
        // `<=` rather than `<`: directly adjacent segments are valid and
        // simply have no gap between them.
        debug_assert!(
            self.end <= other.start,
            "segments overlap: {self:?} ends after {other:?} starts"
        );
        other.start - self.end
    }
}
|
||||
|
||||
pub(crate) struct BasicVoiceDetector {
|
||||
pub(crate) segments_with_voice: Vec<VoiceSegment>,
|
||||
}
|
||||
|
||||
impl BasicVoiceDetector {
|
||||
pub(crate) fn new(source: impl Source + Clone) -> Self {
|
||||
// only works on mono
|
||||
let source = ConstantChannelCount::new(source, nz!(1)).into_samples_buffer();
|
||||
|
||||
// this gives a good resolution
|
||||
let minimum_chunk_duration = Duration::from_millis(20);
|
||||
let actual_chunk_duration = spectrum_duration(&source, minimum_chunk_duration);
|
||||
|
||||
let mut spectrum_start_pos = Duration::ZERO;
|
||||
let mut partial_segment = None;
|
||||
|
||||
// empirically determined (by looking in audacity)
|
||||
// see the 'soup' test for how
|
||||
//
|
||||
// while this might seem low remember humans precieve sound
|
||||
// logarithmically. So 40% of energy sounds like 80% volume.
|
||||
let threshold = 0.4 * maximum_energy(source.clone());
|
||||
let segments_with_voice: Vec<_> = iter_spectra(source.clone(), actual_chunk_duration)
|
||||
.filter_map(|spectrum| {
|
||||
let voice_detected = human_perceivable_energy(&spectrum) > threshold;
|
||||
spectrum_start_pos += actual_chunk_duration;
|
||||
match (&mut partial_segment, voice_detected) {
|
||||
(Some(VoiceSegment { end, .. }), true) => *end = spectrum_start_pos,
|
||||
(Some(VoiceSegment { start, .. }), false) => {
|
||||
let res = Some(VoiceSegment {
|
||||
start: *start,
|
||||
end: spectrum_start_pos,
|
||||
});
|
||||
partial_segment = None;
|
||||
return res;
|
||||
}
|
||||
(None, true) => {
|
||||
partial_segment = Some(VoiceSegment {
|
||||
start: spectrum_start_pos,
|
||||
end: spectrum_start_pos,
|
||||
})
|
||||
}
|
||||
(None, false) => partial_segment = None,
|
||||
};
|
||||
None
|
||||
})
|
||||
.collect();
|
||||
|
||||
Self {
|
||||
segments_with_voice,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn voice_less_duration(&self) -> Duration {
|
||||
self.segments_with_voice
|
||||
.iter()
|
||||
.map(|range| range.end - range.start)
|
||||
.sum()
|
||||
}
|
||||
|
||||
fn beep_where_voice_detected(&self, source: &impl Source) -> SamplesBuffer {
|
||||
let sine = sine(source.channels(), source.sample_rate());
|
||||
|
||||
let mut with_voice = [VoiceSegment::ZERO]
|
||||
.iter()
|
||||
.chain(self.segments_with_voice.iter())
|
||||
.peekable();
|
||||
let mut samples = Vec::new();
|
||||
|
||||
loop {
|
||||
let Some(current_voice_segment) = with_voice.next() else {
|
||||
break;
|
||||
};
|
||||
|
||||
let voice_range_duration = current_voice_segment.length();
|
||||
samples.extend(
|
||||
sine.clone()
|
||||
.amplify(1.0)
|
||||
.take_duration(voice_range_duration),
|
||||
);
|
||||
|
||||
let Some(next_voice_segment) = with_voice.peek() else {
|
||||
break;
|
||||
};
|
||||
let until_next = current_voice_segment.until(next_voice_segment);
|
||||
samples.extend(sine.clone().amplify(0.0).take_duration(until_next));
|
||||
}
|
||||
|
||||
SamplesBuffer::new(nz!(1), source.sample_rate(), samples)
|
||||
}
|
||||
|
||||
pub fn add_voice_activity_as_channel(mut source: impl Source + Clone) -> impl Source {
|
||||
let detector = Self::new(source.clone());
|
||||
let mut voice_activity = detector.beep_where_voice_detected(&source).into_iter();
|
||||
|
||||
let mut samples = Vec::new();
|
||||
loop {
|
||||
let Some(s1) = source.next() else {
|
||||
break;
|
||||
};
|
||||
let Some(s2) = source.next() else {
|
||||
break;
|
||||
};
|
||||
let Some(s3) = voice_activity.next() else {
|
||||
break;
|
||||
};
|
||||
|
||||
samples.extend_from_slice(&[s1, s2, s3]);
|
||||
}
|
||||
SamplesBuffer::new(
|
||||
source.channels().checked_add(1).unwrap(),
|
||||
source.sample_rate(),
|
||||
samples,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_spectra(
|
||||
expected: impl Source + Clone,
|
||||
chunk_duration: Duration,
|
||||
) -> impl Iterator<Item = FrequencySpectrum> {
|
||||
assert!(expected.total_duration().is_some());
|
||||
|
||||
let chunk_size = super::spectra_chunk_size(&expected, chunk_duration);
|
||||
let expected_samples: Vec<_> = expected.clone().collect();
|
||||
expected_samples
|
||||
.chunks_exact(chunk_size)
|
||||
.map(|input| {
|
||||
super::samples_fft_to_spectrum(
|
||||
&hann_window(input),
|
||||
expected.sample_rate().get(),
|
||||
FrequencyLimit::Min(4.0),
|
||||
Some(÷_by_N_sqrt),
|
||||
)
|
||||
.unwrap()
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.into_iter()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use crate::test::{detector::BasicVoiceDetector, recording_of_voice};
    use rodio::{nz, wav_to_file};

    /// Writes the marker track of the detector for the test recording to
    /// `voice_activity.wav` so it can be inspected by hand.
    #[test]
    fn soup() {
        let recording = recording_of_voice(nz!(1), nz!(48000));
        let voice_activity =
            BasicVoiceDetector::new(recording.clone()).beep_where_voice_detected(&recording);

        wav_to_file(voice_activity, "voice_activity.wav").unwrap();
    }
}
|
||||
Reference in New Issue
Block a user