final dsp improvement
This commit is contained in:
+171
-30
@@ -66,6 +66,12 @@ const ACF_CONF_MIN: f32 = 0.15; // below this the ACF peak is noise -> ignore
|
|||||||
const ACF_SNAP: f32 = 0.30; // strong + wrong-octave IOI -> snap, don't glide
|
const ACF_SNAP: f32 = 0.30; // strong + wrong-octave IOI -> snap, don't glide
|
||||||
const BPM_FOLD_LO: f32 = 88.0;
|
const BPM_FOLD_LO: f32 = 88.0;
|
||||||
const BPM_FOLD_HI: f32 = 176.0;
|
const BPM_FOLD_HI: f32 = 176.0;
|
||||||
|
/// Mel filterbank size + count of MFCC cepstral coeffs exposed (timbre vec).
|
||||||
|
pub const MEL_N: usize = 32;
|
||||||
|
pub const MFCC_N: usize = 13;
|
||||||
|
const MEL_LO: f32 = 30.0;
|
||||||
|
const MEL_HI: f32 = 16_000.0;
|
||||||
|
const MFCC_SMOOTH: f32 = 0.25; // EMA on the bipolar cepstral vector
|
||||||
|
|
||||||
/// Per-band level (AGC-normalised, smoothed) + onset spike + rich descriptors.
|
/// Per-band level (AGC-normalised, smoothed) + onset spike + rich descriptors.
|
||||||
/// All scalar fields are 0..~1.
|
/// All scalar fields are 0..~1.
|
||||||
@@ -102,6 +108,15 @@ pub struct Bands {
|
|||||||
/// is *anchored* to this, so `beat_phase` no longer drifts an octave on
|
/// is *anchored* to this, so `beat_phase` no longer drifts an octave on
|
||||||
/// syncopated breakcore fills — sync pulses/dolly-punches to this grid.
|
/// syncopated breakcore fills — sync pulses/dolly-punches to this grid.
|
||||||
pub bpm: f32,
|
pub bpm: f32,
|
||||||
|
/// Stereo width: side/mid RMS ratio, AGC-normalised + smoothed. 0 = mono /
|
||||||
|
/// dead-centre, ->1 = wide / strong L-R / anti-phase. Mono input -> 0.
|
||||||
|
/// Spatialise the visual to the mix's stereo field (spread/parallax).
|
||||||
|
pub width: f32,
|
||||||
|
/// Smoothed MFCC timbre fingerprint: cepstral coeffs c1.. (c0/energy
|
||||||
|
/// dropped), each per-coeff AGC'd to ~[-1,1]. Captures *texture* (saw vs
|
||||||
|
/// pad vs noise) independent of pitch & loudness -> morph palette/figure
|
||||||
|
/// by timbre, not just by note. `mfcc[0]` = c1 (spectral tilt).
|
||||||
|
pub mfcc: [f32; MFCC_N],
|
||||||
/// Spectral flatness 0 (tonal/pad) .. 1 (noisy/break) -> smooth vs jagged.
|
/// Spectral flatness 0 (tonal/pad) .. 1 (noisy/break) -> smooth vs jagged.
|
||||||
pub flatness: f32,
|
pub flatness: f32,
|
||||||
/// Relative pitch-class energy (max-normalised) -> harmonic accent hues.
|
/// Relative pitch-class energy (max-normalised) -> harmonic accent hues.
|
||||||
@@ -128,6 +143,8 @@ impl Default for Bands {
|
|||||||
beat: 0.0,
|
beat: 0.0,
|
||||||
beat_phase: 0.0,
|
beat_phase: 0.0,
|
||||||
bpm: 0.0,
|
bpm: 0.0,
|
||||||
|
width: 0.0,
|
||||||
|
mfcc: [0.0; MFCC_N],
|
||||||
flatness: 0.0,
|
flatness: 0.0,
|
||||||
chroma: [0.0; CHROMA_N],
|
chroma: [0.0; CHROMA_N],
|
||||||
wave: [0.0; WAVE_N],
|
wave: [0.0; WAVE_N],
|
||||||
@@ -227,17 +244,19 @@ fn pick_device(host: &cpal::Host, sel: &Source) -> anyhow::Result<cpal::Device>
|
|||||||
pub fn start(src: Source) -> anyhow::Result<AudioHandle> {
|
pub fn start(src: Source) -> anyhow::Result<AudioHandle> {
|
||||||
let (input, out) = triple_buffer::triple_buffer(&Bands::default());
|
let (input, out) = triple_buffer::triple_buffer(&Bands::default());
|
||||||
|
|
||||||
let rb = HeapRb::<f32>::new(RING_CAP);
|
// Ring carries [mid, side] pairs: mid == old mono mean (spectral path
|
||||||
|
// unchanged, bit-identical), side == (L-R)/2 (stereo-width only).
|
||||||
|
let rb = HeapRb::<[f32; 2]>::new(RING_CAP);
|
||||||
let (mut prod, cons) = rb.split();
|
let (mut prod, cons) = rb.split();
|
||||||
let mut push_mono = move |m: f32| {
|
let mut push_ms = move |mid: f32, side: f32| {
|
||||||
let _ = prod.try_push(m);
|
let _ = prod.try_push([mid, side]);
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut streams: Vec<cpal::Stream> = Vec::new();
|
let mut streams: Vec<cpal::Stream> = Vec::new();
|
||||||
let host = cpal::default_host();
|
let host = cpal::default_host();
|
||||||
|
|
||||||
let sample_rate = match &src {
|
let sample_rate = match &src {
|
||||||
Source::File(path) => spawn_file_source(path, push_mono, &mut streams)?,
|
Source::File(path) => spawn_file_source(path, push_ms, &mut streams)?,
|
||||||
_ => {
|
_ => {
|
||||||
let device = pick_device(&host, &src)?;
|
let device = pick_device(&host, &src)?;
|
||||||
let cfg = device.default_input_config()?;
|
let cfg = device.default_input_config()?;
|
||||||
@@ -257,7 +276,7 @@ pub fn start(src: Source) -> anyhow::Result<AudioHandle> {
|
|||||||
device: &cpal::Device,
|
device: &cpal::Device,
|
||||||
cfg: &cpal::StreamConfig,
|
cfg: &cpal::StreamConfig,
|
||||||
channels: usize,
|
channels: usize,
|
||||||
mut push: impl FnMut(f32) + Send + 'static,
|
mut push: impl FnMut(f32, f32) + Send + 'static,
|
||||||
err_fn: impl FnMut(cpal::StreamError) + Send + 'static,
|
err_fn: impl FnMut(cpal::StreamError) + Send + 'static,
|
||||||
) -> Result<cpal::Stream, cpal::BuildStreamError>
|
) -> Result<cpal::Stream, cpal::BuildStreamError>
|
||||||
where
|
where
|
||||||
@@ -272,7 +291,13 @@ pub fn start(src: Source) -> anyhow::Result<AudioHandle> {
|
|||||||
for &v in f {
|
for &v in f {
|
||||||
s += f32::from_sample(v);
|
s += f32::from_sample(v);
|
||||||
}
|
}
|
||||||
push(s / f.len().max(1) as f32);
|
let mid = s / f.len().max(1) as f32;
|
||||||
|
let side = if f.len() >= 2 {
|
||||||
|
(f32::from_sample(f[0]) - f32::from_sample(f[1])) * 0.5
|
||||||
|
} else {
|
||||||
|
0.0
|
||||||
|
};
|
||||||
|
push(mid, side);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
err_fn,
|
err_fn,
|
||||||
@@ -285,16 +310,17 @@ pub fn start(src: Source) -> anyhow::Result<AudioHandle> {
|
|||||||
&scfg,
|
&scfg,
|
||||||
move |data: &[f32], _| {
|
move |data: &[f32], _| {
|
||||||
for f in data.chunks(channels) {
|
for f in data.chunks(channels) {
|
||||||
let s: f32 = f.iter().sum::<f32>() / f.len().max(1) as f32;
|
let mid: f32 = f.iter().sum::<f32>() / f.len().max(1) as f32;
|
||||||
push_mono(s);
|
let side = if f.len() >= 2 { (f[0] - f[1]) * 0.5 } else { 0.0 };
|
||||||
|
push_ms(mid, side);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
err_fn,
|
err_fn,
|
||||||
None,
|
None,
|
||||||
)?,
|
)?,
|
||||||
SampleFormat::I16 => run::<i16>(&device, &scfg, channels, push_mono, err_fn)?,
|
SampleFormat::I16 => run::<i16>(&device, &scfg, channels, push_ms, err_fn)?,
|
||||||
SampleFormat::U16 => run::<u16>(&device, &scfg, channels, push_mono, err_fn)?,
|
SampleFormat::U16 => run::<u16>(&device, &scfg, channels, push_ms, err_fn)?,
|
||||||
SampleFormat::I32 => run::<i32>(&device, &scfg, channels, push_mono, err_fn)?,
|
SampleFormat::I32 => run::<i32>(&device, &scfg, channels, push_ms, err_fn)?,
|
||||||
other => anyhow::bail!("unsupported sample format: {other:?}"),
|
other => anyhow::bail!("unsupported sample format: {other:?}"),
|
||||||
};
|
};
|
||||||
stream.play()?;
|
stream.play()?;
|
||||||
@@ -310,7 +336,7 @@ pub fn start(src: Source) -> anyhow::Result<AudioHandle> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Decode `path`, play it on the default output, tee mono into `push_mono`.
|
/// Decode `path`, play it on the default output, tee `(mid, side)` into `push_ms`.
|
||||||
/// Returns the source sample rate. Falls back to the output device's native
|
/// Returns the source sample rate. Falls back to the output device's native
|
||||||
/// rate with linear resampling if the device rejects the file's rate.
|
/// rate with linear resampling if the device rejects the file's rate.
|
||||||
/// A probed file ready to decode: format reader + audio decoder + the
|
/// A probed file ready to decode: format reader + audio decoder + the
|
||||||
@@ -361,7 +387,7 @@ fn open_file(path: &Path) -> anyhow::Result<DecodedFile> {
|
|||||||
|
|
||||||
fn spawn_file_source(
|
fn spawn_file_source(
|
||||||
path: &Path,
|
path: &Path,
|
||||||
mut push_mono: impl FnMut(f32) + Send + 'static,
|
mut push_ms: impl FnMut(f32, f32) + Send + 'static,
|
||||||
streams: &mut Vec<cpal::Stream>,
|
streams: &mut Vec<cpal::Stream>,
|
||||||
) -> anyhow::Result<f32> {
|
) -> anyhow::Result<f32> {
|
||||||
let DecodedFile {
|
let DecodedFile {
|
||||||
@@ -430,9 +456,11 @@ fn spawn_file_source(
|
|||||||
let resample = (out_sr / file_sr as f32).max(0.01);
|
let resample = (out_sr / file_sr as f32).max(0.01);
|
||||||
|
|
||||||
thread::spawn(move || {
|
thread::spawn(move || {
|
||||||
// Linear-resample state per output channel (mono dup across out_ch).
|
// Linear-resample state (mid drives playback dup; side rides along
|
||||||
|
// resampled identically so width stays in lock-step with audio).
|
||||||
let mut frac = 0.0f32;
|
let mut frac = 0.0f32;
|
||||||
let mut prev_mono = 0.0f32;
|
let mut prev_mid = 0.0f32;
|
||||||
|
let mut prev_side = 0.0f32;
|
||||||
let mut ilv: Vec<f32> = Vec::new();
|
let mut ilv: Vec<f32> = Vec::new();
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
@@ -452,23 +480,27 @@ fn spawn_file_source(
|
|||||||
decoded.copy_to_vec_interleaved::<f32>(&mut ilv);
|
decoded.copy_to_vec_interleaved::<f32>(&mut ilv);
|
||||||
|
|
||||||
for frame in ilv.chunks(ch) {
|
for frame in ilv.chunks(ch) {
|
||||||
let mono = frame.iter().sum::<f32>() / ch as f32;
|
let mid = frame.iter().sum::<f32>() / ch as f32;
|
||||||
|
let side = if ch >= 2 { (frame[0] - frame[1]) * 0.5 } else { 0.0 };
|
||||||
// Emit `resample` output frames per input frame (linear).
|
// Emit `resample` output frames per input frame (linear).
|
||||||
frac += resample;
|
frac += resample;
|
||||||
while frac >= 1.0 {
|
while frac >= 1.0 {
|
||||||
frac -= 1.0;
|
frac -= 1.0;
|
||||||
let a = 1.0 - frac.min(1.0);
|
let a = 1.0 - frac.min(1.0);
|
||||||
let s = prev_mono * (1.0 - a) + mono * a;
|
let s_mid = prev_mid * (1.0 - a) + mid * a;
|
||||||
push_mono(s);
|
let s_side = prev_side * (1.0 - a) + side * a;
|
||||||
|
push_ms(s_mid, s_side);
|
||||||
// Block until playback ring has room (back-pressure ==
|
// Block until playback ring has room (back-pressure ==
|
||||||
// play speed; keeps analysis in lock-step with audio).
|
// play speed; keeps analysis in lock-step with audio).
|
||||||
|
// Playback stays mono (s_mid) — audible output unchanged.
|
||||||
for _ in 0..out_ch {
|
for _ in 0..out_ch {
|
||||||
while pb_prod.try_push(s).is_err() {
|
while pb_prod.try_push(s_mid).is_err() {
|
||||||
thread::sleep(Duration::from_millis(1));
|
thread::sleep(Duration::from_millis(1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
prev_mono = mono;
|
prev_mid = mid;
|
||||||
|
prev_side = side;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@@ -476,7 +508,8 @@ fn spawn_file_source(
|
|||||||
Ok(file_sr as f32)
|
Ok(file_sr as f32)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Streaming STFT analyser. Feed mono samples; emits one [`Bands`] per hop.
|
/// Streaming STFT analyser. Feed `(mid, side)` pairs; emits one [`Bands`] per
|
||||||
|
/// hop. `mid` is the mono analysis signal; `side` only drives stereo width.
|
||||||
///
|
///
|
||||||
/// Holds all envelope / AGC / onset state so the live thread and the offline
|
/// Holds all envelope / AGC / onset state so the live thread and the offline
|
||||||
/// batch produce bit-identical frames for the same input.
|
/// batch produce bit-identical frames for the same input.
|
||||||
@@ -523,6 +556,19 @@ pub struct Analyzer {
|
|||||||
acf_lag_min: usize,
|
acf_lag_min: usize,
|
||||||
acf_lag_max: usize,
|
acf_lag_max: usize,
|
||||||
bpm: f32,
|
bpm: f32,
|
||||||
|
// Stereo (Mid/Side) width: per-hop RMS-energy accumulators (zeroed each
|
||||||
|
// hop) + AGC ceiling. Fed bit-identically live/offline via push(mid,side);
|
||||||
|
// `mid` is the old mono mean so every pre-existing field is unchanged.
|
||||||
|
ms_mid_sq: f32,
|
||||||
|
ms_side_sq: f32,
|
||||||
|
ms_n: usize,
|
||||||
|
agc_width: f32,
|
||||||
|
// MFCC: precomputed mel triangular filterbank (start bin + weights) + the
|
||||||
|
// DCT-II cosine table (MFCC_N rows x MEL_N, row-major); per-coeff bipolar
|
||||||
|
// AGC ceilings give a stable ~[-1,1] timbre vector.
|
||||||
|
mel_filt: Vec<(usize, Vec<f32>)>,
|
||||||
|
dct: Vec<f32>,
|
||||||
|
agc_mfcc: [f32; MFCC_N],
|
||||||
}
|
}
|
||||||
|
|
||||||
fn norm(v: f32, c: &mut f32) -> f32 {
|
fn norm(v: f32, c: &mut f32) -> f32 {
|
||||||
@@ -530,6 +576,13 @@ fn norm(v: f32, c: &mut f32) -> f32 {
|
|||||||
(v / *c).clamp(0.0, 1.0)
|
(v / *c).clamp(0.0, 1.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bipolar AGC: like `norm` but keeps sign (cepstral coeffs swing about 0).
|
||||||
|
fn norm_signed(v: f32, c: &mut f32) -> f32 {
|
||||||
|
let a = v.abs();
|
||||||
|
*c = (*c * AGC_DECAY).max(AGC_FLOOR).max(a);
|
||||||
|
(v / *c).clamp(-1.0, 1.0)
|
||||||
|
}
|
||||||
|
|
||||||
fn follow(env: &mut f32, x: f32) {
|
fn follow(env: &mut f32, x: f32) {
|
||||||
let coeff = if x > *env { ATTACK } else { RELEASE };
|
let coeff = if x > *env { ATTACK } else { RELEASE };
|
||||||
*env += (x - *env) * coeff;
|
*env += (x - *env) * coeff;
|
||||||
@@ -565,6 +618,46 @@ impl Analyzer {
|
|||||||
let acf_lag_max = (ACF_PERIOD_HI / hop_dt).round() as usize;
|
let acf_lag_max = (ACF_PERIOD_HI / hop_dt).round() as usize;
|
||||||
let acf_n = ((ACF_WIN_SECS / hop_dt).round() as usize).max(acf_lag_max + 2);
|
let acf_n = ((ACF_WIN_SECS / hop_dt).round() as usize).max(acf_lag_max + 2);
|
||||||
|
|
||||||
|
// Mel triangular filterbank (MEL_N+2 mel-spaced edges -> bin space) +
|
||||||
|
// the DCT-II cosine table for the MFCCs. Built once; pure fn of sr.
|
||||||
|
let hz_to_mel = |f: f32| 2595.0 * (1.0 + f / 700.0).log10();
|
||||||
|
let mel_to_hz = |m: f32| 700.0 * (10f32.powf(m / 2595.0) - 1.0);
|
||||||
|
let (m_lo, m_hi) = (hz_to_mel(MEL_LO), hz_to_mel(MEL_HI));
|
||||||
|
let mut edges = [0.0f32; MEL_N + 2];
|
||||||
|
for (i, e) in edges.iter_mut().enumerate() {
|
||||||
|
let m = m_lo + (m_hi - m_lo) * i as f32 / (MEL_N + 1) as f32;
|
||||||
|
*e = mel_to_hz(m) / bin_hz; // edge position in FFT bins
|
||||||
|
}
|
||||||
|
let mut mel_filt: Vec<(usize, Vec<f32>)> = Vec::with_capacity(MEL_N);
|
||||||
|
for j in 0..MEL_N {
|
||||||
|
let (f0, f1, f2) = (edges[j], edges[j + 1], edges[j + 2]);
|
||||||
|
let a = (f0.floor() as usize).min(half - 1);
|
||||||
|
let b = ((f2.ceil() as usize).max(a + 1)).min(half);
|
||||||
|
let w = (a..b)
|
||||||
|
.map(|bin| {
|
||||||
|
let x = bin as f32;
|
||||||
|
let g = if x <= f1 {
|
||||||
|
if f1 > f0 { (x - f0) / (f1 - f0) } else { 0.0 }
|
||||||
|
} else if f2 > f1 {
|
||||||
|
(f2 - x) / (f2 - f1)
|
||||||
|
} else {
|
||||||
|
0.0
|
||||||
|
};
|
||||||
|
g.clamp(0.0, 1.0)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
mel_filt.push((a, w));
|
||||||
|
}
|
||||||
|
let mut dct = vec![0.0f32; MFCC_N * MEL_N];
|
||||||
|
for k in 1..=MFCC_N {
|
||||||
|
for j in 0..MEL_N {
|
||||||
|
dct[(k - 1) * MEL_N + j] = (std::f32::consts::PI * k as f32
|
||||||
|
* (j as f32 + 0.5)
|
||||||
|
/ MEL_N as f32)
|
||||||
|
.cos();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Analyzer {
|
Analyzer {
|
||||||
hann,
|
hann,
|
||||||
fft,
|
fft,
|
||||||
@@ -599,13 +692,25 @@ impl Analyzer {
|
|||||||
acf_lag_min,
|
acf_lag_min,
|
||||||
acf_lag_max,
|
acf_lag_max,
|
||||||
bpm: 0.0,
|
bpm: 0.0,
|
||||||
|
ms_mid_sq: 0.0,
|
||||||
|
ms_side_sq: 0.0,
|
||||||
|
ms_n: 0,
|
||||||
|
agc_width: AGC_FLOOR,
|
||||||
|
mel_filt,
|
||||||
|
dct,
|
||||||
|
agc_mfcc: [AGC_FLOOR; MFCC_N],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Push one mono sample. Returns `Some(bands)` when a hop completes.
|
/// Push one `(mid, side)` sample pair. `mid` (= the old mono channel mean)
|
||||||
pub fn push(&mut self, s: f32) -> Option<Bands> {
|
/// drives all spectral analysis unchanged; `side` (= (L-R)/2) only feeds
|
||||||
|
/// the new stereo-width metric. Returns `Some(bands)` when a hop completes.
|
||||||
|
pub fn push(&mut self, mid: f32, side: f32) -> Option<Bands> {
|
||||||
|
self.ms_mid_sq += mid * mid;
|
||||||
|
self.ms_side_sq += side * side;
|
||||||
|
self.ms_n += 1;
|
||||||
self.win.copy_within(1..FFT_SIZE, 0);
|
self.win.copy_within(1..FFT_SIZE, 0);
|
||||||
self.win[FFT_SIZE - 1] = s;
|
self.win[FFT_SIZE - 1] = mid;
|
||||||
self.filled = (self.filled + 1).min(FFT_SIZE);
|
self.filled = (self.filled + 1).min(FFT_SIZE);
|
||||||
self.since_hop += 1;
|
self.since_hop += 1;
|
||||||
if self.filled < FFT_SIZE || self.since_hop < HOP {
|
if self.filled < FFT_SIZE || self.since_hop < HOP {
|
||||||
@@ -722,6 +827,35 @@ impl Analyzer {
|
|||||||
let am = lin_sum / nbin;
|
let am = lin_sum / nbin;
|
||||||
let flatness = if am > 1e-9 { (gm / am).clamp(0.0, 1.0) } else { 0.0 };
|
let flatness = if am > 1e-9 { (gm / am).clamp(0.0, 1.0) } else { 0.0 };
|
||||||
|
|
||||||
|
// MFCC: mel-filterbank energies (magnitude) -> log -> DCT-II. c0
|
||||||
|
// (overall energy) is dropped; c1.. = pitch-independent timbre.
|
||||||
|
let mut mel_log = [0.0f32; MEL_N];
|
||||||
|
for (j, (a, w)) in self.mel_filt.iter().enumerate() {
|
||||||
|
let mut e = 0.0f32;
|
||||||
|
for (o, &g) in w.iter().enumerate() {
|
||||||
|
e += mags[a + o] * g;
|
||||||
|
}
|
||||||
|
mel_log[j] = (e + 1e-9).ln();
|
||||||
|
}
|
||||||
|
let mut mfcc = [0.0f32; MFCC_N];
|
||||||
|
for (k, mc) in mfcc.iter_mut().enumerate() {
|
||||||
|
let row = &self.dct[k * MEL_N..(k + 1) * MEL_N];
|
||||||
|
let mut s = 0.0f32;
|
||||||
|
for (j, &r) in row.iter().enumerate() {
|
||||||
|
s += mel_log[j] * r;
|
||||||
|
}
|
||||||
|
*mc = norm_signed(s, &mut self.agc_mfcc[k]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stereo width from this hop's RMS energy (zero the accumulators).
|
||||||
|
let msn = self.ms_n.max(1) as f32;
|
||||||
|
let mid_rms = (self.ms_mid_sq / msn).sqrt();
|
||||||
|
let side_rms = (self.ms_side_sq / msn).sqrt();
|
||||||
|
self.ms_mid_sq = 0.0;
|
||||||
|
self.ms_side_sq = 0.0;
|
||||||
|
self.ms_n = 0;
|
||||||
|
let width = norm(side_rms / (mid_rms + 1e-6), &mut self.agc_width);
|
||||||
|
|
||||||
// Advance prev_mag now that flux is computed.
|
// Advance prev_mag now that flux is computed.
|
||||||
self.prev_mag.copy_from_slice(&mags);
|
self.prev_mag.copy_from_slice(&mags);
|
||||||
|
|
||||||
@@ -781,6 +915,12 @@ impl Analyzer {
|
|||||||
self.env.flux = self.broad_pop;
|
self.env.flux = self.broad_pop;
|
||||||
self.env.csd = self.csd_pop;
|
self.env.csd = self.csd_pop;
|
||||||
follow(&mut self.env.flatness, flatness);
|
follow(&mut self.env.flatness, flatness);
|
||||||
|
follow(&mut self.env.width, width);
|
||||||
|
// Bipolar cepstral vector: plain EMA (slowly-evolving fingerprint, not
|
||||||
|
// an attack — `follow`'s rise/fall asymmetry would distort it).
|
||||||
|
for (e, &m) in self.env.mfcc.iter_mut().zip(&mfcc) {
|
||||||
|
*e += (m - *e) * MFCC_SMOOTH;
|
||||||
|
}
|
||||||
|
|
||||||
// Autocorrelation tempo: anchor the predictive IOI to the dominant
|
// Autocorrelation tempo: anchor the predictive IOI to the dominant
|
||||||
// period in ~3 s of broadband-flux history *before* the beat block
|
// period in ~3 s of broadband-flux history *before* the beat block
|
||||||
@@ -895,12 +1035,12 @@ impl Analyzer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn analysis_loop(
|
fn analysis_loop(
|
||||||
mut cons: impl Consumer<Item = f32> + Observer,
|
mut cons: impl Consumer<Item = [f32; 2]>, // Observer is a Consumer supertrait
|
||||||
sample_rate: f32,
|
sample_rate: f32,
|
||||||
mut out: triple_buffer::Input<Bands>,
|
mut out: triple_buffer::Input<Bands>,
|
||||||
) {
|
) {
|
||||||
let mut an = Analyzer::new(sample_rate);
|
let mut an = Analyzer::new(sample_rate);
|
||||||
let mut scratch = vec![0.0f32; HOP * 8];
|
let mut scratch = vec![[0.0f32; 2]; HOP * 8];
|
||||||
loop {
|
loop {
|
||||||
let avail = cons.occupied_len();
|
let avail = cons.occupied_len();
|
||||||
if avail == 0 {
|
if avail == 0 {
|
||||||
@@ -909,8 +1049,8 @@ fn analysis_loop(
|
|||||||
}
|
}
|
||||||
let take = avail.min(scratch.len());
|
let take = avail.min(scratch.len());
|
||||||
let got = cons.pop_slice(&mut scratch[..take]);
|
let got = cons.pop_slice(&mut scratch[..take]);
|
||||||
for &s in &scratch[..got] {
|
for &[mid, side] in &scratch[..got] {
|
||||||
if let Some(b) = an.push(s) {
|
if let Some(b) = an.push(mid, side) {
|
||||||
out.write(b);
|
out.write(b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -951,9 +1091,10 @@ pub fn analyze_file(path: &Path) -> anyhow::Result<Timeline> {
|
|||||||
let ch = decoded.spec().channels().count().max(1);
|
let ch = decoded.spec().channels().count().max(1);
|
||||||
decoded.copy_to_vec_interleaved::<f32>(&mut ilv);
|
decoded.copy_to_vec_interleaved::<f32>(&mut ilv);
|
||||||
for frame in ilv.chunks(ch) {
|
for frame in ilv.chunks(ch) {
|
||||||
let mono = frame.iter().sum::<f32>() / ch as f32;
|
let mid = frame.iter().sum::<f32>() / ch as f32;
|
||||||
|
let side = if ch >= 2 { (frame[0] - frame[1]) * 0.5 } else { 0.0 };
|
||||||
samples += 1;
|
samples += 1;
|
||||||
if let Some(b) = an.push(mono) {
|
if let Some(b) = an.push(mid, side) {
|
||||||
frames.push(b);
|
frames.push(b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+13
-1
@@ -328,16 +328,22 @@ fn main() {
|
|||||||
Ok(tl) => {
|
Ok(tl) => {
|
||||||
let mut peak = Bands::default();
|
let mut peak = Bands::default();
|
||||||
let mut bpms: Vec<f32> = Vec::new();
|
let mut bpms: Vec<f32> = Vec::new();
|
||||||
|
let mut mfcc_abs = 0.0f32; // mean |c1| -> timbre vec is alive
|
||||||
for b in &tl.frames {
|
for b in &tl.frames {
|
||||||
peak.low = peak.low.max(b.low);
|
peak.low = peak.low.max(b.low);
|
||||||
peak.loud = peak.loud.max(b.loud);
|
peak.loud = peak.loud.max(b.loud);
|
||||||
peak.flux = peak.flux.max(b.flux);
|
peak.flux = peak.flux.max(b.flux);
|
||||||
peak.csd = peak.csd.max(b.csd);
|
peak.csd = peak.csd.max(b.csd);
|
||||||
peak.centroid = peak.centroid.max(b.centroid);
|
peak.centroid = peak.centroid.max(b.centroid);
|
||||||
|
peak.width = peak.width.max(b.width);
|
||||||
|
mfcc_abs += b.mfcc[0].abs();
|
||||||
if b.bpm > 0.0 {
|
if b.bpm > 0.0 {
|
||||||
bpms.push(b.bpm);
|
bpms.push(b.bpm);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
let nf = tl.frames.len().max(1) as f32;
|
||||||
|
mfcc_abs /= nf;
|
||||||
|
let last = tl.frames.last().copied().unwrap_or_default();
|
||||||
// Median BPM = the track's anchored tempo (ACF-stabilised).
|
// Median BPM = the track's anchored tempo (ACF-stabilised).
|
||||||
let med_bpm = if bpms.is_empty() {
|
let med_bpm = if bpms.is_empty() {
|
||||||
0.0
|
0.0
|
||||||
@@ -346,7 +352,7 @@ fn main() {
|
|||||||
bpms[bpms.len() / 2]
|
bpms[bpms.len() / 2]
|
||||||
};
|
};
|
||||||
println!(
|
println!(
|
||||||
"ok: {} frames, {:.2}s, {} Hz, {:.1} fps\n peak low {:.2} loud {:.2} flux {:.2} csd {:.2} centroid {:.2}\n tempo {:.1} BPM (median of {} locked frames)",
|
"ok: {} frames, {:.2}s, {} Hz, {:.1} fps\n peak low {:.2} loud {:.2} flux {:.2} csd {:.2} centroid {:.2} width {:.2}\n tempo {:.1} BPM (median of {} locked frames)\n mfcc c1..c4 [{:+.2} {:+.2} {:+.2} {:+.2}] (last) mean|c1| {:.2}",
|
||||||
tl.frames.len(),
|
tl.frames.len(),
|
||||||
tl.duration(),
|
tl.duration(),
|
||||||
tl.sample_rate as u32,
|
tl.sample_rate as u32,
|
||||||
@@ -356,8 +362,14 @@ fn main() {
|
|||||||
peak.flux,
|
peak.flux,
|
||||||
peak.csd,
|
peak.csd,
|
||||||
peak.centroid,
|
peak.centroid,
|
||||||
|
peak.width,
|
||||||
med_bpm,
|
med_bpm,
|
||||||
bpms.len(),
|
bpms.len(),
|
||||||
|
last.mfcc[0],
|
||||||
|
last.mfcc[1],
|
||||||
|
last.mfcc[2],
|
||||||
|
last.mfcc[3],
|
||||||
|
mfcc_abs,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
Err(e) => die(format!("analyze: {e}")),
|
Err(e) => die(format!("analyze: {e}")),
|
||||||
|
|||||||
Reference in New Issue
Block a user