feat: add flux visualization

This commit is contained in:
Peter Steinberger 2026-01-02 14:48:52 +01:00
parent a3f8d2dcd1
commit fc390e7e86
7 changed files with 12 additions and 7 deletions

View File

@ -2,7 +2,7 @@
## 0.1.0 - 2026-01-02
- Spectrogram + feature panels (mel, chroma, hpss, selfsim, loudness, tempogram, mfcc) with multi-panel grid
- Spectrogram + feature panels (mel, chroma, hpss, selfsim, loudness, tempogram, mfcc, flux) with multi-panel grid
- Native WAV/MP3 decoding (ffmpeg fallback for other formats)
- PNG/JPEG output with size control and time slicing
- Palette styles: classic, magma, inferno, viridis, gray

View File

@ -5,7 +5,7 @@ Generate modern spectrogram images from audio files.
## Features
- Classic time-frequency spectrograms (FFT/STFT, Hann window)
- Multi-panel feature visualizations (mel, chroma, hpss, selfsim, loudness, tempogram, mfcc)
- Multi-panel feature visualizations (mel, chroma, hpss, selfsim, loudness, tempogram, mfcc, flux)
- Native decoding for WAV + MP3, ffmpeg fallback for everything else
- PNG or JPEG output (default JPG)
- Time slicing via `--start` + `--duration`

View File

@ -36,7 +36,7 @@ type cli struct {
Duration float64 `name:"duration" help:"duration in seconds (0 = full)"`
SampleRate int `name:"sample-rate" help:"ffmpeg output sample rate" default:"44100"`
Style string `help:"palette style: classic, magma, inferno, viridis, gray" default:"classic"`
Viz []string `name:"viz" help:"visualizations (repeatable or comma-separated): spectrogram, mel, chroma, hpss, selfsim, loudness, tempogram, mfcc"`
Viz []string `name:"viz" help:"visualizations (repeatable or comma-separated): spectrogram, mel, chroma, hpss, selfsim, loudness, tempogram, mfcc, flux"`
FFmpegPath string `name:"ffmpeg" help:"path to ffmpeg binary"`
Quiet bool `short:"q" help:"suppress stdout output"`
Verbose bool `short:"v" help:"verbose stderr output"`

View File

@ -50,7 +50,7 @@ body_class: home
</div>
<div class="card">
<h3>Feature panels</h3>
<p>mel, chroma, hpss, selfsim, loudness, tempogram, mfcc — rendered as single or grid views.</p>
<p>mel, chroma, hpss, selfsim, loudness, tempogram, mfcc, flux — rendered as single or grid views.</p>
</div>
<div class="card">
<h3>Clean output</h3>

View File

@ -67,8 +67,8 @@ description: songsee spectral pipeline, defaults, and rendering details.
<div class="card">
<p>
Visualizations are selectable via --viz. Defaults to spectrogram. Supported names: spectrogram,
mel, chroma, hpss, selfsim, loudness, tempogram, mfcc. Multiple entries render as a grid of
panels.
mel, chroma, hpss, selfsim, loudness, tempogram, mfcc, flux. Multiple entries render as a grid
of panels.
</p>
</div>
</section>

View File

@ -25,6 +25,7 @@ const (
Loudness Kind = "loudness"
Tempogram Kind = "tempogram"
MFCC Kind = "mfcc"
Flux Kind = "flux"
)
var validKinds = map[Kind]struct{}{
@ -36,6 +37,7 @@ var validKinds = map[Kind]struct{}{
Loudness: {},
Tempogram: {},
MFCC: {},
Flux: {},
}
// ParseList normalizes a list of viz names, allowing comma-separated values.
@ -173,6 +175,9 @@ func Render(kind Kind, ctx *Context, opts RenderOptions) (*image.RGBA, error) {
Palette: opts.Palette,
FlipVert: true,
})
case Flux:
flux := dsp.SpectralFlux(&ctx.Spec)
return render.Loudness(flux, opts.Width, opts.Height, opts.Palette)
default:
return nil, fmt.Errorf("unknown viz: %s", kind)
}

View File

@ -33,7 +33,7 @@ func TestRenderAllKinds(t *testing.T) {
Height: 80,
Palette: colorRGBA,
}
kinds := []Kind{Spectrogram, Mel, Chroma, MFCC, HPSS, SelfSim, Loudness, Tempogram}
kinds := []Kind{Spectrogram, Mel, Chroma, MFCC, HPSS, SelfSim, Loudness, Tempogram, Flux}
for _, kind := range kinds {
img, err := Render(kind, ctx, opts)
if err != nil {