From ebb7912ac32713b34ead56be28364b61a3c24171 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 2 Jan 2026 13:54:10 +0100 Subject: [PATCH] feat: add songsee CLI and tests --- cmd/songsee/-.jpg | Bin 0 -> 33233 bytes cmd/songsee/main.go | 261 ++++++++++++ cmd/songsee/main_test.go | 580 ++++++++++++++++++++++++++ go.mod | 7 + go.sum | 12 + internal/audio/audio.go | 21 + internal/audio/audio_test.go | 249 +++++++++++ internal/audio/decode.go | 78 ++++ internal/audio/ffmpeg.go | 67 +++ internal/audio/ffmpeg_test.go | 74 ++++ internal/audio/mp3.go | 72 ++++ internal/audio/mp3_errors_test.go | 30 ++ internal/audio/slice.go | 33 ++ internal/audio/wav.go | 229 ++++++++++ internal/audio/wav_extensible_test.go | 47 +++ internal/audio/wav_extra_test.go | 118 ++++++ internal/audio/wav_float_test.go | 77 ++++ internal/audio/wav_pcm_test.go | 101 +++++ internal/audio/wav_test.go | 38 ++ internal/dsp/fft.go | 40 ++ internal/dsp/fft_test.go | 13 + internal/dsp/spectrogram.go | 98 +++++ internal/dsp/spectrogram_test.go | 51 +++ internal/render/palette.go | 92 ++++ internal/render/render.go | 102 +++++ internal/render/render_test.go | 154 +++++++ testdata/sine.mp3 | Bin 0 -> 4986 bytes 27 files changed, 2644 insertions(+) create mode 100644 cmd/songsee/-.jpg create mode 100644 cmd/songsee/main.go create mode 100644 cmd/songsee/main_test.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 internal/audio/audio.go create mode 100644 internal/audio/audio_test.go create mode 100644 internal/audio/decode.go create mode 100644 internal/audio/ffmpeg.go create mode 100644 internal/audio/ffmpeg_test.go create mode 100644 internal/audio/mp3.go create mode 100644 internal/audio/mp3_errors_test.go create mode 100644 internal/audio/slice.go create mode 100644 internal/audio/wav.go create mode 100644 internal/audio/wav_extensible_test.go create mode 100644 internal/audio/wav_extra_test.go create mode 100644 internal/audio/wav_float_test.go create mode 100644 
internal/audio/wav_pcm_test.go create mode 100644 internal/audio/wav_test.go create mode 100644 internal/dsp/fft.go create mode 100644 internal/dsp/fft_test.go create mode 100644 internal/dsp/spectrogram.go create mode 100644 internal/dsp/spectrogram_test.go create mode 100644 internal/render/palette.go create mode 100644 internal/render/render.go create mode 100644 internal/render/render_test.go create mode 100644 testdata/sine.mp3 diff --git a/cmd/songsee/-.jpg b/cmd/songsee/-.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6fa19aeda98209cf8a73c04c835cfe07bdf86cb2 GIT binary patch literal 33233 zcmeIwH&7Kp9LMq9+sp429?8)007W~3iZ%|I?Z9jYW;-yT&NvEUqQk5m&uoWTD^bAE zVFau&q7|66@Z5XPVJy|g=KG)7|J=>q&isFSpRgx$GfQv|MA=Il^1A&fw(K!zPMDTK6A zqe=0rq@hg9cJv6b5qfN;^sttmRDZcm?+G)oGIEQ`?aaz*CsCVK+|bhM=2f({XIFI( z=9kpeHF|!|$Wf!m6pS5LIDSIu#7UFOrc9kaW9F>cbLP&QzhL2_#Y>hhTfSoDs?}@O zu3NuhPn-hIvc4;(yn_{h;?$CD>co;rQz?78z7E?&BP3{=>3O}pFV&2`t7?EDji4nbfDkz1Uioq z-MJIxkw(2O= 0 { + return exitCode + } + if err != nil { + if parseErr, ok := err.(*kong.ParseError); ok { + _, _ = fmt.Fprintln(stderr, "songsee:", parseErr) + if parseErr.Context != nil { + parseErr.Context.Stdout = stderr + _ = parseErr.Context.PrintUsage(false) + } + return 2 + } + _, _ = fmt.Fprintln(stderr, "songsee:", err) + return 1 + } + + input := cfg.Input + if input == "" { + if ctx != nil { + ctx.Stdout = stderr + _ = ctx.PrintUsage(false) + } + return 2 + } + + if cfg.MaxFreq > 0 && cfg.MaxFreq <= cfg.MinFreq { + return dieUsage(stderr, ctx, "--max-freq must be > --min-freq") + } + if cfg.Width <= 0 || cfg.Height <= 0 { + return dieUsage(stderr, ctx, "--width and --height must be > 0") + } + if cfg.WindowSize <= 0 || cfg.HopSize <= 0 { + return dieUsage(stderr, ctx, "--window and --hop must be > 0") + } + if !isPowerOfTwo(cfg.WindowSize) { + return dieUsage(stderr, ctx, "--window must be a power of two") + } + if cfg.StartSec < 0 || cfg.Duration < 0 { + return dieUsage(stderr, ctx, "--start and 
--duration must be >= 0") + } + + format := strings.ToLower(cfg.Format) + if format != "jpg" && format != "jpeg" && format != "png" { + return dieUsage(stderr, ctx, "--format must be jpg or png") + } + if format == "jpeg" { + format = "jpg" + } + + output := cfg.Output + if output == "" { + if input == "-" { + output = "songsee." + format + } else { + ext := strings.ToLower(filepath.Ext(input)) + base := strings.TrimSuffix(filepath.Base(input), ext) + output = filepath.Join(filepath.Dir(input), base+"."+format) + } + } else { + ext := strings.ToLower(filepath.Ext(output)) + switch ext { + case ".png": + format = "png" + case ".jpg", ".jpeg": + format = "jpg" + default: + if !formatSet { + output = output + "." + format + } + } + } + + if cfg.Verbose { + _, _ = fmt.Fprintf(stderr, "input: %s\n", input) + _, _ = fmt.Fprintf(stderr, "output: %s (%s)\n", output, format) + } + + opts := audio.Options{SampleRate: cfg.SampleRate, FFmpegPath: cfg.FFmpegPath} + var pcm audio.Audio + if input == "-" { + pcm, err = audio.DecodeReader(stdin, opts) + } else { + pcm, err = audio.DecodeFile(input, opts) + } + if err != nil { + return die(stderr, err) + } + if len(pcm.Samples) == 0 { + return die(stderr, errors.New("no samples decoded")) + } + if cfg.Verbose { + _, _ = fmt.Fprintf(stderr, "decoded: %d samples @ %d Hz\n", len(pcm.Samples), pcm.SampleRate) + } + if cfg.StartSec > 0 || cfg.Duration > 0 { + pcm, err = audio.Slice(pcm, cfg.StartSec, cfg.Duration) + if err != nil { + return die(stderr, err) + } + if cfg.Verbose { + _, _ = fmt.Fprintf(stderr, "slice: %0.2fs + %0.2fs => %d samples\n", cfg.StartSec, cfg.Duration, len(pcm.Samples)) + } + } + + spec := dsp.ComputeSpectrogram(pcm.Samples, pcm.SampleRate, cfg.WindowSize, cfg.HopSize) + style := strings.ToLower(strings.TrimSpace(cfg.Style)) + palette, err := render.PaletteByName(style) + if err != nil { + return dieUsage(stderr, ctx, "unknown style") + } + + img, err := render.Spectrogram(spec, render.Options{ + Width: 
cfg.Width, + Height: cfg.Height, + MinFreq: cfg.MinFreq, + MaxFreq: cfg.MaxFreq, + Palette: palette, + }) + if err != nil { + return die(stderr, err) + } + + if err := writeImage(output, format, img, stdout); err != nil { + return die(stderr, err) + } + + if output != "-" && !cfg.Quiet { + _, _ = fmt.Fprintln(stdout, output) + } + return 0 +} + +func writeImage(path, format string, img image.Image, stdout io.Writer) error { + var out io.Writer + if path == "-" { + out = stdout + } else { + file, err := os.Create(path) + if err != nil { + return err + } + defer func() { _ = file.Close() }() + out = file + } + + switch format { + case "png": + return png.Encode(out, img) + case "jpg": + return jpeg.Encode(out, img, &jpeg.Options{Quality: 95}) + default: + return fmt.Errorf("unknown format %s", format) + } +} + +func die(stderr io.Writer, err error) int { + _, _ = fmt.Fprintln(stderr, "songsee:", err) + return 1 +} + +func dieUsage(stderr io.Writer, ctx *kong.Context, msg string) int { + _, _ = fmt.Fprintln(stderr, "songsee:", msg) + if ctx != nil { + ctx.Stdout = stderr + _ = ctx.PrintUsage(false) + } + return 2 +} + +func isPowerOfTwo(v int) bool { + return v > 0 && (v&(v-1)) == 0 +} + +func hasFlag(args []string, name string) bool { + for i := 0; i < len(args); i++ { + arg := args[i] + if arg == name || strings.HasPrefix(arg, name+"=") { + return true + } + } + return false +} diff --git a/cmd/songsee/main_test.go b/cmd/songsee/main_test.go new file mode 100644 index 0000000..dfa1c4e --- /dev/null +++ b/cmd/songsee/main_test.go @@ -0,0 +1,580 @@ +package main + +import ( + "bytes" + "image" + "image/png" + "math" + "os" + "path/filepath" + "testing" +) + +func TestRunMP3E2E(t *testing.T) { + input := testdataPath(t, "sine.mp3") + outDir := t.TempDir() + outPath := filepath.Join(outDir, "spectro.jpg") + + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{ + "--width", "320", + "--height", "180", + "--start", "0.2", + "--duration", "0.5", + 
"--style", "magma", + "--output", outPath, + input, + }, bytes.NewReader(nil), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d stderr=%s", exit, stderr.String()) + } + if stdout.String() == "" { + t.Fatalf("expected stdout output") + } + info, err := os.Stat(outPath) + if err != nil { + t.Fatalf("missing output: %v", err) + } + if info.Size() == 0 { + t.Fatalf("empty output") + } + + file, err := os.Open(outPath) + if err != nil { + t.Fatalf("open output: %v", err) + } + defer func() { _ = file.Close() }() + img, _, err := image.Decode(file) + if err != nil { + t.Fatalf("decode image: %v", err) + } + if img.Bounds().Dx() != 320 || img.Bounds().Dy() != 180 { + t.Fatalf("size mismatch") + } + if flatImage(img) { + t.Fatalf("image appears flat") + } +} + +func TestRunFromStdinPNG(t *testing.T) { + outDir := t.TempDir() + outPath := filepath.Join(outDir, "spectro.png") + + wav := makeWAV([]int16{0, 2000, -2000, 0, 1000, -1000}, 44100, 1) + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{ + "--format", "png", + "--output", outPath, + "-", + }, bytes.NewReader(wav), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d stderr=%s", exit, stderr.String()) + } + file, err := os.Open(outPath) + if err != nil { + t.Fatalf("open output: %v", err) + } + defer func() { _ = file.Close() }() + if _, err := png.Decode(file); err != nil { + t.Fatalf("decode png: %v", err) + } +} + +func TestRunVersion(t *testing.T) { + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--version"}, bytes.NewReader(nil), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d", exit) + } + if stdout.String() == "" { + t.Fatalf("expected version output") + } +} + +func TestRunHelp(t *testing.T) { + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--help"}, bytes.NewReader(nil), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d", exit) + } + if stdout.String() == "" && stderr.String() == "" { + t.Fatalf("expected 
help output") + } +} + +func TestRunInvalidWindow(t *testing.T) { + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--window", "1000", "-"}, bytes.NewReader(nil), stdout, stderr) + if exit != 2 { + t.Fatalf("expected usage exit, got %d", exit) + } + if stderr.String() == "" { + t.Fatalf("expected stderr usage") + } +} + +func TestRunUnknownFlag(t *testing.T) { + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--nope"}, bytes.NewReader(nil), stdout, stderr) + if exit != 2 { + t.Fatalf("expected usage exit, got %d", exit) + } + if stderr.String() == "" { + t.Fatalf("expected stderr output") + } +} + +func TestRunBadFormat(t *testing.T) { + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--format", "gif", "-"}, bytes.NewReader(nil), stdout, stderr) + if exit != 2 { + t.Fatalf("expected usage exit, got %d", exit) + } +} + +func TestRunBadFreqRange(t *testing.T) { + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--min-freq", "100", "--max-freq", "50", "-"}, bytes.NewReader(nil), stdout, stderr) + if exit != 2 { + t.Fatalf("expected usage exit, got %d", exit) + } +} + +func TestRunUnknownStyle(t *testing.T) { + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--style", "nope", "-"}, bytes.NewReader(makeWAV([]int16{0, 1}, 44100, 1)), stdout, stderr) + if exit != 2 { + t.Fatalf("expected usage exit, got %d", exit) + } +} + +func TestRunBadSize(t *testing.T) { + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--width", "0", "-"}, bytes.NewReader(nil), stdout, stderr) + if exit != 2 { + t.Fatalf("expected usage exit, got %d", exit) + } +} + +func TestRunBadWindowZero(t *testing.T) { + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--window", "0", "-"}, bytes.NewReader(nil), stdout, stderr) + if exit != 2 { + t.Fatalf("expected usage exit, got %d", exit) + } +} + +func 
TestRunBadHopZero(t *testing.T) { + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--hop", "0", "-"}, bytes.NewReader(nil), stdout, stderr) + if exit != 2 { + t.Fatalf("expected usage exit, got %d", exit) + } +} + +func TestRunNegativeStart(t *testing.T) { + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--start=-1", "-"}, bytes.NewReader(nil), stdout, stderr) + if exit != 2 { + t.Fatalf("expected usage exit, got %d", exit) + } +} + +func TestRunMissingFile(t *testing.T) { + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"nope.wav"}, bytes.NewReader(nil), stdout, stderr) + if exit != 1 { + t.Fatalf("expected error exit, got %d", exit) + } + if stderr.String() == "" { + t.Fatalf("expected stderr output") + } +} + +func TestRunMissingInput(t *testing.T) { + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{}, bytes.NewReader(nil), stdout, stderr) + if exit != 2 { + t.Fatalf("expected usage exit, got %d", exit) + } + if stderr.String() == "" { + t.Fatalf("expected stderr output") + } +} + +func TestRunNoSamplesDecoded(t *testing.T) { + wav := makeWAV([]int16{}, 44100, 1) + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"-"}, bytes.NewReader(wav), stdout, stderr) + if exit != 1 { + t.Fatalf("expected error exit, got %d", exit) + } + if !bytes.Contains(stderr.Bytes(), []byte("no samples")) { + t.Fatalf("expected no samples error") + } +} + +func TestRunSliceError(t *testing.T) { + wav := makeWAV([]int16{0, 1, -1, 0}, 44100, 1) + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--start", "2", "--duration", "1", "-"}, bytes.NewReader(wav), stdout, stderr) + if exit != 1 { + t.Fatalf("expected error exit, got %d", exit) + } +} + +func TestRunSliceVerbose(t *testing.T) { + samples := make([]int16, 44100) + wav := makeWAV(samples, 44100, 1) + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + 
exit := run([]string{"--verbose", "--start", "0", "--duration", "0.2", "--output", "-", "-"}, bytes.NewReader(wav), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d stderr=%s", exit, stderr.String()) + } + if !bytes.Contains(stderr.Bytes(), []byte("slice:")) { + t.Fatalf("expected slice output") + } +} + +func TestRunStyleAffectsOutput(t *testing.T) { + wav := makeWAV(genSineMixSamples(44100), 44100, 1) + outClassic := runToBytes(t, wav, "classic") + outMagma := runToBytes(t, wav, "magma") + if bytes.Equal(outClassic, outMagma) { + t.Fatalf("expected different output for different styles") + } +} + +func TestRunOutputStdout(t *testing.T) { + wav := makeWAV([]int16{0, 1000, -1000, 0, 500, -500}, 44100, 1) + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{ + "--format", "png", + "--output", "-", + "-", + }, bytes.NewReader(wav), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d stderr=%s", exit, stderr.String()) + } + if stdout.Len() == 0 { + t.Fatalf("expected image bytes on stdout") + } + if _, err := png.Decode(bytes.NewReader(stdout.Bytes())); err != nil { + t.Fatalf("decode stdout png: %v", err) + } +} + +func TestRunOutputAuto(t *testing.T) { + dir := t.TempDir() + input := filepath.Join(dir, "input.wav") + if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil { + t.Fatalf("write input: %v", err) + } + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--format", "png", input}, bytes.NewReader(nil), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d stderr=%s", exit, stderr.String()) + } + outPath := filepath.Join(dir, "input.png") + if _, err := os.Stat(outPath); err != nil { + t.Fatalf("missing output: %v", err) + } +} + +func TestRunOutputExtOverride(t *testing.T) { + dir := t.TempDir() + input := filepath.Join(dir, "input.wav") + if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil { + t.Fatalf("write input: %v", err) + } 
+ output := filepath.Join(dir, "out.png") + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--output", output, input}, bytes.NewReader(nil), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d stderr=%s", exit, stderr.String()) + } + if _, err := os.Stat(output); err != nil { + t.Fatalf("missing output: %v", err) + } +} + +func TestRunOutputAppendDefault(t *testing.T) { + dir := t.TempDir() + input := filepath.Join(dir, "input.wav") + if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil { + t.Fatalf("write input: %v", err) + } + output := filepath.Join(dir, "out") + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--output", output, input}, bytes.NewReader(nil), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d stderr=%s", exit, stderr.String()) + } + if _, err := os.Stat(output + ".jpg"); err != nil { + t.Fatalf("missing output: %v", err) + } +} + +func TestRunOutputJpgExt(t *testing.T) { + dir := t.TempDir() + input := filepath.Join(dir, "input.wav") + if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil { + t.Fatalf("write input: %v", err) + } + output := filepath.Join(dir, "out.jpg") + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--output", output, input}, bytes.NewReader(nil), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d stderr=%s", exit, stderr.String()) + } + if _, err := os.Stat(output); err != nil { + t.Fatalf("missing output: %v", err) + } +} + +func TestRunWriteImageError(t *testing.T) { + wav := makeWAV([]int16{0, 1000, -1000, 0}, 44100, 1) + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--output", "/nope/dir/out.jpg", "-"}, bytes.NewReader(wav), stdout, stderr) + if exit != 1 { + t.Fatalf("expected error exit, got %d", exit) + } +} + +func TestRunFormatFlagKeepsOutput(t *testing.T) { + dir := t.TempDir() + input := filepath.Join(dir, "input.wav") 
+ if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil { + t.Fatalf("write input: %v", err) + } + output := filepath.Join(dir, "customout") + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--format", "png", "--output", output, input}, bytes.NewReader(nil), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d stderr=%s", exit, stderr.String()) + } + if _, err := os.Stat(output); err != nil { + t.Fatalf("missing output: %v", err) + } +} + +func TestRunQuiet(t *testing.T) { + dir := t.TempDir() + input := filepath.Join(dir, "input.wav") + if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil { + t.Fatalf("write input: %v", err) + } + output := filepath.Join(dir, "out.jpg") + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--quiet", "--output", output, input}, bytes.NewReader(nil), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d stderr=%s", exit, stderr.String()) + } + if stdout.String() != "" { + t.Fatalf("expected quiet stdout") + } +} + +func TestDie(t *testing.T) { + stderr := &bytes.Buffer{} + if code := die(stderr, errSentinel{}); code != 1 { + t.Fatalf("expected code 1") + } + if stderr.String() == "" { + t.Fatalf("expected stderr output") + } +} + +type errSentinel struct{} + +func (errSentinel) Error() string { return "boom" } + +func TestRunInputDashDefaultOutput(t *testing.T) { + tmp := t.TempDir() + cwd, err := os.Getwd() + if err != nil { + t.Fatalf("Getwd: %v", err) + } + if err := os.Chdir(tmp); err != nil { + t.Fatalf("Chdir: %v", err) + } + t.Cleanup(func() { _ = os.Chdir(cwd) }) + + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--format", "png", "-"}, bytes.NewReader(makeWAV([]int16{0, 1, -1}, 44100, 1)), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d stderr=%s", exit, stderr.String()) + } + if _, err := os.Stat(filepath.Join(tmp, "songsee.png")); err != nil { + 
t.Fatalf("missing output: %v", err) + } +} + +func TestRunFormatJPEG(t *testing.T) { + dir := t.TempDir() + input := filepath.Join(dir, "input.wav") + if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil { + t.Fatalf("write input: %v", err) + } + output := filepath.Join(dir, "out") + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--format", "jpeg", "--output", output, input}, bytes.NewReader(nil), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d stderr=%s", exit, stderr.String()) + } + if _, err := os.Stat(output); err != nil { + t.Fatalf("missing output: %v", err) + } +} + +func TestWriteImageUnknownFormat(t *testing.T) { + buf := &bytes.Buffer{} + err := writeImage("-", "gif", image.NewRGBA(image.Rect(0, 0, 1, 1)), buf) + if err == nil { + t.Fatalf("expected error") + } +} + +func TestRunVerbose(t *testing.T) { + dir := t.TempDir() + input := filepath.Join(dir, "input.wav") + if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil { + t.Fatalf("write input: %v", err) + } + output := filepath.Join(dir, "out.jpg") + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--verbose", "--output", output, input}, bytes.NewReader(nil), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d stderr=%s", exit, stderr.String()) + } + if !bytes.Contains(stderr.Bytes(), []byte("decoded:")) { + t.Fatalf("expected verbose output") + } +} + +func testdataPath(t *testing.T, name string) string { + t.Helper() + wd, err := os.Getwd() + if err != nil { + t.Fatalf("Getwd: %v", err) + } + root := filepath.Dir(filepath.Dir(wd)) + path := filepath.Join(root, "testdata", name) + if _, err := os.Stat(path); err != nil { + t.Fatalf("missing testdata: %v", err) + } + return path +} + +func flatImage(img image.Image) bool { + bounds := img.Bounds() + minLum := uint32(0xFFFFFFFF) + maxLum := uint32(0) + for y := bounds.Min.Y; y < bounds.Max.Y; y++ { + for x := 
bounds.Min.X; x < bounds.Max.X; x++ { + r, g, b, _ := img.At(x, y).RGBA() + lum := (r + g + b) / 3 + if lum < minLum { + minLum = lum + } + if lum > maxLum { + maxLum = lum + } + } + } + return maxLum-minLum < 1000 +} + +func makeWAV(samples []int16, sampleRate int, channels int) []byte { + if channels < 1 { + channels = 1 + } + dataLen := len(samples) * 2 + riffSize := 4 + (8 + 16) + (8 + dataLen) + + buf := &bytes.Buffer{} + buf.WriteString("RIFF") + writeU32(buf, uint32(riffSize)) + buf.WriteString("WAVE") + + buf.WriteString("fmt ") + writeU32(buf, 16) + writeU16(buf, 1) + writeU16(buf, uint16(channels)) + writeU32(buf, uint32(sampleRate)) + byteRate := sampleRate * channels * 2 + writeU32(buf, uint32(byteRate)) + blockAlign := channels * 2 + writeU16(buf, uint16(blockAlign)) + writeU16(buf, 16) + + buf.WriteString("data") + writeU32(buf, uint32(dataLen)) + for _, s := range samples { + writeU16(buf, uint16(s)) + } + + return buf.Bytes() +} + +func writeU16(buf *bytes.Buffer, v uint16) { + buf.WriteByte(byte(v)) + buf.WriteByte(byte(v >> 8)) +} + +func writeU32(buf *bytes.Buffer, v uint32) { + buf.WriteByte(byte(v)) + buf.WriteByte(byte(v >> 8)) + buf.WriteByte(byte(v >> 16)) + buf.WriteByte(byte(v >> 24)) +} + +func runToBytes(t *testing.T, wav []byte, style string) []byte { + t.Helper() + stdout := &bytes.Buffer{} + stderr := &bytes.Buffer{} + exit := run([]string{"--format", "png", "--style", style, "--output", "-", "-"}, bytes.NewReader(wav), stdout, stderr) + if exit != 0 { + t.Fatalf("exit %d stderr=%s", exit, stderr.String()) + } + return stdout.Bytes() +} + +func genSineMixSamples(n int) []int16 { + out := make([]int16, n) + for i := 0; i < n; i++ { + t := float64(i) / float64(n) + v := 0.5*math.Sin(2*math.Pi*440*t) + 0.4*math.Sin(2*math.Pi*880*t) + out[i] = int16(v * 15000) + } + return out +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..d7eff72 --- /dev/null +++ b/go.mod @@ -0,0 +1,7 @@ +module github.com/steipete/songsee + +go 
1.25 + +require github.com/hajimehoshi/go-mp3 v0.3.4 + +require github.com/alecthomas/kong v1.13.0 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..7920665 --- /dev/null +++ b/go.sum @@ -0,0 +1,12 @@ +github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0= +github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= +github.com/alecthomas/kong v1.13.0 h1:5e/7XC3ugvhP1DQBmTS+WuHtCbcv44hsohMgcvVxSrA= +github.com/alecthomas/kong v1.13.0/go.mod h1:wrlbXem1CWqUV5Vbmss5ISYhsVPkBb1Yo7YKJghju2I= +github.com/alecthomas/repr v0.5.2 h1:SU73FTI9D1P5UNtvseffFSGmdNci/O6RsqzeXJtP0Qs= +github.com/alecthomas/repr v0.5.2/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= +github.com/hajimehoshi/go-mp3 v0.3.4 h1:NUP7pBYH8OguP4diaTZ9wJbUbk3tC0KlfzsEpWmYj68= +github.com/hajimehoshi/go-mp3 v0.3.4/go.mod h1:fRtZraRFcWb0pu7ok0LqyFhCUrPeMsGRSVop0eemFmo= +github.com/hajimehoshi/oto/v2 v2.3.1/go.mod h1:seWLbgHH7AyUMYKfKYT9pg7PhUu9/SisyJvNTT+ASQo= +github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= +github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= +golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/internal/audio/audio.go b/internal/audio/audio.go new file mode 100644 index 0000000..ba20eb4 --- /dev/null +++ b/internal/audio/audio.go @@ -0,0 +1,21 @@ +// Package audio handles decoding audio into mono float samples. +package audio + +import "fmt" + +// Audio holds mono samples in [-1,1] range. +type Audio struct { + SampleRate int + Samples []float64 +} + +// Options controls decoding behavior. +type Options struct { + SampleRate int + FFmpegPath string +} + +var ( + // ErrUnsupported is returned when no decoder can handle the input. 
+ ErrUnsupported = fmt.Errorf("unsupported audio format") +) diff --git a/internal/audio/audio_test.go b/internal/audio/audio_test.go new file mode 100644 index 0000000..83e6311 --- /dev/null +++ b/internal/audio/audio_test.go @@ -0,0 +1,249 @@ +package audio + +import ( + "bytes" + "math" + "os" + "path/filepath" + "testing" +) + +func TestDecodeWAVBytes(t *testing.T) { + samples := make([]int16, 1000) + for i := range samples { + samples[i] = int16(2000 * math.Sin(2*math.Pi*float64(i)/50)) + } + data := makeWAV(samples, 44100, 1) + pcm, err := DecodeBytes(data, Options{}) + if err != nil { + t.Fatalf("DecodeBytes: %v", err) + } + if pcm.SampleRate != 44100 { + t.Fatalf("sample rate = %d", pcm.SampleRate) + } + if len(pcm.Samples) != len(samples) { + t.Fatalf("samples = %d", len(pcm.Samples)) + } +} + +func TestDecodeWAVIfNotWAV(t *testing.T) { + _, ok, err := DecodeWAVIf(bytesReader([]byte("NOTWAVE12345"))) + if err != nil { + t.Fatalf("DecodeWAVIf error: %v", err) + } + if ok { + t.Fatalf("expected ok=false") + } +} + +func TestDecodeWAVIfValid(t *testing.T) { + data := makeWAV([]int16{0, 1000, -1000}, 44100, 1) + pcm, ok, err := DecodeWAVIf(bytesReader(data)) + if err != nil { + t.Fatalf("DecodeWAVIf error: %v", err) + } + if !ok { + t.Fatalf("expected ok=true") + } + if len(pcm.Samples) != 3 { + t.Fatalf("samples = %d", len(pcm.Samples)) + } +} + +func TestDecodeMP3File(t *testing.T) { + path := testdataPath(t, "sine.mp3") + pcm, err := DecodeFile(path, Options{}) + if err != nil { + t.Fatalf("DecodeFile: %v", err) + } + if pcm.SampleRate == 0 || len(pcm.Samples) == 0 { + t.Fatalf("invalid decode result") + } +} + +func TestDecodeMP3IfValid(t *testing.T) { + path := testdataPath(t, "sine.mp3") + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + pcm, ok, err := DecodeMP3If(bytesReader(data)) + if err != nil { + t.Fatalf("DecodeMP3If: %v", err) + } + if !ok { + t.Fatalf("expected ok=true") + } + if len(pcm.Samples) == 0 { + 
t.Fatalf("empty samples") + } +} + +func TestDecodeFileUnknownExt(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "audio.bin") + if err := os.WriteFile(path, makeWAV([]int16{0, 1, -1}, 44100, 1), 0o644); err != nil { + t.Fatalf("write file: %v", err) + } + pcm, err := DecodeFile(path, Options{}) + if err != nil { + t.Fatalf("DecodeFile: %v", err) + } + if len(pcm.Samples) == 0 { + t.Fatalf("empty samples") + } +} + +func TestDecodeFileWAV(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "audio.wav") + if err := os.WriteFile(path, makeWAV([]int16{0, 1, -1}, 44100, 1), 0o644); err != nil { + t.Fatalf("write file: %v", err) + } + pcm, err := DecodeFile(path, Options{}) + if err != nil { + t.Fatalf("DecodeFile: %v", err) + } + if len(pcm.Samples) == 0 { + t.Fatalf("empty samples") + } +} + +func TestDecodeFileFFmpegFallbackError(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "audio.bin") + if err := os.WriteFile(path, []byte("garbagegarbagegarbage"), 0o644); err != nil { + t.Fatalf("write file: %v", err) + } + if _, err := DecodeFile(path, Options{}); err == nil { + t.Fatalf("expected error") + } +} + +func TestDecodeBytesFFmpegFallbackError(t *testing.T) { + _, err := DecodeBytes([]byte("not audio"), Options{}) + if err == nil { + t.Fatalf("expected error for garbage data") + } +} + +func TestDecodeMP3Bytes(t *testing.T) { + path := testdataPath(t, "sine.mp3") + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + pcm, err := DecodeBytes(data, Options{}) + if err != nil { + t.Fatalf("DecodeBytes: %v", err) + } + if len(pcm.Samples) == 0 { + t.Fatalf("empty samples") + } +} + +func TestDecodeReader(t *testing.T) { + samples := []int16{0, 1000, -1000, 0} + data := makeWAV(samples, 48000, 1) + pcm, err := DecodeReader(bytesReader(data), Options{}) + if err != nil { + t.Fatalf("DecodeReader: %v", err) + } + if pcm.SampleRate != 48000 { + t.Fatalf("sample rate = %d", pcm.SampleRate) + } 
+} + +func TestDecodeReaderError(t *testing.T) { + _, err := DecodeReader(errReader{}, Options{}) + if err == nil { + t.Fatalf("expected error") + } +} + +func TestSlice(t *testing.T) { + a := Audio{SampleRate: 10, Samples: []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}} + out, err := Slice(a, 0.2, 0.5) + if err != nil { + t.Fatalf("Slice: %v", err) + } + if len(out.Samples) != 5 { + t.Fatalf("slice samples = %d", len(out.Samples)) + } +} + +func TestSliceErrors(t *testing.T) { + _, err := Slice(Audio{SampleRate: 10, Samples: []float64{1}}, -1, 1) + if err == nil { + t.Fatalf("expected error for negative start") + } + _, err = Slice(Audio{SampleRate: 0, Samples: []float64{1}}, 0, 1) + if err == nil { + t.Fatalf("expected error for sample rate") + } + _, err = Slice(Audio{SampleRate: 10, Samples: []float64{}}, 0, 1) + if err == nil { + t.Fatalf("expected error for empty samples") + } + _, err = Slice(Audio{SampleRate: 10, Samples: []float64{1}}, 2, 0) + if err == nil { + t.Fatalf("expected error for start") + } +} + +func TestSliceFullDuration(t *testing.T) { + a := Audio{SampleRate: 10, Samples: []float64{0, 1, 2, 3}} + out, err := Slice(a, 0, 0) + if err != nil { + t.Fatalf("Slice: %v", err) + } + if len(out.Samples) != len(a.Samples) { + t.Fatalf("expected full slice") + } +} + +func TestSliceDurationTooShort(t *testing.T) { + a := Audio{SampleRate: 10, Samples: []float64{0, 1, 2}} + if _, err := Slice(a, 0, 0.01); err == nil { + t.Fatalf("expected error") + } +} + +func TestDecodeMP3IfNotMP3(t *testing.T) { + _, ok, err := DecodeMP3If(bytesReader([]byte("NOTMP3DATA"))) + if err != nil { + t.Fatalf("DecodeMP3If error: %v", err) + } + if ok { + t.Fatalf("expected ok=false") + } +} + +func TestDecodeWAVUnsupported(t *testing.T) { + _, err := decodeWAV(bytesReader([]byte("NOTWAVE12345"))) + if err == nil { + t.Fatalf("expected error") + } +} + +func testdataPath(t *testing.T, name string) string { + t.Helper() + wd, err := os.Getwd() + if err != nil { + t.Fatalf("Getwd: 
%v", err) + } + root := filepath.Dir(filepath.Dir(wd)) + path := filepath.Join(root, "testdata", name) + if _, err := os.Stat(path); err != nil { + t.Fatalf("missing testdata: %v", err) + } + return path +} + +func bytesReader(b []byte) *bytes.Reader { + return bytes.NewReader(b) +} + +type errReader struct{} + +func (errReader) Read([]byte) (int, error) { return 0, os.ErrInvalid } diff --git a/internal/audio/decode.go b/internal/audio/decode.go new file mode 100644 index 0000000..e5e6ce3 --- /dev/null +++ b/internal/audio/decode.go @@ -0,0 +1,78 @@ +// Package audio handles decoding audio into mono float samples. +package audio + +import ( + "bytes" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +// DecodeFile reads an audio file, decoding WAV/MP3 and falling back to ffmpeg. +func DecodeFile(path string, opts Options) (Audio, error) { + file, err := os.Open(path) + if err != nil { + return Audio{}, err + } + defer func() { _ = file.Close() }() + + ext := strings.ToLower(filepath.Ext(path)) + if ext == ".wav" || ext == ".wave" { + if pcm, ok, err := DecodeWAVIf(file); ok { + return pcm, err + } + } + if ext == ".mp3" { + if pcm, ok, err := DecodeMP3If(file); ok { + return pcm, err + } + } + + if pcm, ok, err := DecodeWAVIf(file); ok { + return pcm, err + } + if pcm, ok, err := DecodeMP3If(file); ok { + return pcm, err + } + + if opts.SampleRate == 0 { + opts.SampleRate = 44100 + } + pcm, err := DecodeWithFFmpeg(path, nil, opts.SampleRate, opts.FFmpegPath) + if err != nil { + return Audio{}, fmt.Errorf("%w; ffmpeg fallback failed: %v", ErrUnsupported, err) + } + return pcm, nil +} + +// DecodeBytes decodes audio data from a byte slice. 
+func DecodeBytes(data []byte, opts Options) (Audio, error) { + reader := bytes.NewReader(data) + if pcm, ok, err := DecodeWAVIf(reader); ok { + return pcm, err + } + reader.Reset(data) + if pcm, ok, err := DecodeMP3If(reader); ok { + return pcm, err + } + + if opts.SampleRate == 0 { + opts.SampleRate = 44100 + } + pcm, err := DecodeWithFFmpeg("", bytes.NewReader(data), opts.SampleRate, opts.FFmpegPath) + if err != nil { + return Audio{}, fmt.Errorf("%w; ffmpeg fallback failed: %v", ErrUnsupported, err) + } + return pcm, nil +} + +// DecodeReader decodes audio data from an io.Reader. +func DecodeReader(r io.Reader, opts Options) (Audio, error) { + data, err := io.ReadAll(r) + if err != nil { + return Audio{}, err + } + return DecodeBytes(data, opts) +} diff --git a/internal/audio/ffmpeg.go b/internal/audio/ffmpeg.go new file mode 100644 index 0000000..eaa7168 --- /dev/null +++ b/internal/audio/ffmpeg.go @@ -0,0 +1,67 @@ +// Package audio handles decoding audio into mono float samples. +package audio + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "math" + "os/exec" +) + +// DecodeWithFFmpeg uses ffmpeg to decode any input into mono float samples. +func DecodeWithFFmpeg(path string, stdin io.Reader, sampleRate int, ffmpegPath string) (Audio, error) { + if sampleRate <= 0 { + sampleRate = 44100 + } + ffmpeg, err := resolveFFmpeg(ffmpegPath) + if err != nil { + return Audio{}, err + } + + args := []string{"-hide_banner", "-loglevel", "error"} + if stdin != nil { + args = append(args, "-i", "pipe:0") + } else { + args = append(args, "-i", path) + } + args = append(args, "-f", "f32le", "-ac", "1", "-ar", fmt.Sprintf("%d", sampleRate), "-") + + cmd := exec.Command(ffmpeg, args...) 
+ if stdin != nil { + cmd.Stdin = stdin + } + var stderr bytes.Buffer + cmd.Stderr = &stderr + out, err := cmd.Output() + if err != nil { + if stderr.Len() > 0 { + return Audio{}, fmt.Errorf("ffmpeg: %v: %s", err, stderr.String()) + } + return Audio{}, err + } + + if len(out)%4 != 0 { + return Audio{}, fmt.Errorf("ffmpeg: unexpected pcm length") + } + + samples := make([]float64, len(out)/4) + for i := 0; i < len(samples); i++ { + bits := binary.LittleEndian.Uint32(out[i*4 : i*4+4]) + samples[i] = float64(math.Float32frombits(bits)) + } + + return Audio{SampleRate: sampleRate, Samples: samples}, nil +} + +func resolveFFmpeg(path string) (string, error) { + if path != "" { + return path, nil + } + ffmpeg, err := exec.LookPath("ffmpeg") + if err != nil { + return "", fmt.Errorf("ffmpeg not found in PATH") + } + return ffmpeg, nil +} diff --git a/internal/audio/ffmpeg_test.go b/internal/audio/ffmpeg_test.go new file mode 100644 index 0000000..bf3f180 --- /dev/null +++ b/internal/audio/ffmpeg_test.go @@ -0,0 +1,74 @@ +package audio + +import ( + "bytes" + "os" + "testing" +) + +func TestResolveFFmpeg(t *testing.T) { + path, err := resolveFFmpeg("") + if err != nil { + t.Fatalf("resolveFFmpeg: %v", err) + } + if path == "" { + t.Fatalf("empty ffmpeg path") + } +} + +func TestResolveFFmpegExplicit(t *testing.T) { + found, err := resolveFFmpeg("") + if err != nil { + t.Fatalf("resolveFFmpeg: %v", err) + } + path, err := resolveFFmpeg(found) + if err != nil { + t.Fatalf("resolveFFmpeg explicit: %v", err) + } + if path == "" { + t.Fatalf("empty ffmpeg path") + } +} + +func TestResolveFFmpegMissing(t *testing.T) { + t.Setenv("PATH", "") + if _, err := resolveFFmpeg(""); err == nil { + t.Fatalf("expected error") + } +} + +func TestDecodeWithFFmpegFile(t *testing.T) { + path := testdataPath(t, "sine.mp3") + pcm, err := DecodeWithFFmpeg(path, nil, 22050, "") + if err != nil { + t.Fatalf("DecodeWithFFmpeg: %v", err) + } + if pcm.SampleRate != 22050 { + t.Fatalf("sample rate = 
%d", pcm.SampleRate) + } + if len(pcm.Samples) == 0 { + t.Fatalf("empty samples") + } +} + +func TestDecodeWithFFmpegStdin(t *testing.T) { + path := testdataPath(t, "sine.mp3") + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + pcm, err := DecodeWithFFmpeg("", bytes.NewReader(data), 44100, "") + if err != nil { + t.Fatalf("DecodeWithFFmpeg stdin: %v", err) + } + if len(pcm.Samples) == 0 { + t.Fatalf("empty samples") + } +} + +func TestDecodeWithFFmpegBadPath(t *testing.T) { + _, err := DecodeWithFFmpeg("missing.mp3", nil, 0, "/no/such/ffmpeg") + if err == nil { + t.Fatalf("expected error") + } +} diff --git a/internal/audio/mp3.go b/internal/audio/mp3.go new file mode 100644 index 0000000..6459821 --- /dev/null +++ b/internal/audio/mp3.go @@ -0,0 +1,72 @@ +package audio + +import ( + "bytes" + "errors" + "io" + + "github.com/hajimehoshi/go-mp3" +) + +// DecodeMP3If tries to decode MP3 data, returning ok=false when not MP3. +func DecodeMP3If(r io.ReadSeeker) (Audio, bool, error) { + header := make([]byte, 4) + if _, err := io.ReadFull(r, header); err != nil { + return Audio{}, false, err + } + isMP3 := string(header[0:3]) == "ID3" || (header[0] == 0xFF && header[1]&0xE0 == 0xE0) + _, _ = r.Seek(0, io.SeekStart) + if !isMP3 { + return Audio{}, false, nil + } + pcm, err := decodeMP3(r) + if err != nil { + return Audio{}, true, err + } + return pcm, true, nil +} + +func decodeMP3(r io.Reader) (Audio, error) { + dec, err := mp3.NewDecoder(r) + if err != nil { + return Audio{}, err + } + pcm, err := io.ReadAll(dec) + if err != nil { + return Audio{}, err + } + if len(pcm)%2 != 0 { + return Audio{}, errors.New("mp3: odd pcm length") + } + + channels := 1 + if len(pcm)%4 == 0 { + channels = 2 + } + frames := len(pcm) / (2 * channels) + out := make([]float64, frames) + + buf := bytes.NewReader(pcm) + for i := 0; i < frames; i++ { + var sum float64 + for ch := 0; ch < channels; ch++ { + var sample int16 + if err := binaryRead(buf, 
&sample); err != nil { + return Audio{}, err + } + sum += float64(sample) / 32768.0 + } + out[i] = sum / float64(channels) + } + + return Audio{SampleRate: dec.SampleRate(), Samples: out}, nil +} + +func binaryRead(r io.Reader, v *int16) error { + var b [2]byte + if _, err := io.ReadFull(r, b[:]); err != nil { + return err + } + *v = int16(b[0]) | int16(b[1])<<8 + return nil +} diff --git a/internal/audio/mp3_errors_test.go b/internal/audio/mp3_errors_test.go new file mode 100644 index 0000000..27365bb --- /dev/null +++ b/internal/audio/mp3_errors_test.go @@ -0,0 +1,30 @@ +package audio + +import ( + "bytes" + "testing" +) + +func TestDecodeMP3Error(t *testing.T) { + if _, err := decodeMP3(bytes.NewReader([]byte("not mp3"))); err == nil { + t.Fatalf("expected error") + } +} + +func TestBinaryReadError(t *testing.T) { + var s int16 + if err := binaryRead(bytes.NewReader([]byte{0x01}), &s); err == nil { + t.Fatalf("expected error") + } +} + +func TestDecodeMP3IfCorrupt(t *testing.T) { + data := []byte{'I', 'D', '3', 0x03, 0x00} + _, ok, err := DecodeMP3If(bytes.NewReader(data)) + if !ok { + t.Fatalf("expected ok=true") + } + if err == nil { + t.Fatalf("expected error") + } +} diff --git a/internal/audio/slice.go b/internal/audio/slice.go new file mode 100644 index 0000000..0ddf4da --- /dev/null +++ b/internal/audio/slice.go @@ -0,0 +1,33 @@ +package audio + +import "fmt" + +// Slice returns a time-based slice of audio in seconds. 
+func Slice(a Audio, startSec, durationSec float64) (Audio, error) { + if startSec < 0 || durationSec < 0 { + return Audio{}, fmt.Errorf("slice: start and duration must be >= 0") + } + if a.SampleRate <= 0 { + return Audio{}, fmt.Errorf("slice: invalid sample rate") + } + if len(a.Samples) == 0 { + return Audio{}, fmt.Errorf("slice: empty samples") + } + + start := int(startSec * float64(a.SampleRate)) + if start >= len(a.Samples) { + return Audio{}, fmt.Errorf("slice: start beyond end") + } + end := len(a.Samples) + if durationSec > 0 { + end = start + int(durationSec*float64(a.SampleRate)) + if end > len(a.Samples) { + end = len(a.Samples) + } + if end <= start { + return Audio{}, fmt.Errorf("slice: duration too short") + } + } + + return Audio{SampleRate: a.SampleRate, Samples: a.Samples[start:end]}, nil +} diff --git a/internal/audio/wav.go b/internal/audio/wav.go new file mode 100644 index 0000000..f5dea20 --- /dev/null +++ b/internal/audio/wav.go @@ -0,0 +1,229 @@ +package audio + +import ( + "encoding/binary" + "errors" + "fmt" + "io" + "math" +) + +// DecodeWAVIf tries to decode WAV data, returning ok=false when not WAV. 
+func DecodeWAVIf(r io.ReadSeeker) (Audio, bool, error) { + header := make([]byte, 12) + if _, err := io.ReadFull(r, header); err != nil { + return Audio{}, false, err + } + if string(header[0:4]) != "RIFF" || string(header[8:12]) != "WAVE" { + _, _ = r.Seek(0, io.SeekStart) + return Audio{}, false, nil + } + _, _ = r.Seek(0, io.SeekStart) + pcm, err := decodeWAV(r) + if err != nil { + return Audio{}, true, err + } + return pcm, true, nil +} + +func decodeWAV(r io.ReadSeeker) (Audio, error) { + var ( + fmtFound bool + dataFound bool + fmtChunk wavFormat + data []byte + ) + + header := make([]byte, 12) + if _, err := io.ReadFull(r, header); err != nil { + return Audio{}, err + } + if string(header[0:4]) != "RIFF" || string(header[8:12]) != "WAVE" { + return Audio{}, ErrUnsupported + } + + for { + chunkHeader := make([]byte, 8) + _, err := io.ReadFull(r, chunkHeader) + if err == io.EOF { + break + } + if err != nil { + return Audio{}, err + } + chunkID := string(chunkHeader[0:4]) + chunkSize := int(binary.LittleEndian.Uint32(chunkHeader[4:8])) + + switch chunkID { + case "fmt ": + fmtFound = true + buf := make([]byte, chunkSize) + if _, err := io.ReadFull(r, buf); err != nil { + return Audio{}, err + } + if err := parseWavFormat(buf, &fmtChunk); err != nil { + return Audio{}, err + } + case "data": + dataFound = true + data = make([]byte, chunkSize) + if _, err := io.ReadFull(r, data); err != nil { + return Audio{}, err + } + default: + // Skip unknown chunk. 
+ if _, err := r.Seek(int64(chunkSize), io.SeekCurrent); err != nil { + return Audio{}, err + } + } + if chunkSize%2 == 1 { + _, _ = r.Seek(1, io.SeekCurrent) + } + } + + if !fmtFound || !dataFound { + return Audio{}, errors.New("wav: missing fmt or data chunk") + } + return decodeWavData(fmtChunk, data) +} + +type wavFormat struct { + AudioFormat uint16 + NumChannels uint16 + SampleRate uint32 + BitsPerSample uint16 + Extensible bool + SubFormat [16]byte +} + +func parseWavFormat(buf []byte, fmtChunk *wavFormat) error { + if len(buf) < 16 { + return errors.New("wav: short fmt chunk") + } + fmtChunk.AudioFormat = binary.LittleEndian.Uint16(buf[0:2]) + fmtChunk.NumChannels = binary.LittleEndian.Uint16(buf[2:4]) + fmtChunk.SampleRate = binary.LittleEndian.Uint32(buf[4:8]) + fmtChunk.BitsPerSample = binary.LittleEndian.Uint16(buf[14:16]) + if fmtChunk.AudioFormat == 0xFFFE && len(buf) >= 40 { + fmtChunk.Extensible = true + copy(fmtChunk.SubFormat[:], buf[24:40]) + } + return nil +} + +func decodeWavData(fmtChunk wavFormat, data []byte) (Audio, error) { + format := fmtChunk.AudioFormat + if fmtChunk.Extensible { + // PCM subformat GUID 00000001-0000-0010-8000-00aa00389b71 + if isGUID(fmtChunk.SubFormat, 0x00000001) { + format = 1 + } else if isGUID(fmtChunk.SubFormat, 0x00000003) { + format = 3 + } + } + + switch format { + case 1, 3: + // PCM or IEEE float. 
+ default: + return Audio{}, fmt.Errorf("wav: unsupported format %d", format) + } + + channels := int(fmtChunk.NumChannels) + if channels < 1 { + return Audio{}, errors.New("wav: invalid channel count") + } + + sampleRate := int(fmtChunk.SampleRate) + bits := int(fmtChunk.BitsPerSample) + if bits == 0 { + return Audio{}, errors.New("wav: invalid bits per sample") + } + + var samples []float64 + if format == 3 { + samples = decodeWavFloat(data, bits, channels) + } else { + samples = decodeWavPCM(data, bits, channels) + } + if samples == nil { + return Audio{}, fmt.Errorf("wav: unsupported bit depth %d", bits) + } + + return Audio{SampleRate: sampleRate, Samples: samples}, nil +} + +func decodeWavPCM(data []byte, bits, channels int) []float64 { + bytesPerSample := bits / 8 + frameSize := bytesPerSample * channels + if frameSize == 0 { + return nil + } + frames := len(data) / frameSize + out := make([]float64, frames) + idx := 0 + for i := 0; i < frames; i++ { + var sum float64 + for ch := 0; ch < channels; ch++ { + off := idx + ch*bytesPerSample + var v int32 + switch bits { + case 8: + v = int32(int(data[off]) - 128) + case 16: + v = int32(int16(binary.LittleEndian.Uint16(data[off : off+2]))) + case 24: + b := data[off : off+3] + v = int32(b[0]) | int32(b[1])<<8 | int32(b[2])<<16 + if v&0x800000 != 0 { + v |= ^0xffffff + } + case 32: + v = int32(binary.LittleEndian.Uint32(data[off : off+4])) + default: + return nil + } + scale := float64(int64(1) << (bits - 1)) + sum += float64(v) / scale + } + out[i] = sum / float64(channels) + idx += frameSize + } + return out +} + +func decodeWavFloat(data []byte, bits, channels int) []float64 { + bytesPerSample := bits / 8 + frameSize := bytesPerSample * channels + if frameSize == 0 { + return nil + } + frames := len(data) / frameSize + out := make([]float64, frames) + idx := 0 + for i := 0; i < frames; i++ { + var sum float64 + for ch := 0; ch < channels; ch++ { + off := idx + ch*bytesPerSample + switch bits { + case 32: + sum 
+= float64(math.Float32frombits(binary.LittleEndian.Uint32(data[off : off+4]))) + case 64: + sum += math.Float64frombits(binary.LittleEndian.Uint64(data[off : off+8])) + default: + return nil + } + } + out[i] = sum / float64(channels) + idx += frameSize + } + return out +} + +func isGUID(b [16]byte, sub uint32) bool { + return binary.LittleEndian.Uint32(b[0:4]) == sub && + binary.LittleEndian.Uint16(b[4:6]) == 0x0000 && + binary.LittleEndian.Uint16(b[6:8]) == 0x0010 && + b[8] == 0x80 && b[9] == 0x00 && + b[10] == 0x00 && b[11] == 0xAA && b[12] == 0x00 && b[13] == 0x38 && b[14] == 0x9B && b[15] == 0x71 +} diff --git a/internal/audio/wav_extensible_test.go b/internal/audio/wav_extensible_test.go new file mode 100644 index 0000000..a003e1d --- /dev/null +++ b/internal/audio/wav_extensible_test.go @@ -0,0 +1,47 @@ +package audio + +import ( + "bytes" + "testing" +) + +func TestDecodeWAVExtensiblePCM(t *testing.T) { + payload := []byte{0, 0, 0, 0} + buf := &bytes.Buffer{} + riffSize := 4 + (8 + 40) + (8 + len(payload)) + + buf.WriteString("RIFF") + writeU32(buf, uint32(riffSize)) + buf.WriteString("WAVE") + + buf.WriteString("fmt ") + writeU32(buf, 40) + writeU16(buf, 0xFFFE) + writeU16(buf, 1) + writeU32(buf, 44100) + writeU32(buf, 44100*2) + writeU16(buf, 2) + writeU16(buf, 16) + writeU16(buf, 22) + writeU16(buf, 16) + writeU32(buf, 1) + buf.Write([]byte{ + 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, + 0x10, 0x00, + 0x80, 0x00, + 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71, + }) + + buf.WriteString("data") + writeU32(buf, uint32(len(payload))) + buf.Write(payload) + + pcm, err := DecodeBytes(buf.Bytes(), Options{}) + if err != nil { + t.Fatalf("DecodeBytes: %v", err) + } + if len(pcm.Samples) == 0 { + t.Fatalf("empty samples") + } +} diff --git a/internal/audio/wav_extra_test.go b/internal/audio/wav_extra_test.go new file mode 100644 index 0000000..8cddeaf --- /dev/null +++ b/internal/audio/wav_extra_test.go @@ -0,0 +1,118 @@ +package audio + +import ( + "bytes" + "testing" +) + 
+func TestDecodeWAVWithExtraChunk(t *testing.T) { + payload := []byte{0, 0, 0, 0} + buf := &bytes.Buffer{} + buf.WriteString("RIFF") + writeU32(buf, uint32(4+(8+16)+(8+5)+(8+len(payload)))) + buf.WriteString("WAVE") + + buf.WriteString("fmt ") + writeU32(buf, 16) + writeU16(buf, 1) + writeU16(buf, 1) + writeU32(buf, 44100) + writeU32(buf, 44100*2) + writeU16(buf, 2) + writeU16(buf, 16) + + buf.WriteString("JUNK") + writeU32(buf, 5) + buf.Write([]byte{1, 2, 3, 4, 5}) + buf.WriteByte(0) + + buf.WriteString("data") + writeU32(buf, uint32(len(payload))) + buf.Write(payload) + + pcm, err := DecodeBytes(buf.Bytes(), Options{}) + if err != nil { + t.Fatalf("DecodeBytes: %v", err) + } + if len(pcm.Samples) == 0 { + t.Fatalf("empty samples") + } +} + +func TestDecodeWAVMissingData(t *testing.T) { + buf := &bytes.Buffer{} + buf.WriteString("RIFF") + writeU32(buf, 4+(8+16)) + buf.WriteString("WAVE") + + buf.WriteString("fmt ") + writeU32(buf, 16) + writeU16(buf, 1) + writeU16(buf, 1) + writeU32(buf, 44100) + writeU32(buf, 44100*2) + writeU16(buf, 2) + writeU16(buf, 16) + + if _, err := DecodeBytes(buf.Bytes(), Options{}); err == nil { + t.Fatalf("expected error for missing data") + } +} + +func TestDecodeWAVHeaderOnly(t *testing.T) { + buf := &bytes.Buffer{} + buf.WriteString("RIFF") + writeU32(buf, 4) + buf.WriteString("WAVE") + if _, err := decodeWAV(bytes.NewReader(buf.Bytes())); err == nil { + t.Fatalf("expected error") + } +} + +func TestDecodeWAVInvalidChannels(t *testing.T) { + buf := &bytes.Buffer{} + buf.WriteString("RIFF") + writeU32(buf, 4+(8+16)+(8+2)) + buf.WriteString("WAVE") + + buf.WriteString("fmt ") + writeU32(buf, 16) + writeU16(buf, 1) + writeU16(buf, 0) + writeU32(buf, 44100) + writeU32(buf, 44100*2) + writeU16(buf, 2) + writeU16(buf, 16) + + buf.WriteString("data") + writeU32(buf, 2) + buf.Write([]byte{0, 0}) + + if _, err := DecodeBytes(buf.Bytes(), Options{}); err == nil { + t.Fatalf("expected error for channels") + } +} + +func 
TestDecodeWAVFloatUnsupportedBits(t *testing.T) { + buf := &bytes.Buffer{} + buf.WriteString("RIFF") + writeU32(buf, 4+(8+16)+(8+3)) + buf.WriteString("WAVE") + + buf.WriteString("fmt ") + writeU32(buf, 16) + writeU16(buf, 3) + writeU16(buf, 1) + writeU32(buf, 44100) + writeU32(buf, 44100*3) + writeU16(buf, 3) + writeU16(buf, 24) + + buf.WriteString("data") + writeU32(buf, 3) + buf.Write([]byte{0, 0, 0}) + + if _, err := DecodeBytes(buf.Bytes(), Options{}); err == nil { + t.Fatalf("expected error for float bit depth") + } +} diff --git a/internal/audio/wav_float_test.go b/internal/audio/wav_float_test.go new file mode 100644 index 0000000..856dc4d --- /dev/null +++ b/internal/audio/wav_float_test.go @@ -0,0 +1,77 @@ +package audio + +import ( + "bytes" + "encoding/binary" + "testing" +) + +func TestDecodeWAVFloat32(t *testing.T) { + buf := &bytes.Buffer{} + buf.WriteString("RIFF") + writeU32(buf, 4+(8+16)+(8+8)) + buf.WriteString("WAVE") + + buf.WriteString("fmt ") + writeU32(buf, 16) + writeU16(buf, 3) + writeU16(buf, 1) + writeU32(buf, 44100) + writeU32(buf, 44100*4) + writeU16(buf, 4) + writeU16(buf, 32) + + buf.WriteString("data") + writeU32(buf, 8) + _ = binary.Write(buf, binary.LittleEndian, float32(0.5)) + _ = binary.Write(buf, binary.LittleEndian, float32(-0.25)) + + pcm, err := DecodeBytes(buf.Bytes(), Options{}) + if err != nil { + t.Fatalf("DecodeBytes: %v", err) + } + if len(pcm.Samples) != 2 { + t.Fatalf("samples = %d", len(pcm.Samples)) + } +} + +func TestDecodeWAVFloat64(t *testing.T) { + buf := &bytes.Buffer{} + buf.WriteString("RIFF") + writeU32(buf, 4+(8+16)+(8+16)) + buf.WriteString("WAVE") + + buf.WriteString("fmt ") + writeU32(buf, 16) + writeU16(buf, 3) + writeU16(buf, 1) + writeU32(buf, 44100) + writeU32(buf, 44100*8) + writeU16(buf, 8) + writeU16(buf, 64) + + buf.WriteString("data") + writeU32(buf, 16) + _ = binary.Write(buf, binary.LittleEndian, float64(0.5)) + _ = binary.Write(buf, binary.LittleEndian, float64(-0.25)) + + pcm, err := 
DecodeBytes(buf.Bytes(), Options{}) + if err != nil { + t.Fatalf("DecodeBytes: %v", err) + } + if len(pcm.Samples) != 2 { + t.Fatalf("samples = %d", len(pcm.Samples)) + } +} + +func writeU16(buf *bytes.Buffer, v uint16) { + buf.WriteByte(byte(v)) + buf.WriteByte(byte(v >> 8)) +} + +func writeU32(buf *bytes.Buffer, v uint32) { + buf.WriteByte(byte(v)) + buf.WriteByte(byte(v >> 8)) + buf.WriteByte(byte(v >> 16)) + buf.WriteByte(byte(v >> 24)) +} diff --git a/internal/audio/wav_pcm_test.go b/internal/audio/wav_pcm_test.go new file mode 100644 index 0000000..1693cd0 --- /dev/null +++ b/internal/audio/wav_pcm_test.go @@ -0,0 +1,101 @@ +package audio + +import ( + "bytes" + "encoding/binary" + "testing" +) + +func TestDecodeWAVPCM8(t *testing.T) { + data := makeWAVPCM(8, []int32{0, 64, -64, 0}, 44100) + pcm, err := DecodeBytes(data, Options{}) + if err != nil { + t.Fatalf("DecodeBytes: %v", err) + } + if len(pcm.Samples) != 4 { + t.Fatalf("samples = %d", len(pcm.Samples)) + } +} + +func TestDecodeWAVPCM24(t *testing.T) { + data := makeWAVPCM(24, []int32{0, 100000, -100000, 0}, 44100) + pcm, err := DecodeBytes(data, Options{}) + if err != nil { + t.Fatalf("DecodeBytes: %v", err) + } + if len(pcm.Samples) != 4 { + t.Fatalf("samples = %d", len(pcm.Samples)) + } +} + +func TestDecodeWAVPCM32(t *testing.T) { + data := makeWAVPCM(32, []int32{0, 100000, -100000, 0}, 44100) + pcm, err := DecodeBytes(data, Options{}) + if err != nil { + t.Fatalf("DecodeBytes: %v", err) + } + if len(pcm.Samples) != 4 { + t.Fatalf("samples = %d", len(pcm.Samples)) + } +} + +func TestDecodeWAVUnsupportedFormat(t *testing.T) { + data := makeWAVCustom(7, 16, []byte{0, 0}, 44100) + if _, err := DecodeBytes(data, Options{}); err == nil { + t.Fatalf("expected error") + } +} + +func TestDecodeWAVUnsupportedBits(t *testing.T) { + data := makeWAVCustom(1, 12, []byte{0, 0}, 44100) + if _, err := DecodeBytes(data, Options{}); err == nil { + t.Fatalf("expected error") + } +} + +func makeWAVPCM(bits int, 
samples []int32, sampleRate int) []byte { + data := &bytes.Buffer{} + for _, s := range samples { + switch bits { + case 8: + b := byte(int(s) + 128) + data.WriteByte(b) + case 16: + _ = binary.Write(data, binary.LittleEndian, int16(s)) + case 24: + v := uint32(int32(s)) + data.WriteByte(byte(v)) + data.WriteByte(byte(v >> 8)) + data.WriteByte(byte(v >> 16)) + case 32: + _ = binary.Write(data, binary.LittleEndian, int32(s)) + } + } + return makeWAVCustom(1, bits, data.Bytes(), sampleRate) +} + +func makeWAVCustom(format uint16, bits int, payload []byte, sampleRate int) []byte { + buf := &bytes.Buffer{} + riffSize := 4 + (8 + 16) + (8 + len(payload)) + + buf.WriteString("RIFF") + writeU32(buf, uint32(riffSize)) + buf.WriteString("WAVE") + + buf.WriteString("fmt ") + writeU32(buf, 16) + writeU16(buf, format) + writeU16(buf, 1) + writeU32(buf, uint32(sampleRate)) + byteRate := sampleRate * (bits / 8) + writeU32(buf, uint32(byteRate)) + blockAlign := bits / 8 + writeU16(buf, uint16(blockAlign)) + writeU16(buf, uint16(bits)) + + buf.WriteString("data") + writeU32(buf, uint32(len(payload))) + buf.Write(payload) + + return buf.Bytes() +} diff --git a/internal/audio/wav_test.go b/internal/audio/wav_test.go new file mode 100644 index 0000000..0eb5e95 --- /dev/null +++ b/internal/audio/wav_test.go @@ -0,0 +1,38 @@ +package audio + +import ( + "bytes" + "encoding/binary" +) + +func makeWAV(samples []int16, sampleRate int, channels int) []byte { + if channels < 1 { + channels = 1 + } + dataLen := len(samples) * 2 + riffSize := 4 + (8 + 16) + (8 + dataLen) + + buf := &bytes.Buffer{} + buf.WriteString("RIFF") + _ = binary.Write(buf, binary.LittleEndian, uint32(riffSize)) + buf.WriteString("WAVE") + + buf.WriteString("fmt ") + _ = binary.Write(buf, binary.LittleEndian, uint32(16)) + _ = binary.Write(buf, binary.LittleEndian, uint16(1)) + _ = binary.Write(buf, binary.LittleEndian, uint16(channels)) + _ = binary.Write(buf, binary.LittleEndian, uint32(sampleRate)) + byteRate := 
// FFTInPlace replaces x with its discrete Fourier transform (forward
// transform, no normalisation).
//
// Slices whose length is a power of two are handled by an iterative radix-2
// Cooley-Tukey FFT in O(n log n). Any other length falls back to a direct
// O(n^2) DFT instead of indexing out of range, so the function is safe for
// every input size. Slices of length 0 or 1 are left untouched.
func FFTInPlace(x []complex128) {
	n := len(x)
	if n <= 1 {
		return
	}
	if n&(n-1) != 0 {
		// The butterfly stages below assume a power-of-two length.
		dftInPlace(x)
		return
	}

	// Bit-reversal permutation.
	j := 0
	for i := 1; i < n; i++ {
		bit := n >> 1
		for ; j&bit != 0; bit >>= 1 {
			j &= ^bit
		}
		j |= bit
		if i < j {
			x[i], x[j] = x[j], x[i]
		}
	}

	// Butterfly stages: combine transforms of size/2 into transforms of size.
	for size := 2; size <= n; size <<= 1 {
		half := size / 2
		angle := -2 * math.Pi / float64(size)
		wlen := complex(math.Cos(angle), math.Sin(angle))
		for i := 0; i < n; i += size {
			w := complex(1, 0)
			for k := 0; k < half; k++ {
				u := x[i+k]
				v := w * x[i+k+half]
				x[i+k] = u + v
				x[i+k+half] = u - v
				w *= wlen
			}
		}
	}
}

// dftInPlace evaluates the DFT definition directly. It is the fallback for
// lengths that are not a power of two.
func dftInPlace(x []complex128) {
	n := len(x)
	in := make([]complex128, n)
	copy(in, x)
	for k := range x {
		var sum complex128
		for t, v := range in {
			// The twiddle factor is periodic in n; reducing k*t keeps the
			// angle small and the trigonometric evaluation accurate.
			angle := -2 * math.Pi * float64((k*t)%n) / float64(n)
			sum += v * complex(math.Cos(angle), math.Sin(angle))
		}
		x[k] = sum
	}
}
+package dsp + +import ( + "math" +) + +// Spectrogram contains log-magnitude FFT frames. +type Spectrogram struct { + Frames int + Bins int + Values []float64 + Min float64 + Max float64 + SampleRate int + WindowSize int + BinHz float64 +} + +// HannWindow returns a Hann window of length n. +func HannWindow(n int) []float64 { + w := make([]float64, n) + if n == 1 { + w[0] = 1 + return w + } + for i := 0; i < n; i++ { + w[i] = 0.5 - 0.5*math.Cos(2*math.Pi*float64(i)/float64(n-1)) + } + return w +} + +// ComputeSpectrogram computes a log-magnitude spectrogram. +func ComputeSpectrogram(samples []float64, sampleRate, windowSize, hopSize int) Spectrogram { + if windowSize <= 0 { + windowSize = 2048 + } + if hopSize <= 0 { + hopSize = windowSize / 4 + } + if hopSize <= 0 { + hopSize = 1 + } + if sampleRate <= 0 { + sampleRate = 44100 + } + + frames := 1 + if len(samples) > windowSize { + frames = 1 + (len(samples)-windowSize+hopSize-1)/hopSize + } + bins := windowSize/2 + 1 + values := make([]float64, frames*bins) + + window := HannWindow(windowSize) + minVal := math.Inf(1) + maxVal := math.Inf(-1) + eps := 1e-9 + + frame := make([]complex128, windowSize) + for f := 0; f < frames; f++ { + start := f * hopSize + for i := 0; i < windowSize; i++ { + idx := start + i + if idx < len(samples) { + frame[i] = complex(samples[idx]*window[i], 0) + } else { + frame[i] = 0 + } + } + FFTInPlace(frame) + for b := 0; b < bins; b++ { + re := real(frame[b]) + im := imag(frame[b]) + mag := math.Sqrt(re*re + im*im) + db := 20 * math.Log10(mag+eps) + values[f*bins+b] = db + if db < minVal { + minVal = db + } + if db > maxVal { + maxVal = db + } + } + } + + binHz := float64(sampleRate) / float64(windowSize) + return Spectrogram{ + Frames: frames, + Bins: bins, + Values: values, + Min: minVal, + Max: maxVal, + SampleRate: sampleRate, + WindowSize: windowSize, + BinHz: binHz, + } +} diff --git a/internal/dsp/spectrogram_test.go b/internal/dsp/spectrogram_test.go new file mode 100644 index 
0000000..839f071 --- /dev/null +++ b/internal/dsp/spectrogram_test.go @@ -0,0 +1,51 @@ +package dsp + +import "testing" + +func TestComputeSpectrogram(t *testing.T) { + samples := make([]float64, 4096) + for i := range samples { + samples[i] = 0.5 + } + spec := ComputeSpectrogram(samples, 44100, 1024, 256) + if spec.Frames <= 0 || spec.Bins <= 0 { + t.Fatalf("invalid spec size") + } + if len(spec.Values) != spec.Frames*spec.Bins { + t.Fatalf("values len mismatch") + } + if spec.Min >= spec.Max { + t.Fatalf("min/max not set") + } + if spec.BinHz <= 0 { + t.Fatalf("invalid bin hz") + } +} + +func TestHannWindow(t *testing.T) { + w1 := HannWindow(1) + if len(w1) != 1 || w1[0] != 1 { + t.Fatalf("hann size 1") + } + w := HannWindow(4) + if len(w) != 4 { + t.Fatalf("hann size 4") + } + if w[0] != 0 || w[3] != 0 { + t.Fatalf("hann endpoints") + } +} + +func TestComputeSpectrogramDefaults(t *testing.T) { + samples := make([]float64, 100) + spec := ComputeSpectrogram(samples, 0, 0, 0) + if spec.SampleRate != 44100 { + t.Fatalf("sample rate default = %d", spec.SampleRate) + } + if spec.WindowSize != 2048 { + t.Fatalf("window default = %d", spec.WindowSize) + } + if spec.Frames != 1 { + t.Fatalf("frames = %d", spec.Frames) + } +} diff --git a/internal/render/palette.go b/internal/render/palette.go new file mode 100644 index 0000000..dc02279 --- /dev/null +++ b/internal/render/palette.go @@ -0,0 +1,92 @@ +// Package render turns spectrograms into images. +package render + +import ( + "errors" + "image/color" +) + +// Palette maps a normalized value to a color. +type Palette func(t float64) color.RGBA + +type stop struct { + pos float64 + c color.RGBA +} + +// PaletteByName returns a palette for a given name. 
+func PaletteByName(name string) (Palette, error) { + switch name { + case "classic": + return gradient([]stop{ + {0.0, rgb(0, 0, 0)}, + {0.2, rgb(0, 32, 96)}, + {0.45, rgb(0, 160, 200)}, + {0.7, rgb(255, 180, 0)}, + {1.0, rgb(255, 255, 255)}, + }), nil + case "magma": + return gradient([]stop{ + {0.0, rgb(0, 0, 4)}, + {0.25, rgb(59, 12, 87)}, + {0.5, rgb(180, 54, 122)}, + {0.75, rgb(251, 140, 60)}, + {1.0, rgb(252, 253, 191)}, + }), nil + case "inferno": + return gradient([]stop{ + {0.0, rgb(0, 0, 4)}, + {0.25, rgb(61, 9, 101)}, + {0.5, rgb(187, 55, 84)}, + {0.75, rgb(249, 142, 8)}, + {1.0, rgb(252, 255, 164)}, + }), nil + case "viridis": + return gradient([]stop{ + {0.0, rgb(68, 1, 84)}, + {0.25, rgb(58, 82, 139)}, + {0.5, rgb(32, 144, 140)}, + {0.75, rgb(94, 201, 98)}, + {1.0, rgb(253, 231, 37)}, + }), nil + case "gray", "grey": + return gradient([]stop{{0, rgb(0, 0, 0)}, {1, rgb(255, 255, 255)}}), nil + default: + return nil, errors.New("unknown palette") + } +} + +func gradient(stops []stop) Palette { + return func(t float64) color.RGBA { + if t <= 0 { + return stops[0].c + } + if t >= 1 { + return stops[len(stops)-1].c + } + for i := 0; i < len(stops)-1; i++ { + if t >= stops[i].pos && t <= stops[i+1].pos { + span := stops[i+1].pos - stops[i].pos + if span <= 0 { + return stops[i+1].c + } + local := (t - stops[i].pos) / span + return lerp(stops[i].c, stops[i+1].c, local) + } + } + return stops[len(stops)-1].c + } +} + +func lerp(a, b color.RGBA, t float64) color.RGBA { + return color.RGBA{ + R: uint8(float64(a.R) + (float64(b.R)-float64(a.R))*t), + G: uint8(float64(a.G) + (float64(b.G)-float64(a.G))*t), + B: uint8(float64(a.B) + (float64(b.B)-float64(a.B))*t), + A: 255, + } +} + +func rgb(r, g, b uint8) color.RGBA { + return color.RGBA{R: r, G: g, B: b, A: 255} +} diff --git a/internal/render/render.go b/internal/render/render.go new file mode 100644 index 0000000..afdc369 --- /dev/null +++ b/internal/render/render.go @@ -0,0 +1,102 @@ +// Package render 
turns spectrograms into images. +package render + +import ( + "fmt" + "image" + "math" + + "github.com/steipete/songsee/internal/dsp" +) + +// Options configures spectrogram rendering. +type Options struct { + Width int + Height int + MinFreq float64 + MaxFreq float64 + Palette Palette + MinDB float64 + MaxDB float64 + ClampDB bool + FlipVert bool +} + +// Spectrogram renders a spectrogram into an RGBA image. +func Spectrogram(spec dsp.Spectrogram, opts Options) (*image.RGBA, error) { + if opts.Width <= 0 || opts.Height <= 0 { + return nil, fmt.Errorf("invalid output size") + } + if opts.Palette == nil { + return nil, fmt.Errorf("palette required") + } + + minDB := spec.Min + maxDB := spec.Max + if opts.ClampDB { + minDB = opts.MinDB + maxDB = opts.MaxDB + } + if maxDB <= minDB { + maxDB = minDB + 1 + } + + minBin := 0 + maxBin := spec.Bins - 1 + if opts.MinFreq > 0 { + minBin = int(opts.MinFreq / spec.BinHz) + } + if opts.MaxFreq > 0 { + maxBin = int(opts.MaxFreq / spec.BinHz) + } + if minBin < 0 { + minBin = 0 + } + if maxBin >= spec.Bins { + maxBin = spec.Bins - 1 + } + if maxBin <= minBin { + minBin = 0 + maxBin = spec.Bins - 1 + } + binSpan := maxBin - minBin + + img := image.NewRGBA(image.Rect(0, 0, opts.Width, opts.Height)) + frames := spec.Frames + bins := spec.Bins + for x := 0; x < opts.Width; x++ { + frame := 0 + if frames > 1 && opts.Width > 1 { + frame = int(math.Round(float64(x) * float64(frames-1) / float64(opts.Width-1))) + } + frameOffset := frame * bins + for y := 0; y < opts.Height; y++ { + pos := 0.0 + if opts.Height > 1 { + pos = float64(y) / float64(opts.Height-1) + } + bin := minBin + int(math.Round((1-pos)*float64(binSpan))) + if bin < minBin { + bin = minBin + } + if bin > maxBin { + bin = maxBin + } + val := spec.Values[frameOffset+bin] + norm := (val - minDB) / (maxDB - minDB) + if norm < 0 { + norm = 0 + } + if norm > 1 { + norm = 1 + } + c := opts.Palette(norm) + ypos := y + if opts.FlipVert { + ypos = opts.Height - 1 - y + } + 
img.SetRGBA(x, ypos, c) + } + } + return img, nil +} diff --git a/internal/render/render_test.go b/internal/render/render_test.go new file mode 100644 index 0000000..1e2c23f --- /dev/null +++ b/internal/render/render_test.go @@ -0,0 +1,154 @@ +package render + +import ( + "image/color" + "testing" + + "github.com/steipete/songsee/internal/dsp" +) + +func TestPaletteByName(t *testing.T) { + names := []string{"classic", "magma", "inferno", "viridis", "gray", "grey"} + for _, name := range names { + if _, err := PaletteByName(name); err != nil { + t.Fatalf("palette %s: %v", name, err) + } + } + if _, err := PaletteByName("nope"); err == nil { + t.Fatalf("expected error for unknown palette") + } +} + +func TestRenderSpectrogram(t *testing.T) { + spec := dsp.Spectrogram{ + Frames: 2, + Bins: 2, + Values: []float64{-20, -5, -10, -1}, + Min: -20, + Max: -1, + BinHz: 100, + } + img, err := Spectrogram(spec, Options{ + Width: 4, + Height: 4, + Palette: func(t float64) color.RGBA { return color.RGBA{R: uint8(255 * t), A: 255} }, + }) + if err != nil { + t.Fatalf("RenderSpectrogram: %v", err) + } + if img.Bounds().Dx() != 4 || img.Bounds().Dy() != 4 { + t.Fatalf("unexpected bounds") + } + c1 := img.RGBAAt(0, 0) + c2 := img.RGBAAt(3, 3) + if c1 == c2 { + t.Fatalf("expected varying pixels") + } +} + +func TestRenderSpectrogramErrors(t *testing.T) { + spec := dsp.Spectrogram{ + Frames: 1, + Bins: 1, + Values: []float64{0}, + Min: 0, + Max: 1, + BinHz: 100, + } + if _, err := Spectrogram(spec, Options{Width: 0, Height: 1, Palette: func(float64) color.RGBA { return color.RGBA{} }}); err == nil { + t.Fatalf("expected size error") + } + if _, err := Spectrogram(spec, Options{Width: 1, Height: 1}); err == nil { + t.Fatalf("expected palette error") + } +} + +func TestRenderSpectrogramClampAndRange(t *testing.T) { + spec := dsp.Spectrogram{ + Frames: 3, + Bins: 4, + Values: []float64{-80, -40, -20, 0, -70, -35, -15, -2, -60, -30, -10, -1}, + Min: -80, + Max: 0, + BinHz: 100, + } + img, 
err := Spectrogram(spec, Options{ + Width: 3, + Height: 2, + MinFreq: 50, + MaxFreq: 250, + Palette: func(t float64) color.RGBA { return color.RGBA{B: uint8(255 * t), A: 255} }, + MinDB: -60, + MaxDB: -10, + ClampDB: true, + FlipVert: true, + }) + if err != nil { + t.Fatalf("RenderSpectrogram: %v", err) + } + if img.Bounds().Dx() != 3 || img.Bounds().Dy() != 2 { + t.Fatalf("unexpected bounds") + } +} + +func TestGradientEndpoints(t *testing.T) { + p := gradient([]stop{{0, rgb(0, 0, 0)}, {1, rgb(255, 0, 0)}}) + if c := p(0); c.R != 0 || c.G != 0 || c.B != 0 { + t.Fatalf("start color mismatch") + } + if c := p(1); c.R != 255 || c.G != 0 || c.B != 0 { + t.Fatalf("end color mismatch") + } + if c := p(0.5); c.R == 0 || c.R == 255 { + t.Fatalf("mid color not interpolated") + } + if c := p(-1); c.R != 0 { + t.Fatalf("clamp low") + } + if c := p(2); c.R != 255 { + t.Fatalf("clamp high") + } +} + +func TestRenderSpectrogramSinglePixel(t *testing.T) { + spec := dsp.Spectrogram{ + Frames: 1, + Bins: 1, + Values: []float64{-10}, + Min: -10, + Max: -10, + BinHz: 100, + } + img, err := Spectrogram(spec, Options{ + Width: 1, + Height: 1, + Palette: func(_ float64) color.RGBA { return color.RGBA{G: 200, A: 255} }, + }) + if err != nil { + t.Fatalf("RenderSpectrogram: %v", err) + } + if img.Bounds().Dx() != 1 || img.Bounds().Dy() != 1 { + t.Fatalf("unexpected bounds") + } +} + +func TestRenderSpectrogramRangeReset(t *testing.T) { + spec := dsp.Spectrogram{ + Frames: 2, + Bins: 3, + Values: []float64{-10, -5, -1, -10, -5, -1}, + Min: -10, + Max: -1, + BinHz: 100, + } + _, err := Spectrogram(spec, Options{ + Width: 2, + Height: 2, + MinFreq: 1000, + MaxFreq: 200, + Palette: func(_ float64) color.RGBA { return color.RGBA{R: 50, A: 255} }, + }) + if err != nil { + t.Fatalf("RenderSpectrogram: %v", err) + } +} diff --git a/testdata/sine.mp3 b/testdata/sine.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..2f42cb84ece2288ff45a2ba830fc585b1b3a3792 GIT binary patch 
literal 4986 zcmd6rc~lek7RP^Kk`PuwP<9Ec>;yzaMF|iV*<_cUBrFA?6{;v&tOF4Q9wI(L0e!Eb zE@i7C$}6%+$|4Aeu?kgi39b~PVvB-FDsLu0j&0J@<9YwQIS1x9!{mPE-uu1xcf%IS z8Z1B${otS=H{`4c0OUj4ooH`swZ_VZL=ygS;nzH$%=+kZVHuYE#b?NB8S-NSfQlb5 zHMO?3v$NZsi~>8mBnIRyEZT|z-Et(jEs$qkB>ipJ~cJ<<_(X>o1aI?$UDX% z?`UI#UMwAkxU7YxieL4~Jp$o+p5M?70FGni8axe5y_C2I?C+2PAhfao7|zdp5@`kB z1#E&8E4S|UI?+=tbjb_TAOX$Rg+#>W^I zd3Tg4w*#@%^$`Gk&~J6H(jhmO z;TlNGm2Rs8WuBSHT&?6|3@c!vMpql`Ej>|LUDI>S2M)PE5IDDa&ji_zEnFCT>JNyHc4ff~n~2W0UvuS+yf0pI|xeK6Zc zPF_wQ-AwBJyhLi*u*l?}eO|ieWBEd(-apOSmAcg0+9{m#I3Bejn`rl10N-zWHho~k z$bpn}g4#5h_@#=?j=dRwW0H^iorAvRN>0r>#yce~uzjCF&(n*9WTV69g>UI4y`F8g z1E@iP4(uft;i%A%6Yz0_si@qScmoi1||_u-TOlqaS~$8#|aid?Amp%Jc1Uh zan8y6R;NpWUG!TpoCKs&={j_E0iHdpOnNj=qfE2OsG!L5PThMj7?z zHQbe?OY0raCdNYb4g!+lQusw@K5j~w2MW7cgV1N}5vm^UpH1+Wbh z$rGm5wnipgV#|xPm0@ae;+$#Fm8KgvNXy~GFzT8maRf`8aS-f4xl+}Gmkm^Pt9t6a zrSAAu))&nH^vE@`6U&&ILpWkO;NixE9A}e*hliI{d(vane@(cw`|UBvJN6cC1P1a{ z$5nKUH73O6wVH3_Sk6B#w44;@Ai73$@B~o97*ox&Z9Zb0T7mR}tb+`l!(9(Gd(y9y z^$!N(FB)l_fBQ}^0|wBH#US}Ww^Z16KY17b9Yxu0lVTi1*9d2j_LNA2rZN#7h;wQM zc)?9HazUytk%q=CUb}pc5WK!gtg(0Y`C&Zo1pwNV^kM^LR)`#1J}#?8=H4M+5eLmO zs%wOkk8*_dGl%%Il>U#-0s?n|W0(+0k#6>+daWukQ)GmlB@bQj#Pz_yGC1O^mHQ9s zd&`Yu{V3u77G;0Mv6&#!A+E!m#cfxu4TjpiBmNn4NXBwM zQEepv7Ubd0*Wx+ta_eDNs(2kEZx{a8V%v3Ae`JK;~$^-_ysBW(csH;=7emK zN3xkc$+?_j(Y^}~x(h%<&KytkH>le3JA$5x`3l*yk~q5-I0>i@IO6pa62iw%3FGD$ zP+}Lzs`geOH*T&-HK$zg-kL+Xm2EM};=1b;wR7TOOUGou-G zsIEWuV{y%w?qA^0SWPa~6SDQ{hInmUwiPUAb>SY~4+e6CXw7f)r_GO!;h1yLC-E6x zB7Z)rpItgAr#cEsZGPL9D?x93=|N!yDYMtD4RrP|)6}y7r?~BfmeZ9#zjHpu0Z=~K z-_@REj@zwZ`}LiyqVn*+<|IWj>h>P`UQ)m2<6Ewxy7u~3s(M^zLM9%gZ?Q!_+o6Qo z(h;w*0RUs-Pd1dx{ww-kGRRrh71xx_Bym!869zXW5u+G&nX~m0>W7DG6=GbMtJ-~u z+2Uy%<&d+f@$KA&&J@c`{|qu~Ie_PY#PXXo?s2 zpG+N;QymGR{%A|kK?o1m-+_`8DXZ=69IJ>0>5p;q(RA|t#KtkAnwfmAWn}YZw--X9|@{CHj-96FaymJ}HWk>M+~p zE9Q@2kzq)F5-zoNP_de*+}LjneKDYMhIhq9%>e)x4;4$-~#ZHNqxdYb)iRY~V4 zdu43-!7ji?>q4Da+z?KYPUN8Fgh?b+zc){E-1xWwVb~J4SPkM_Nw&;xcQ)aoe4~=D 
zL0uzs*~RU;iwhHexau=a{XR|J*vt0Z|B$~-1t5xnom5j}$zg_7HJ{#4_3?|8Fz>*KKe2sIzy#SB z_DYyTcv3YDsN8(2v_gSPIVVPsXf%NhE8f73KmNWEI>rQYE1dr|eAr7t({NNKC)>*> z_sUE0{pXc%9!2`wL?Ehz1|zZh;X{?e{KV?#grqeZcLyzZytt?JmHzAacPsBnU1tL+ zRV$OUnNBdhEWl0U-=R42ib-+3A^T~O4muy0P#Gzi0!IZg9n=c2EyBQ&az<*icive+ zI%@Pw{tz~e?6^Ogxw##H&&;np2p;wxa;)G~>*FWBQK@bf%a4$Q=tQkg{I~Nj@@E$3z>piglhAV;RCs1s(TgPo1MOUBiANpYu#Pxdn^DbprFsbR%3jP zx_?ElNvD(_PsCZ$8#;&R#G-MFMdw5ba}{q9v*t(Y`04n&+4SDyMBeK?8j;ecBI}p^ z#sV_znNywK)PLe<{8Lm{lYUfFGaH_SDul7#`EyREDcVQhfUZ-z;e&BRA`P8;+qxfi zK9+~3^NX_cZF>zW@{{n%ug*mDSRC$TBBH~@yMS_*p}KlUWv_{5PJAi;B(#mlOzWqz zI^S?Qv`GG1q&8(+ZXwChH`|Ejb+oXh{N2OFxe7-5K^&?BbRN}~&eLEido&v0cYgEi zk)~-nCohWcK6-P0{)l$V;R^GWiitC|O&8|p-@lJeP8e8Aso(S?o2c@;bT5#Zf0QeBvE3jPUy$(s!bS91gj_SHULBb>|g*SO=