feat: add songsee CLI and tests

This commit is contained in:
Peter Steinberger 2026-01-02 13:54:10 +01:00
parent b7a5d13990
commit ebb7912ac3
27 changed files with 2644 additions and 0 deletions

BIN
cmd/songsee/-.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

261
cmd/songsee/main.go Normal file
View File

@ -0,0 +1,261 @@
// Package main provides the songsee CLI entrypoint.
package main
import (
"errors"
"fmt"
"image"
"image/jpeg"
"image/png"
"io"
"os"
"path/filepath"
"strings"
"github.com/alecthomas/kong"
"github.com/steipete/songsee/internal/audio"
"github.com/steipete/songsee/internal/dsp"
"github.com/steipete/songsee/internal/render"
)
// version is the value exposed to kong as the "version" variable in run.
// It defaults to "dev"; presumably overridden at build time — TODO confirm.
var version = "dev"
// cli declares the command line parsed by kong in run: one positional input
// plus the flags below. The struct tags carry each flag's name, short form,
// help text and default value.
type cli struct {
	// Input and output selection.
	Input string `arg:"" help:"file path or '-' for stdin"`
	Output string `short:"o" help:"output image path"`
	Format string `help:"output format: jpg or png" default:"jpg"`
	// Image geometry.
	Width int `help:"output width in pixels" default:"1920"`
	Height int `help:"output height in pixels" default:"1080"`
	// Spectrogram analysis parameters; run rejects a window that is not a power of two.
	WindowSize int `name:"window" help:"FFT window size in samples" default:"2048"`
	HopSize int `name:"hop" help:"hop size in samples" default:"512"`
	MinFreq float64 `name:"min-freq" help:"minimum frequency in Hz"`
	MaxFreq float64 `name:"max-freq" help:"maximum frequency in Hz (0 = Nyquist)"`
	// Optional time slice applied after decoding.
	StartSec float64 `name:"start" help:"start time in seconds"`
	Duration float64 `name:"duration" help:"duration in seconds (0 = full)"`
	// Decoding and rendering options.
	SampleRate int `name:"sample-rate" help:"ffmpeg output sample rate" default:"44100"`
	Style string `help:"palette style: classic, magma, inferno, viridis, gray" default:"classic"`
	FFmpegPath string `name:"ffmpeg" help:"path to ffmpeg binary"`
	// Output verbosity.
	Quiet bool `short:"q" help:"suppress stdout output"`
	Verbose bool `short:"v" help:"verbose stderr output"`
	Version kong.VersionFlag `name:"version" help:"print version"`
}
// exitPanic is the panic payload raised by the kong.Exit hook installed in run.
// Panicking instead of exiting lets run recover the value and return code to
// its caller rather than terminating the process.
type exitPanic struct {
	code int
}
// main hands the process arguments and standard streams to run and exits with
// the status code it returns.
func main() {
	status := run(os.Args[1:], os.Stdin, os.Stdout, os.Stderr)
	os.Exit(status)
}
// run executes the songsee command line and returns the process exit status.
//
// args are the arguments after the program name, stdin supplies audio when the
// input is "-", stdout receives the output path (or the image itself when the
// output is "-"), and stderr receives diagnostics, usage and verbose logging.
//
// Exit codes: 0 on success, 1 for runtime failures (decode, slice, render,
// write), 2 for usage errors, or whatever code kong passes to its Exit hook.
func run(args []string, stdin io.Reader, stdout, stderr io.Writer) int {
	// Remember whether --format was given explicitly: an explicit format
	// suppresses the automatic extension on extension-less output paths.
	formatSet := hasFlag(args, "--format")
	cfg := cli{}
	exitCode := -1
	parser, err := kong.New(&cfg,
		kong.Name("songsee"),
		kong.Description("generate a classic spectrogram image"),
		kong.Vars{"version": version},
		kong.Writers(stdout, stderr),
		// Turn kong's exit requests into a panic so they unwind to the
		// recover below instead of terminating the process.
		kong.Exit(func(code int) { panic(exitPanic{code: code}) }),
	)
	if err != nil {
		_, _ = fmt.Fprintln(stderr, "songsee:", err)
		return 1
	}

	var ctx *kong.Context
	func() {
		defer func() {
			if recovered := recover(); recovered != nil {
				if exit, ok := recovered.(exitPanic); ok {
					exitCode = exit.code
					return
				}
				// Not ours: keep propagating genuine panics.
				panic(recovered)
			}
		}()
		ctx, err = parser.Parse(args)
	}()
	if exitCode >= 0 {
		return exitCode
	}
	if err != nil {
		var parseErr *kong.ParseError
		if errors.As(err, &parseErr) {
			_, _ = fmt.Fprintln(stderr, "songsee:", parseErr)
			if parseErr.Context != nil {
				parseErr.Context.Stdout = stderr
				_ = parseErr.Context.PrintUsage(false)
			}
			return 2
		}
		_, _ = fmt.Fprintln(stderr, "songsee:", err)
		return 1
	}

	input := cfg.Input
	if input == "" {
		if ctx != nil {
			ctx.Stdout = stderr
			_ = ctx.PrintUsage(false)
		}
		return 2
	}

	// Usage validation: everything here is checked before any audio is read.
	if cfg.MaxFreq > 0 && cfg.MaxFreq <= cfg.MinFreq {
		return dieUsage(stderr, ctx, "--max-freq must be > --min-freq")
	}
	if cfg.Width <= 0 || cfg.Height <= 0 {
		return dieUsage(stderr, ctx, "--width and --height must be > 0")
	}
	if cfg.WindowSize <= 0 || cfg.HopSize <= 0 {
		return dieUsage(stderr, ctx, "--window and --hop must be > 0")
	}
	if !isPowerOfTwo(cfg.WindowSize) {
		return dieUsage(stderr, ctx, "--window must be a power of two")
	}
	if cfg.StartSec < 0 || cfg.Duration < 0 {
		return dieUsage(stderr, ctx, "--start and --duration must be >= 0")
	}
	format := strings.ToLower(cfg.Format)
	if format != "jpg" && format != "jpeg" && format != "png" {
		return dieUsage(stderr, ctx, "--format must be jpg or png")
	}
	if format == "jpeg" {
		format = "jpg"
	}
	// The style is a usage error too, so reject it before decoding instead of
	// after the (potentially slow) decode step.
	style := strings.ToLower(strings.TrimSpace(cfg.Style))
	palette, err := render.PaletteByName(style)
	if err != nil {
		return dieUsage(stderr, ctx, "unknown style")
	}

	output, format := resolveOutput(input, cfg.Output, format, formatSet)
	if cfg.Verbose {
		_, _ = fmt.Fprintf(stderr, "input: %s\n", input)
		_, _ = fmt.Fprintf(stderr, "output: %s (%s)\n", output, format)
	}

	opts := audio.Options{SampleRate: cfg.SampleRate, FFmpegPath: cfg.FFmpegPath}
	var pcm audio.Audio
	if input == "-" {
		pcm, err = audio.DecodeReader(stdin, opts)
	} else {
		pcm, err = audio.DecodeFile(input, opts)
	}
	if err != nil {
		return die(stderr, err)
	}
	if len(pcm.Samples) == 0 {
		return die(stderr, errors.New("no samples decoded"))
	}
	if cfg.Verbose {
		_, _ = fmt.Fprintf(stderr, "decoded: %d samples @ %d Hz\n", len(pcm.Samples), pcm.SampleRate)
	}

	if cfg.StartSec > 0 || cfg.Duration > 0 {
		pcm, err = audio.Slice(pcm, cfg.StartSec, cfg.Duration)
		if err != nil {
			return die(stderr, err)
		}
		if cfg.Verbose {
			_, _ = fmt.Fprintf(stderr, "slice: %0.2fs + %0.2fs => %d samples\n", cfg.StartSec, cfg.Duration, len(pcm.Samples))
		}
	}

	spec := dsp.ComputeSpectrogram(pcm.Samples, pcm.SampleRate, cfg.WindowSize, cfg.HopSize)
	img, err := render.Spectrogram(spec, render.Options{
		Width:   cfg.Width,
		Height:  cfg.Height,
		MinFreq: cfg.MinFreq,
		MaxFreq: cfg.MaxFreq,
		Palette: palette,
	})
	if err != nil {
		return die(stderr, err)
	}
	if err := writeImage(output, format, img, stdout); err != nil {
		return die(stderr, err)
	}
	// When the image itself went to stdout, printing the path would corrupt it.
	if output != "-" && !cfg.Quiet {
		_, _ = fmt.Fprintln(stdout, output)
	}
	return 0
}

// resolveOutput derives the final output path and image format from the input
// path, the requested output path, the normalized format ("jpg" or "png") and
// whether --format was given explicitly.
func resolveOutput(input, output, format string, formatSet bool) (string, string) {
	switch output {
	case "-":
		// "-" means stdout; it must never be turned into a file name such as
		// "-.jpg" by the extension logic below.
		return output, format
	case "":
		if input == "-" {
			return "songsee." + format, format
		}
		// Trim the extension exactly as written so "Song.WAV" becomes
		// "Song.jpg" rather than "Song.WAV.jpg".
		base := strings.TrimSuffix(filepath.Base(input), filepath.Ext(input))
		return filepath.Join(filepath.Dir(input), base+"."+format), format
	}
	// A recognized extension on an explicit output path decides the format.
	switch strings.ToLower(filepath.Ext(output)) {
	case ".png":
		return output, "png"
	case ".jpg", ".jpeg":
		return output, "jpg"
	}
	if !formatSet {
		output += "." + format
	}
	return output, format
}
// writeImage encodes img as format ("png" or "jpg", JPEG quality 95) and
// writes it to path, or to stdout when path is "-".
//
// The format is validated before anything is created, so an unknown format
// never leaves an empty or truncated file behind. When writing to a file, a
// failure to close it is reported unless encoding already failed, because a
// close error can mean the image was not fully written.
func writeImage(path, format string, img image.Image, stdout io.Writer) (err error) {
	var encode func(w io.Writer) error
	switch format {
	case "png":
		encode = func(w io.Writer) error { return png.Encode(w, img) }
	case "jpg":
		encode = func(w io.Writer) error { return jpeg.Encode(w, img, &jpeg.Options{Quality: 95}) }
	default:
		return fmt.Errorf("unknown format %s", format)
	}
	if path == "-" {
		return encode(stdout)
	}
	file, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		// Keep the encode error if there is one; otherwise surface Close's.
		if closeErr := file.Close(); err == nil {
			err = closeErr
		}
	}()
	return encode(file)
}
// die prints err to stderr with the "songsee:" prefix and returns the exit
// status used for runtime failures.
func die(stderr io.Writer, err error) int {
	const runtimeFailure = 1
	_, _ = fmt.Fprintln(stderr, "songsee:", err)
	return runtimeFailure
}
// dieUsage prints msg to stderr with the "songsee:" prefix, follows it with
// the usage text when a kong context is available, and returns the exit
// status used for usage errors.
func dieUsage(stderr io.Writer, ctx *kong.Context, msg string) int {
	const usageFailure = 2
	_, _ = fmt.Fprintln(stderr, "songsee:", msg)
	if ctx == nil {
		return usageFailure
	}
	// Redirect the usage text so it lands next to the error message.
	ctx.Stdout = stderr
	_ = ctx.PrintUsage(false)
	return usageFailure
}
// isPowerOfTwo reports whether v is a positive integer with exactly one bit set.
func isPowerOfTwo(v int) bool {
	if v <= 0 {
		return false
	}
	// Clearing the lowest set bit leaves zero only for powers of two.
	return v&(v-1) == 0
}
// hasFlag reports whether the flag name (for example "--format") appears in
// args, either as a separate argument or in the "name=value" form.
//
// Scanning stops at the "--" terminator: everything after it is positional,
// so a file that happens to be called "--format" is not mistaken for the flag.
func hasFlag(args []string, name string) bool {
	withValue := name + "="
	for _, arg := range args {
		if arg == "--" {
			return false
		}
		if arg == name || strings.HasPrefix(arg, withValue) {
			return true
		}
	}
	return false
}

580
cmd/songsee/main_test.go Normal file
View File

@ -0,0 +1,580 @@
package main
import (
"bytes"
"image"
"image/png"
"math"
"os"
"path/filepath"
"testing"
)
// TestRunMP3E2E renders the sine.mp3 fixture to a 320x180 JPEG and checks that
// the file exists, decodes, has the requested size and is not a flat image.
func TestRunMP3E2E(t *testing.T) {
	src := testdataPath(t, "sine.mp3")
	dst := filepath.Join(t.TempDir(), "spectro.jpg")
	var stdout, stderr bytes.Buffer
	args := []string{
		"--width", "320",
		"--height", "180",
		"--start", "0.2",
		"--duration", "0.5",
		"--style", "magma",
		"--output", dst,
		src,
	}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 0 {
		t.Fatalf("exit %d stderr=%s", code, stderr.String())
	}
	if stdout.Len() == 0 {
		t.Fatalf("expected stdout output")
	}

	info, err := os.Stat(dst)
	if err != nil {
		t.Fatalf("missing output: %v", err)
	}
	if info.Size() == 0 {
		t.Fatalf("empty output")
	}

	file, err := os.Open(dst)
	if err != nil {
		t.Fatalf("open output: %v", err)
	}
	defer func() { _ = file.Close() }()
	img, _, err := image.Decode(file)
	if err != nil {
		t.Fatalf("decode image: %v", err)
	}
	if size := img.Bounds(); size.Dx() != 320 || size.Dy() != 180 {
		t.Fatalf("size mismatch")
	}
	if flatImage(img) {
		t.Fatalf("image appears flat")
	}
}
// TestRunFromStdinPNG feeds a small WAV through stdin and checks that the
// requested output file is a decodable PNG.
func TestRunFromStdinPNG(t *testing.T) {
	target := filepath.Join(t.TempDir(), "spectro.png")
	wav := makeWAV([]int16{0, 2000, -2000, 0, 1000, -1000}, 44100, 1)
	var stdout, stderr bytes.Buffer
	args := []string{
		"--format", "png",
		"--output", target,
		"-",
	}
	if code := run(args, bytes.NewReader(wav), &stdout, &stderr); code != 0 {
		t.Fatalf("exit %d stderr=%s", code, stderr.String())
	}
	file, err := os.Open(target)
	if err != nil {
		t.Fatalf("open output: %v", err)
	}
	defer func() { _ = file.Close() }()
	if _, err := png.Decode(file); err != nil {
		t.Fatalf("decode png: %v", err)
	}
}
// TestRunVersion checks that --version exits 0 and prints something to stdout.
func TestRunVersion(t *testing.T) {
	var stdout, stderr bytes.Buffer
	code := run([]string{"--version"}, bytes.NewReader(nil), &stdout, &stderr)
	if code != 0 {
		t.Fatalf("exit %d", code)
	}
	if stdout.Len() == 0 {
		t.Fatalf("expected version output")
	}
}
// TestRunHelp checks that --help exits 0 and prints help on either stream.
func TestRunHelp(t *testing.T) {
	var stdout, stderr bytes.Buffer
	code := run([]string{"--help"}, bytes.NewReader(nil), &stdout, &stderr)
	if code != 0 {
		t.Fatalf("exit %d", code)
	}
	if stdout.Len() == 0 && stderr.Len() == 0 {
		t.Fatalf("expected help output")
	}
}
// TestRunInvalidWindow checks that a window size that is not a power of two is
// a usage error reported on stderr.
func TestRunInvalidWindow(t *testing.T) {
	var stdout, stderr bytes.Buffer
	args := []string{"--window", "1000", "-"}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 2 {
		t.Fatalf("expected usage exit, got %d", code)
	}
	if stderr.Len() == 0 {
		t.Fatalf("expected stderr usage")
	}
}
// TestRunUnknownFlag checks that an unrecognized flag is a usage error
// reported on stderr.
func TestRunUnknownFlag(t *testing.T) {
	var stdout, stderr bytes.Buffer
	args := []string{"--nope"}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 2 {
		t.Fatalf("expected usage exit, got %d", code)
	}
	if stderr.Len() == 0 {
		t.Fatalf("expected stderr output")
	}
}
// TestRunBadFormat checks that an unsupported --format value is a usage error.
func TestRunBadFormat(t *testing.T) {
	var stdout, stderr bytes.Buffer
	args := []string{"--format", "gif", "-"}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 2 {
		t.Fatalf("expected usage exit, got %d", code)
	}
}
// TestRunBadFreqRange checks that --max-freq below --min-freq is a usage error.
func TestRunBadFreqRange(t *testing.T) {
	var stdout, stderr bytes.Buffer
	args := []string{"--min-freq", "100", "--max-freq", "50", "-"}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 2 {
		t.Fatalf("expected usage exit, got %d", code)
	}
}
func TestRunUnknownStyle(t *testing.T) {
stdout := &bytes.Buffer{}
stderr := &bytes.Buffer{}
exit := run([]string{"--style", "nope", "-"}, bytes.NewReader(makeWAV([]int16{0, 1}, 44100, 1)), stdout, stderr)
if exit != 2 {
t.Fatalf("expected usage exit, got %d", exit)
}
}
// TestRunBadSize checks that a zero --width is a usage error.
func TestRunBadSize(t *testing.T) {
	var stdout, stderr bytes.Buffer
	args := []string{"--width", "0", "-"}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 2 {
		t.Fatalf("expected usage exit, got %d", code)
	}
}
// TestRunBadWindowZero checks that a zero --window is a usage error.
func TestRunBadWindowZero(t *testing.T) {
	var stdout, stderr bytes.Buffer
	args := []string{"--window", "0", "-"}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 2 {
		t.Fatalf("expected usage exit, got %d", code)
	}
}
// TestRunBadHopZero checks that a zero --hop is a usage error.
func TestRunBadHopZero(t *testing.T) {
	var stdout, stderr bytes.Buffer
	args := []string{"--hop", "0", "-"}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 2 {
		t.Fatalf("expected usage exit, got %d", code)
	}
}
// TestRunNegativeStart checks that a negative --start is a usage error.
func TestRunNegativeStart(t *testing.T) {
	var stdout, stderr bytes.Buffer
	args := []string{"--start=-1", "-"}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 2 {
		t.Fatalf("expected usage exit, got %d", code)
	}
}
// TestRunMissingFile checks that a nonexistent input file is a runtime error
// (exit 1) reported on stderr.
func TestRunMissingFile(t *testing.T) {
	var stdout, stderr bytes.Buffer
	args := []string{"nope.wav"}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 1 {
		t.Fatalf("expected error exit, got %d", code)
	}
	if stderr.Len() == 0 {
		t.Fatalf("expected stderr output")
	}
}
// TestRunMissingInput checks that running without arguments is a usage error
// reported on stderr.
func TestRunMissingInput(t *testing.T) {
	var stdout, stderr bytes.Buffer
	var noArgs = []string{}
	if code := run(noArgs, bytes.NewReader(nil), &stdout, &stderr); code != 2 {
		t.Fatalf("expected usage exit, got %d", code)
	}
	if stderr.Len() == 0 {
		t.Fatalf("expected stderr output")
	}
}
// TestRunNoSamplesDecoded checks that a WAV with no samples fails with exit 1
// and a "no samples" message.
func TestRunNoSamplesDecoded(t *testing.T) {
	emptyWAV := makeWAV([]int16{}, 44100, 1)
	var stdout, stderr bytes.Buffer
	if code := run([]string{"-"}, bytes.NewReader(emptyWAV), &stdout, &stderr); code != 1 {
		t.Fatalf("expected error exit, got %d", code)
	}
	if found := bytes.Contains(stderr.Bytes(), []byte("no samples")); !found {
		t.Fatalf("expected no samples error")
	}
}
// TestRunSliceError checks that slicing beyond the end of the audio fails with
// exit 1.
func TestRunSliceError(t *testing.T) {
	wav := makeWAV([]int16{0, 1, -1, 0}, 44100, 1)
	var stdout, stderr bytes.Buffer
	args := []string{"--start", "2", "--duration", "1", "-"}
	if code := run(args, bytes.NewReader(wav), &stdout, &stderr); code != 1 {
		t.Fatalf("expected error exit, got %d", code)
	}
}
// TestRunSliceVerbose checks that --verbose together with a slice logs a
// "slice:" line on stderr.
func TestRunSliceVerbose(t *testing.T) {
	oneSecond := make([]int16, 44100)
	wav := makeWAV(oneSecond, 44100, 1)
	var stdout, stderr bytes.Buffer
	args := []string{"--verbose", "--start", "0", "--duration", "0.2", "--output", "-", "-"}
	if code := run(args, bytes.NewReader(wav), &stdout, &stderr); code != 0 {
		t.Fatalf("exit %d stderr=%s", code, stderr.String())
	}
	if logged := bytes.Contains(stderr.Bytes(), []byte("slice:")); !logged {
		t.Fatalf("expected slice output")
	}
}
// TestRunStyleAffectsOutput checks that two different palette styles produce
// different image bytes for the same audio.
func TestRunStyleAffectsOutput(t *testing.T) {
	wav := makeWAV(genSineMixSamples(44100), 44100, 1)
	classic := runToBytes(t, wav, "classic")
	magma := runToBytes(t, wav, "magma")
	if same := bytes.Equal(classic, magma); same {
		t.Fatalf("expected different output for different styles")
	}
}
// TestRunOutputStdout checks that "--output -" writes a decodable PNG to
// stdout.
func TestRunOutputStdout(t *testing.T) {
	wav := makeWAV([]int16{0, 1000, -1000, 0, 500, -500}, 44100, 1)
	var stdout, stderr bytes.Buffer
	args := []string{
		"--format", "png",
		"--output", "-",
		"-",
	}
	if code := run(args, bytes.NewReader(wav), &stdout, &stderr); code != 0 {
		t.Fatalf("exit %d stderr=%s", code, stderr.String())
	}
	if stdout.Len() == 0 {
		t.Fatalf("expected image bytes on stdout")
	}
	_, err := png.Decode(bytes.NewReader(stdout.Bytes()))
	if err != nil {
		t.Fatalf("decode stdout png: %v", err)
	}
}
// TestRunOutputAuto checks that without --output the image is written next to
// the input, with the input's extension replaced by the format.
func TestRunOutputAuto(t *testing.T) {
	dir := t.TempDir()
	src := filepath.Join(dir, "input.wav")
	if err := os.WriteFile(src, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
		t.Fatalf("write input: %v", err)
	}
	var stdout, stderr bytes.Buffer
	args := []string{"--format", "png", src}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 0 {
		t.Fatalf("exit %d stderr=%s", code, stderr.String())
	}
	want := filepath.Join(dir, "input.png")
	if _, err := os.Stat(want); err != nil {
		t.Fatalf("missing output: %v", err)
	}
}
// TestRunOutputExtOverride checks that an output path ending in .png is
// written as given even though the default format is jpg.
func TestRunOutputExtOverride(t *testing.T) {
	dir := t.TempDir()
	src := filepath.Join(dir, "input.wav")
	if err := os.WriteFile(src, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
		t.Fatalf("write input: %v", err)
	}
	dst := filepath.Join(dir, "out.png")
	var stdout, stderr bytes.Buffer
	args := []string{"--output", dst, src}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 0 {
		t.Fatalf("exit %d stderr=%s", code, stderr.String())
	}
	if _, err := os.Stat(dst); err != nil {
		t.Fatalf("missing output: %v", err)
	}
}
// TestRunOutputAppendDefault checks that an extension-less output path gets
// ".jpg" appended when --format is not given.
func TestRunOutputAppendDefault(t *testing.T) {
	dir := t.TempDir()
	src := filepath.Join(dir, "input.wav")
	if err := os.WriteFile(src, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
		t.Fatalf("write input: %v", err)
	}
	dst := filepath.Join(dir, "out")
	var stdout, stderr bytes.Buffer
	args := []string{"--output", dst, src}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 0 {
		t.Fatalf("exit %d stderr=%s", code, stderr.String())
	}
	if _, err := os.Stat(dst + ".jpg"); err != nil {
		t.Fatalf("missing output: %v", err)
	}
}
// TestRunOutputJpgExt checks that an output path ending in .jpg is written
// exactly as given.
func TestRunOutputJpgExt(t *testing.T) {
	dir := t.TempDir()
	src := filepath.Join(dir, "input.wav")
	if err := os.WriteFile(src, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
		t.Fatalf("write input: %v", err)
	}
	dst := filepath.Join(dir, "out.jpg")
	var stdout, stderr bytes.Buffer
	args := []string{"--output", dst, src}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 0 {
		t.Fatalf("exit %d stderr=%s", code, stderr.String())
	}
	if _, err := os.Stat(dst); err != nil {
		t.Fatalf("missing output: %v", err)
	}
}
// TestRunWriteImageError checks that an output path in a nonexistent directory
// fails with exit 1.
func TestRunWriteImageError(t *testing.T) {
	wav := makeWAV([]int16{0, 1000, -1000, 0}, 44100, 1)
	var stdout, stderr bytes.Buffer
	args := []string{"--output", "/nope/dir/out.jpg", "-"}
	if code := run(args, bytes.NewReader(wav), &stdout, &stderr); code != 1 {
		t.Fatalf("expected error exit, got %d", code)
	}
}
// TestRunFormatFlagKeepsOutput checks that an explicit --format leaves an
// extension-less output path untouched.
func TestRunFormatFlagKeepsOutput(t *testing.T) {
	dir := t.TempDir()
	src := filepath.Join(dir, "input.wav")
	if err := os.WriteFile(src, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
		t.Fatalf("write input: %v", err)
	}
	dst := filepath.Join(dir, "customout")
	var stdout, stderr bytes.Buffer
	args := []string{"--format", "png", "--output", dst, src}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 0 {
		t.Fatalf("exit %d stderr=%s", code, stderr.String())
	}
	if _, err := os.Stat(dst); err != nil {
		t.Fatalf("missing output: %v", err)
	}
}
// TestRunQuiet checks that --quiet suppresses the output path normally printed
// on stdout.
func TestRunQuiet(t *testing.T) {
	dir := t.TempDir()
	src := filepath.Join(dir, "input.wav")
	if err := os.WriteFile(src, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
		t.Fatalf("write input: %v", err)
	}
	dst := filepath.Join(dir, "out.jpg")
	var stdout, stderr bytes.Buffer
	args := []string{"--quiet", "--output", dst, src}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 0 {
		t.Fatalf("exit %d stderr=%s", code, stderr.String())
	}
	if stdout.Len() != 0 {
		t.Fatalf("expected quiet stdout")
	}
}
// TestDie checks that die returns 1 and writes something to stderr.
func TestDie(t *testing.T) {
	var stderr bytes.Buffer
	code := die(&stderr, errSentinel{})
	if code != 1 {
		t.Fatalf("expected code 1")
	}
	if stderr.Len() == 0 {
		t.Fatalf("expected stderr output")
	}
}
// errSentinel is a stateless error value used by TestDie.
type errSentinel struct{}

// Error implements the error interface with a fixed message.
func (errSentinel) Error() string {
	const message = "boom"
	return message
}
// TestRunInputDashDefaultOutput checks that reading from stdin without
// --output writes "songsee.<format>" into the current working directory.
func TestRunInputDashDefaultOutput(t *testing.T) {
	tmp := t.TempDir()
	// t.Chdir switches into tmp and registers the restore of the previous
	// working directory itself, so a failed restore is no longer silently
	// ignored as it was with a hand-written os.Chdir cleanup.
	t.Chdir(tmp)
	stdout := &bytes.Buffer{}
	stderr := &bytes.Buffer{}
	exit := run([]string{"--format", "png", "-"}, bytes.NewReader(makeWAV([]int16{0, 1, -1}, 44100, 1)), stdout, stderr)
	if exit != 0 {
		t.Fatalf("exit %d stderr=%s", exit, stderr.String())
	}
	if _, err := os.Stat(filepath.Join(tmp, "songsee.png")); err != nil {
		t.Fatalf("missing output: %v", err)
	}
}
// TestRunFormatJPEG checks that "--format jpeg" is accepted and, being an
// explicit format, leaves the extension-less output path untouched.
func TestRunFormatJPEG(t *testing.T) {
	dir := t.TempDir()
	src := filepath.Join(dir, "input.wav")
	if err := os.WriteFile(src, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
		t.Fatalf("write input: %v", err)
	}
	dst := filepath.Join(dir, "out")
	var stdout, stderr bytes.Buffer
	args := []string{"--format", "jpeg", "--output", dst, src}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 0 {
		t.Fatalf("exit %d stderr=%s", code, stderr.String())
	}
	if _, err := os.Stat(dst); err != nil {
		t.Fatalf("missing output: %v", err)
	}
}
// TestWriteImageUnknownFormat checks that writeImage rejects a format other
// than png or jpg.
func TestWriteImageUnknownFormat(t *testing.T) {
	var sink bytes.Buffer
	pixel := image.NewRGBA(image.Rect(0, 0, 1, 1))
	if err := writeImage("-", "gif", pixel, &sink); err == nil {
		t.Fatalf("expected error")
	}
}
// TestRunVerbose checks that --verbose logs a "decoded:" line on stderr.
func TestRunVerbose(t *testing.T) {
	dir := t.TempDir()
	src := filepath.Join(dir, "input.wav")
	if err := os.WriteFile(src, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
		t.Fatalf("write input: %v", err)
	}
	dst := filepath.Join(dir, "out.jpg")
	var stdout, stderr bytes.Buffer
	args := []string{"--verbose", "--output", dst, src}
	if code := run(args, bytes.NewReader(nil), &stdout, &stderr); code != 0 {
		t.Fatalf("exit %d stderr=%s", code, stderr.String())
	}
	if logged := bytes.Contains(stderr.Bytes(), []byte("decoded:")); !logged {
		t.Fatalf("expected verbose output")
	}
}
// testdataPath returns the path of name inside the "testdata" directory two
// levels above the current working directory, failing the test when the
// working directory cannot be determined or the path does not exist.
func testdataPath(t *testing.T, name string) string {
	t.Helper()
	cwd, err := os.Getwd()
	if err != nil {
		t.Fatalf("Getwd: %v", err)
	}
	repoRoot := filepath.Dir(filepath.Dir(cwd))
	full := filepath.Join(repoRoot, "testdata", name)
	if _, statErr := os.Stat(full); statErr != nil {
		t.Fatalf("missing testdata: %v", statErr)
	}
	return full
}
// flatImage reports whether the spread between the darkest and brightest
// pixel, measured as the mean of the 16-bit R, G and B channels, is below 1000.
func flatImage(img image.Image) bool {
	rect := img.Bounds()
	darkest, brightest := uint32(0xFFFFFFFF), uint32(0)
	for y := rect.Min.Y; y < rect.Max.Y; y++ {
		for x := rect.Min.X; x < rect.Max.X; x++ {
			red, green, blue, _ := img.At(x, y).RGBA()
			level := (red + green + blue) / 3
			if darkest > level {
				darkest = level
			}
			if brightest < level {
				brightest = level
			}
		}
	}
	spread := brightest - darkest
	return spread < 1000
}
// makeWAV builds an in-memory RIFF/WAVE file with a 16-byte fmt chunk and a
// data chunk holding samples as little-endian 16-bit values. channels below 1
// is treated as 1.
func makeWAV(samples []int16, sampleRate int, channels int) []byte {
	const sampleBytes = 2
	if channels < 1 {
		channels = 1
	}
	payload := len(samples) * sampleBytes
	frameBytes := channels * sampleBytes
	out := &bytes.Buffer{}

	// RIFF header: size covers "WAVE" plus both chunks with their 8-byte headers.
	out.WriteString("RIFF")
	writeU32(out, uint32(4+(8+16)+(8+payload)))
	out.WriteString("WAVE")

	// fmt chunk.
	out.WriteString("fmt ")
	writeU32(out, 16)
	writeU16(out, 1) // presumably the PCM format tag — TODO confirm
	writeU16(out, uint16(channels))
	writeU32(out, uint32(sampleRate))
	writeU32(out, uint32(sampleRate*frameBytes)) // byte rate
	writeU16(out, uint16(frameBytes))            // block align
	writeU16(out, 16)                            // presumably bits per sample — TODO confirm

	// data chunk.
	out.WriteString("data")
	writeU32(out, uint32(payload))
	for i := range samples {
		writeU16(out, uint16(samples[i]))
	}
	return out.Bytes()
}
// writeU16 appends v to buf as two bytes, least significant byte first.
func writeU16(buf *bytes.Buffer, v uint16) {
	low, high := byte(v), byte(v>>8)
	buf.Write([]byte{low, high})
}
// writeU32 appends v to buf as four bytes, least significant byte first.
func writeU32(buf *bytes.Buffer, v uint32) {
	for shift := uint(0); shift < 32; shift += 8 {
		buf.WriteByte(byte(v >> shift))
	}
}
// runToBytes runs the CLI on wav via stdin with the given style, rendering a
// PNG to stdout, and returns the captured image bytes. It fails the test on a
// non-zero exit.
func runToBytes(t *testing.T, wav []byte, style string) []byte {
	t.Helper()
	var stdout, stderr bytes.Buffer
	args := []string{"--format", "png", "--style", style, "--output", "-", "-"}
	if code := run(args, bytes.NewReader(wav), &stdout, &stderr); code != 0 {
		t.Fatalf("exit %d stderr=%s", code, stderr.String())
	}
	return stdout.Bytes()
}
// genSineMixSamples returns n samples of two summed sine waves that complete
// 440 and 880 cycles over the whole buffer, scaled by 15000.
func genSineMixSamples(n int) []int16 {
	samples := make([]int16, n)
	for i := range samples {
		t := float64(i) / float64(n)
		mix := 0.5*math.Sin(2*math.Pi*440*t) + 0.4*math.Sin(2*math.Pi*880*t)
		samples[i] = int16(mix * 15000)
	}
	return samples
}

7
go.mod Normal file
View File

@ -0,0 +1,7 @@
module github.com/steipete/songsee
go 1.25
require github.com/hajimehoshi/go-mp3 v0.3.4
require github.com/alecthomas/kong v1.13.0

12
go.sum Normal file
View File

@ -0,0 +1,12 @@
github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0=
github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
github.com/alecthomas/kong v1.13.0 h1:5e/7XC3ugvhP1DQBmTS+WuHtCbcv44hsohMgcvVxSrA=
github.com/alecthomas/kong v1.13.0/go.mod h1:wrlbXem1CWqUV5Vbmss5ISYhsVPkBb1Yo7YKJghju2I=
github.com/alecthomas/repr v0.5.2 h1:SU73FTI9D1P5UNtvseffFSGmdNci/O6RsqzeXJtP0Qs=
github.com/alecthomas/repr v0.5.2/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
github.com/hajimehoshi/go-mp3 v0.3.4 h1:NUP7pBYH8OguP4diaTZ9wJbUbk3tC0KlfzsEpWmYj68=
github.com/hajimehoshi/go-mp3 v0.3.4/go.mod h1:fRtZraRFcWb0pu7ok0LqyFhCUrPeMsGRSVop0eemFmo=
github.com/hajimehoshi/oto/v2 v2.3.1/go.mod h1:seWLbgHH7AyUMYKfKYT9pg7PhUu9/SisyJvNTT+ASQo=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=

21
internal/audio/audio.go Normal file
View File

@ -0,0 +1,21 @@
// Package audio handles decoding audio into mono float samples.
package audio
import "fmt"
// Audio holds mono samples in [-1,1] range.
type Audio struct {
	// SampleRate is the number of samples per second, in Hz.
	SampleRate int
	// Samples is the decoded mono signal, one value per sample.
	Samples []float64
}
// Options controls decoding behavior.
type Options struct {
	// SampleRate is the output rate requested from the ffmpeg fallback;
	// 0 is treated as 44100 by DecodeFile and DecodeBytes.
	SampleRate int
	// FFmpegPath is the ffmpeg binary to run; when empty, ffmpeg is looked up
	// in PATH.
	FFmpegPath string
}
var (
	// ErrUnsupported is returned when no decoder can handle the input.
	// DecodeFile and DecodeBytes wrap it with %w when the ffmpeg fallback
	// also fails, so callers can match it with errors.Is.
	ErrUnsupported = fmt.Errorf("unsupported audio format")
)

View File

@ -0,0 +1,249 @@
package audio
import (
"bytes"
"math"
"os"
"path/filepath"
"testing"
)
func TestDecodeWAVBytes(t *testing.T) {
samples := make([]int16, 1000)
for i := range samples {
samples[i] = int16(2000 * math.Sin(2*math.Pi*float64(i)/50))
}
data := makeWAV(samples, 44100, 1)
pcm, err := DecodeBytes(data, Options{})
if err != nil {
t.Fatalf("DecodeBytes: %v", err)
}
if pcm.SampleRate != 44100 {
t.Fatalf("sample rate = %d", pcm.SampleRate)
}
if len(pcm.Samples) != len(samples) {
t.Fatalf("samples = %d", len(pcm.Samples))
}
}
func TestDecodeWAVIfNotWAV(t *testing.T) {
_, ok, err := DecodeWAVIf(bytesReader([]byte("NOTWAVE12345")))
if err != nil {
t.Fatalf("DecodeWAVIf error: %v", err)
}
if ok {
t.Fatalf("expected ok=false")
}
}
// TestDecodeWAVIfValid checks that a three-sample WAV is recognized and
// decoded to three samples.
func TestDecodeWAVIfValid(t *testing.T) {
	wav := makeWAV([]int16{0, 1000, -1000}, 44100, 1)
	pcm, recognized, err := DecodeWAVIf(bytesReader(wav))
	if err != nil {
		t.Fatalf("DecodeWAVIf error: %v", err)
	}
	if !recognized {
		t.Fatalf("expected ok=true")
	}
	if got := len(pcm.Samples); got != 3 {
		t.Fatalf("samples = %d", got)
	}
}
func TestDecodeMP3File(t *testing.T) {
path := testdataPath(t, "sine.mp3")
pcm, err := DecodeFile(path, Options{})
if err != nil {
t.Fatalf("DecodeFile: %v", err)
}
if pcm.SampleRate == 0 || len(pcm.Samples) == 0 {
t.Fatalf("invalid decode result")
}
}
// TestDecodeMP3IfValid checks that the sine.mp3 fixture is recognized as MP3
// and decodes to a non-empty signal.
func TestDecodeMP3IfValid(t *testing.T) {
	raw, err := os.ReadFile(testdataPath(t, "sine.mp3"))
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	pcm, recognized, err := DecodeMP3If(bytesReader(raw))
	if err != nil {
		t.Fatalf("DecodeMP3If: %v", err)
	}
	if !recognized {
		t.Fatalf("expected ok=true")
	}
	if len(pcm.Samples) == 0 {
		t.Fatalf("empty samples")
	}
}
// TestDecodeFileUnknownExt checks that WAV content is decoded even when the
// file extension gives no hint.
func TestDecodeFileUnknownExt(t *testing.T) {
	target := filepath.Join(t.TempDir(), "audio.bin")
	if err := os.WriteFile(target, makeWAV([]int16{0, 1, -1}, 44100, 1), 0o644); err != nil {
		t.Fatalf("write file: %v", err)
	}
	pcm, err := DecodeFile(target, Options{})
	if err != nil {
		t.Fatalf("DecodeFile: %v", err)
	}
	if len(pcm.Samples) == 0 {
		t.Fatalf("empty samples")
	}
}
// TestDecodeFileWAV checks that a .wav file on disk decodes to a non-empty
// signal.
func TestDecodeFileWAV(t *testing.T) {
	target := filepath.Join(t.TempDir(), "audio.wav")
	if err := os.WriteFile(target, makeWAV([]int16{0, 1, -1}, 44100, 1), 0o644); err != nil {
		t.Fatalf("write file: %v", err)
	}
	pcm, err := DecodeFile(target, Options{})
	if err != nil {
		t.Fatalf("DecodeFile: %v", err)
	}
	if len(pcm.Samples) == 0 {
		t.Fatalf("empty samples")
	}
}
// TestDecodeFileFFmpegFallbackError checks that a file of garbage bytes makes
// DecodeFile return an error.
func TestDecodeFileFFmpegFallbackError(t *testing.T) {
	target := filepath.Join(t.TempDir(), "audio.bin")
	if err := os.WriteFile(target, []byte("garbagegarbagegarbage"), 0o644); err != nil {
		t.Fatalf("write file: %v", err)
	}
	_, err := DecodeFile(target, Options{})
	if err == nil {
		t.Fatalf("expected error")
	}
}
// TestDecodeBytesFFmpegFallbackError checks that garbage bytes make
// DecodeBytes return an error.
func TestDecodeBytesFFmpegFallbackError(t *testing.T) {
	garbage := []byte("not audio")
	if _, err := DecodeBytes(garbage, Options{}); err == nil {
		t.Fatalf("expected error for garbage data")
	}
}
// TestDecodeMP3Bytes checks that the sine.mp3 fixture decodes from memory to a
// non-empty signal.
func TestDecodeMP3Bytes(t *testing.T) {
	raw, err := os.ReadFile(testdataPath(t, "sine.mp3"))
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	pcm, err := DecodeBytes(raw, Options{})
	if err != nil {
		t.Fatalf("DecodeBytes: %v", err)
	}
	if len(pcm.Samples) == 0 {
		t.Fatalf("empty samples")
	}
}
// TestDecodeReader checks that DecodeReader reports the sample rate stored in
// a 48 kHz WAV.
func TestDecodeReader(t *testing.T) {
	wav := makeWAV([]int16{0, 1000, -1000, 0}, 48000, 1)
	pcm, err := DecodeReader(bytesReader(wav), Options{})
	if err != nil {
		t.Fatalf("DecodeReader: %v", err)
	}
	if got := pcm.SampleRate; got != 48000 {
		t.Fatalf("sample rate = %d", got)
	}
}
// TestDecodeReaderError checks that a failing reader makes DecodeReader return
// an error.
func TestDecodeReaderError(t *testing.T) {
	if _, err := DecodeReader(errReader{}, Options{}); err == nil {
		t.Fatalf("expected error")
	}
}
// TestSlice checks that slicing 0.5s starting at 0.2s from 10 Hz audio yields
// five samples.
func TestSlice(t *testing.T) {
	source := Audio{SampleRate: 10, Samples: []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}}
	cut, err := Slice(source, 0.2, 0.5)
	if err != nil {
		t.Fatalf("Slice: %v", err)
	}
	if got := len(cut.Samples); got != 5 {
		t.Fatalf("slice samples = %d", got)
	}
}
func TestSliceErrors(t *testing.T) {
_, err := Slice(Audio{SampleRate: 10, Samples: []float64{1}}, -1, 1)
if err == nil {
t.Fatalf("expected error for negative start")
}
_, err = Slice(Audio{SampleRate: 0, Samples: []float64{1}}, 0, 1)
if err == nil {
t.Fatalf("expected error for sample rate")
}
_, err = Slice(Audio{SampleRate: 10, Samples: []float64{}}, 0, 1)
if err == nil {
t.Fatalf("expected error for empty samples")
}
_, err = Slice(Audio{SampleRate: 10, Samples: []float64{1}}, 2, 0)
if err == nil {
t.Fatalf("expected error for start")
}
}
// TestSliceFullDuration checks that a zero start and zero duration return the
// whole signal.
func TestSliceFullDuration(t *testing.T) {
	source := Audio{SampleRate: 10, Samples: []float64{0, 1, 2, 3}}
	cut, err := Slice(source, 0, 0)
	if err != nil {
		t.Fatalf("Slice: %v", err)
	}
	if len(source.Samples) != len(cut.Samples) {
		t.Fatalf("expected full slice")
	}
}
// TestSliceDurationTooShort checks that a duration shorter than one sample is
// rejected.
func TestSliceDurationTooShort(t *testing.T) {
	source := Audio{SampleRate: 10, Samples: []float64{0, 1, 2}}
	_, err := Slice(source, 0, 0.01)
	if err == nil {
		t.Fatalf("expected error")
	}
}
func TestDecodeMP3IfNotMP3(t *testing.T) {
_, ok, err := DecodeMP3If(bytesReader([]byte("NOTMP3DATA")))
if err != nil {
t.Fatalf("DecodeMP3If error: %v", err)
}
if ok {
t.Fatalf("expected ok=false")
}
}
func TestDecodeWAVUnsupported(t *testing.T) {
_, err := decodeWAV(bytesReader([]byte("NOTWAVE12345")))
if err == nil {
t.Fatalf("expected error")
}
}
// testdataPath returns the path of name inside the "testdata" directory two
// levels above the current working directory, failing the test when the
// working directory cannot be determined or the path does not exist.
func testdataPath(t *testing.T, name string) string {
	t.Helper()
	cwd, err := os.Getwd()
	if err != nil {
		t.Fatalf("Getwd: %v", err)
	}
	repoRoot := filepath.Dir(filepath.Dir(cwd))
	full := filepath.Join(repoRoot, "testdata", name)
	if _, statErr := os.Stat(full); statErr != nil {
		t.Fatalf("missing testdata: %v", statErr)
	}
	return full
}
// bytesReader wraps b in a *bytes.Reader positioned at the start.
func bytesReader(b []byte) *bytes.Reader {
	reader := bytes.NewReader(b)
	return reader
}
// errReader is an io.Reader whose Read always fails.
type errReader struct{}

// Read reads nothing and always returns os.ErrInvalid.
func (errReader) Read([]byte) (int, error) {
	var failure error = os.ErrInvalid
	return 0, failure
}

78
internal/audio/decode.go Normal file
View File

@ -0,0 +1,78 @@
// Package audio handles decoding audio into mono float samples.
package audio
import (
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"strings"
)
// DecodeFile reads an audio file, decoding WAV/MP3 and falling back to ffmpeg.
//
// The built-in decoders are probed on the opened file, starting with MP3 when
// the extension is ".mp3" and with WAV otherwise; the extension is only a hint,
// so both are tried. The file is rewound before the second probe so it sees
// the file from the beginning rather than from wherever the first probe
// stopped reading. When neither decoder recognizes the data, ffmpeg is run on
// path with opts.SampleRate (44100 when zero) and opts.FFmpegPath.
//
// It returns the error from opening the file, the error of the decoder that
// recognized the data, or an error wrapping ErrUnsupported when the ffmpeg
// fallback fails as well.
func DecodeFile(path string, opts Options) (Audio, error) {
	file, err := os.Open(path)
	if err != nil {
		return Audio{}, err
	}
	defer func() { _ = file.Close() }()

	mp3First := strings.ToLower(filepath.Ext(path)) == ".mp3"
	for attempt := 0; attempt < 2; attempt++ {
		useMP3 := mp3First
		if attempt == 1 {
			useMP3 = !mp3First
			// Undo whatever the first probe consumed. If the file cannot be
			// rewound, skip the probe and let ffmpeg reopen the path itself.
			if _, err := file.Seek(0, io.SeekStart); err != nil {
				break
			}
		}
		var (
			pcm Audio
			ok  bool
		)
		if useMP3 {
			pcm, ok, err = DecodeMP3If(file)
		} else {
			pcm, ok, err = DecodeWAVIf(file)
		}
		if ok {
			return pcm, err
		}
	}

	if opts.SampleRate == 0 {
		opts.SampleRate = 44100
	}
	pcm, err := DecodeWithFFmpeg(path, nil, opts.SampleRate, opts.FFmpegPath)
	if err != nil {
		return Audio{}, fmt.Errorf("%w; ffmpeg fallback failed: %v", ErrUnsupported, err)
	}
	return pcm, nil
}
// DecodeBytes decodes audio held in memory. It probes the WAV decoder, then
// the MP3 decoder (rewinding the reader in between), and finally pipes the
// data to ffmpeg using opts.SampleRate (44100 when zero). A failed ffmpeg
// fallback is reported as an error wrapping ErrUnsupported.
func DecodeBytes(data []byte, opts Options) (Audio, error) {
	src := bytes.NewReader(data)
	pcm, ok, err := DecodeWAVIf(src)
	if ok {
		return pcm, err
	}
	src.Reset(data)
	pcm, ok, err = DecodeMP3If(src)
	if ok {
		return pcm, err
	}

	rate := opts.SampleRate
	if rate == 0 {
		rate = 44100
	}
	pcm, err = DecodeWithFFmpeg("", bytes.NewReader(data), rate, opts.FFmpegPath)
	if err == nil {
		return pcm, nil
	}
	return Audio{}, fmt.Errorf("%w; ffmpeg fallback failed: %v", ErrUnsupported, err)
}
// DecodeReader reads r to the end and decodes the collected bytes with
// DecodeBytes. A read failure is returned unchanged.
func DecodeReader(r io.Reader, opts Options) (Audio, error) {
	raw, readErr := io.ReadAll(r)
	if readErr != nil {
		return Audio{}, readErr
	}
	return DecodeBytes(raw, opts)
}

67
internal/audio/ffmpeg.go Normal file
View File

@ -0,0 +1,67 @@
// Package audio handles decoding audio into mono float samples.
package audio
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math"
"os/exec"
)
// DecodeWithFFmpeg uses ffmpeg to decode any input into mono float samples.
//
// When stdin is non-nil the audio is piped to ffmpeg and path is ignored;
// otherwise ffmpeg reads path itself. sampleRate is the requested output rate
// (44100 when not positive) and ffmpegPath selects the binary, falling back to
// a PATH lookup when empty.
//
// Errors from running ffmpeg are wrapped with %w, so callers can still reach
// the underlying error (for example *exec.ExitError) with errors.As, and carry
// ffmpeg's trimmed stderr output when there is any.
func DecodeWithFFmpeg(path string, stdin io.Reader, sampleRate int, ffmpegPath string) (Audio, error) {
	if sampleRate <= 0 {
		sampleRate = 44100
	}
	ffmpeg, err := resolveFFmpeg(ffmpegPath)
	if err != nil {
		return Audio{}, err
	}

	source := path
	if stdin != nil {
		source = "pipe:0"
	}
	// Ask for raw little-endian 32-bit float samples, one channel, on stdout.
	cmd := exec.Command(ffmpeg,
		"-hide_banner", "-loglevel", "error",
		"-i", source,
		"-f", "f32le", "-ac", "1", "-ar", fmt.Sprintf("%d", sampleRate),
		"-",
	)
	if stdin != nil {
		cmd.Stdin = stdin
	}
	var stderr bytes.Buffer
	cmd.Stderr = &stderr
	out, err := cmd.Output()
	if err != nil {
		if detail := bytes.TrimSpace(stderr.Bytes()); len(detail) > 0 {
			return Audio{}, fmt.Errorf("ffmpeg: %w: %s", err, detail)
		}
		return Audio{}, fmt.Errorf("ffmpeg: %w", err)
	}
	if len(out)%4 != 0 {
		return Audio{}, fmt.Errorf("ffmpeg: unexpected pcm length")
	}

	samples := make([]float64, len(out)/4)
	for i := range samples {
		bits := binary.LittleEndian.Uint32(out[i*4 : i*4+4])
		samples[i] = float64(math.Float32frombits(bits))
	}
	return Audio{SampleRate: sampleRate, Samples: samples}, nil
}
// resolveFFmpeg returns the ffmpeg binary to execute.
//
// A non-empty path is returned as given, without checking that it exists.
// Otherwise "ffmpeg" is looked up on PATH; a failed lookup is reported with
// the lookup error wrapped, so callers can match it with errors.Is.
func resolveFFmpeg(path string) (string, error) {
	if path != "" {
		return path, nil
	}
	ffmpeg, err := exec.LookPath("ffmpeg")
	if err != nil {
		return "", fmt.Errorf("ffmpeg not found in PATH: %w", err)
	}
	return ffmpeg, nil
}

View File

@ -0,0 +1,74 @@
package audio
import (
"bytes"
"os"
"testing"
)
// TestResolveFFmpeg checks that an empty path resolves to a non-empty binary
// location; it needs ffmpeg on PATH.
func TestResolveFFmpeg(t *testing.T) {
	resolved, lookupErr := resolveFFmpeg("")
	switch {
	case lookupErr != nil:
		t.Fatalf("resolveFFmpeg: %v", lookupErr)
	case resolved == "":
		t.Fatalf("empty ffmpeg path")
	}
}
// TestResolveFFmpegExplicit feeds the binary found on PATH back in as an
// explicit path and expects it to be accepted.
func TestResolveFFmpegExplicit(t *testing.T) {
	onPath, err := resolveFFmpeg("")
	if err != nil {
		t.Fatalf("resolveFFmpeg: %v", err)
	}

	explicit, explicitErr := resolveFFmpeg(onPath)
	switch {
	case explicitErr != nil:
		t.Fatalf("resolveFFmpeg explicit: %v", explicitErr)
	case explicit == "":
		t.Fatalf("empty ffmpeg path")
	}
}
// TestResolveFFmpegMissing empties PATH and expects the lookup to fail.
func TestResolveFFmpegMissing(t *testing.T) {
	t.Setenv("PATH", "")
	_, err := resolveFFmpeg("")
	if err != nil {
		return
	}
	t.Fatalf("expected error")
}
// TestDecodeWithFFmpegFile decodes the sine.mp3 fixture from disk and checks
// that the requested sample rate is reported and samples were produced.
func TestDecodeWithFFmpegFile(t *testing.T) {
	fixture := testdataPath(t, "sine.mp3")
	decoded, err := DecodeWithFFmpeg(fixture, nil, 22050, "")
	if err != nil {
		t.Fatalf("DecodeWithFFmpeg: %v", err)
	}
	if rate := decoded.SampleRate; rate != 22050 {
		t.Fatalf("sample rate = %d", rate)
	}
	if count := len(decoded.Samples); count == 0 {
		t.Fatalf("empty samples")
	}
}
// TestDecodeWithFFmpegStdin feeds the sine.mp3 fixture to ffmpeg through
// stdin instead of by path.
func TestDecodeWithFFmpegStdin(t *testing.T) {
	raw, readErr := os.ReadFile(testdataPath(t, "sine.mp3"))
	if readErr != nil {
		t.Fatalf("ReadFile: %v", readErr)
	}

	decoded, decodeErr := DecodeWithFFmpeg("", bytes.NewReader(raw), 44100, "")
	if decodeErr != nil {
		t.Fatalf("DecodeWithFFmpeg stdin: %v", decodeErr)
	}
	if count := len(decoded.Samples); count == 0 {
		t.Fatalf("empty samples")
	}
}
// TestDecodeWithFFmpegBadPath expects an error when the ffmpeg binary path
// does not exist.
func TestDecodeWithFFmpegBadPath(t *testing.T) {
	if _, err := DecodeWithFFmpeg("missing.mp3", nil, 0, "/no/such/ffmpeg"); err != nil {
		return
	}
	t.Fatalf("expected error")
}

72
internal/audio/mp3.go Normal file
View File

@ -0,0 +1,72 @@
package audio
import (
"bytes"
"errors"
"io"
"github.com/hajimehoshi/go-mp3"
)
// DecodeMP3If tries to decode MP3 data, returning ok=false when not MP3.
//
// The first four bytes are sniffed for an "ID3" tag or an MPEG frame sync
// (0xFF followed by a byte whose top three bits are set). The reader is
// always rewound to the start afterwards, including when the header could
// not be read in full, so the caller can hand the same reader to another
// decoder. A failed rewind is reported with ok=false instead of decoding
// from the wrong offset.
func DecodeMP3If(r io.ReadSeeker) (Audio, bool, error) {
	header := make([]byte, 4)
	_, readErr := io.ReadFull(r, header)
	if _, err := r.Seek(0, io.SeekStart); err != nil {
		return Audio{}, false, err
	}
	if readErr != nil {
		return Audio{}, false, readErr
	}

	isMP3 := string(header[0:3]) == "ID3" || (header[0] == 0xFF && header[1]&0xE0 == 0xE0)
	if !isMP3 {
		return Audio{}, false, nil
	}
	pcm, err := decodeMP3(r)
	if err != nil {
		return Audio{}, true, err
	}
	return pcm, true, nil
}
// decodeMP3 decodes an MP3 stream from r and downmixes it to mono samples.
//
// The go-mp3 decoder always produces interleaved signed 16-bit little-endian
// stereo PCM, even for single-channel sources, so every frame is exactly
// four bytes. The channel count is therefore fixed at two instead of being
// guessed from the byte length. Each output sample is the mean of the two
// channels after dividing by 32768.
func decodeMP3(r io.Reader) (Audio, error) {
	const (
		channels      = 2
		bytesPerFrame = 2 * channels
	)

	dec, err := mp3.NewDecoder(r)
	if err != nil {
		return Audio{}, err
	}
	pcm, err := io.ReadAll(dec)
	if err != nil {
		return Audio{}, err
	}
	if len(pcm)%bytesPerFrame != 0 {
		return Audio{}, errors.New("mp3: truncated pcm frame")
	}

	out := make([]float64, len(pcm)/bytesPerFrame)
	buf := bytes.NewReader(pcm)
	for i := range out {
		var sum float64
		for ch := 0; ch < channels; ch++ {
			var sample int16
			if err := binaryRead(buf, &sample); err != nil {
				return Audio{}, err
			}
			sum += float64(sample) / 32768.0
		}
		out[i] = sum / float64(channels)
	}
	return Audio{SampleRate: dec.SampleRate(), Samples: out}, nil
}
// binaryRead reads two bytes from r and stores them in *v as a little-endian
// signed 16-bit integer. On a read error *v is left untouched.
func binaryRead(r io.Reader, v *int16) error {
	var raw [2]byte
	_, err := io.ReadFull(r, raw[:])
	if err == nil {
		lo, hi := uint16(raw[0]), uint16(raw[1])
		*v = int16(hi<<8 | lo)
	}
	return err
}

View File

@ -0,0 +1,30 @@
package audio
import (
"bytes"
"testing"
)
// TestDecodeMP3Error expects decodeMP3 to reject bytes that are not MP3.
func TestDecodeMP3Error(t *testing.T) {
	garbage := bytes.NewReader([]byte("not mp3"))
	_, err := decodeMP3(garbage)
	if err == nil {
		t.Fatalf("expected error")
	}
}
// TestBinaryReadError expects binaryRead to fail when only one of the two
// required bytes is available.
func TestBinaryReadError(t *testing.T) {
	var decoded int16
	oneByte := bytes.NewReader([]byte{0x01})
	if binaryRead(oneByte, &decoded) == nil {
		t.Fatalf("expected error")
	}
}
// TestDecodeMP3IfCorrupt passes an ID3 signature with no usable payload: the
// data must be recognised as MP3 (ok=true) and decoding must fail.
func TestDecodeMP3IfCorrupt(t *testing.T) {
	truncated := []byte{'I', 'D', '3', 0x03, 0x00}
	_, recognized, err := DecodeMP3If(bytes.NewReader(truncated))
	switch {
	case !recognized:
		t.Fatalf("expected ok=true")
	case err == nil:
		t.Fatalf("expected error")
	}
}

33
internal/audio/slice.go Normal file
View File

@ -0,0 +1,33 @@
package audio
import "fmt"
// Slice returns a time-based slice of audio in seconds.
//
// startSec is the offset of the first sample and durationSec the length of
// the excerpt; a durationSec of 0 means "until the end". A duration that
// runs past the end is clamped to the end. The returned Samples share the
// backing array of a.Samples.
//
// An error is returned when start or duration is negative or NaN, when the
// sample rate is not positive, when there are no samples, when start lies at
// or beyond the end, or when a positive duration covers less than one sample.
func Slice(a Audio, startSec, durationSec float64) (Audio, error) {
	// Written as negated ">=" so that NaN, which fails every comparison, is
	// rejected together with negative values.
	if !(startSec >= 0) || !(durationSec >= 0) {
		return Audio{}, fmt.Errorf("slice: start and duration must be >= 0")
	}
	if a.SampleRate <= 0 {
		return Audio{}, fmt.Errorf("slice: invalid sample rate")
	}
	total := len(a.Samples)
	if total == 0 {
		return Audio{}, fmt.Errorf("slice: empty samples")
	}

	// Range checks are done in floating point before converting to int:
	// converting an out-of-range float yields an unspecified integer, which
	// previously could become a negative slice index.
	rate := float64(a.SampleRate)
	startPos := startSec * rate
	if startPos >= float64(total) {
		return Audio{}, fmt.Errorf("slice: start beyond end")
	}
	start := int(startPos)

	end := total
	if durationSec > 0 {
		if span := durationSec * rate; span < float64(total-start) {
			end = start + int(span)
		}
		if end <= start {
			return Audio{}, fmt.Errorf("slice: duration too short")
		}
	}
	return Audio{SampleRate: a.SampleRate, Samples: a.Samples[start:end]}, nil
}

229
internal/audio/wav.go Normal file
View File

@ -0,0 +1,229 @@
package audio
import (
"encoding/binary"
"errors"
"fmt"
"io"
"math"
)
// DecodeWAVIf tries to decode WAV data, returning ok=false when not WAV.
//
// The first twelve bytes are checked for the "RIFF"...."WAVE" signature.
// The reader is always rewound to the start afterwards, including when the
// header could not be read in full, so the caller can hand the same reader
// to another decoder. A failed rewind is reported with ok=false instead of
// decoding from the wrong offset.
func DecodeWAVIf(r io.ReadSeeker) (Audio, bool, error) {
	header := make([]byte, 12)
	_, readErr := io.ReadFull(r, header)
	if _, err := r.Seek(0, io.SeekStart); err != nil {
		return Audio{}, false, err
	}
	if readErr != nil {
		return Audio{}, false, readErr
	}

	if string(header[0:4]) != "RIFF" || string(header[8:12]) != "WAVE" {
		return Audio{}, false, nil
	}
	pcm, err := decodeWAV(r)
	if err != nil {
		return Audio{}, true, err
	}
	return pcm, true, nil
}
// decodeWAV parses a RIFF/WAVE stream from r and decodes its sample data.
//
// After the 12-byte RIFF header it walks the chunk list until EOF: "fmt "
// chunks are parsed with parseWavFormat, "data" chunks are read into memory,
// and every other chunk is skipped. Odd-sized chunks are followed by one
// padding byte. Both a "fmt " and a "data" chunk must be present; the
// collected data is then handed to decodeWavData.
func decodeWAV(r io.ReadSeeker) (Audio, error) {
	riff := make([]byte, 12)
	if _, err := io.ReadFull(r, riff); err != nil {
		return Audio{}, err
	}
	if string(riff[0:4]) != "RIFF" || string(riff[8:12]) != "WAVE" {
		return Audio{}, ErrUnsupported
	}

	var (
		format   wavFormat
		payload  []byte
		sawFmt   bool
		sawData  bool
		chunkHdr [8]byte // 4-byte id followed by a little-endian 32-bit size
	)
	for {
		_, err := io.ReadFull(r, chunkHdr[:])
		if err == io.EOF {
			// Clean end of the chunk list.
			break
		}
		if err != nil {
			return Audio{}, err
		}
		id := string(chunkHdr[0:4])
		size := int(binary.LittleEndian.Uint32(chunkHdr[4:8]))

		if id == "fmt " {
			sawFmt = true
			raw := make([]byte, size)
			if _, err := io.ReadFull(r, raw); err != nil {
				return Audio{}, err
			}
			if err := parseWavFormat(raw, &format); err != nil {
				return Audio{}, err
			}
		} else if id == "data" {
			sawData = true
			payload = make([]byte, size)
			if _, err := io.ReadFull(r, payload); err != nil {
				return Audio{}, err
			}
		} else {
			// Skip unknown chunk.
			if _, err := r.Seek(int64(size), io.SeekCurrent); err != nil {
				return Audio{}, err
			}
		}

		// Step over the padding byte that follows an odd-sized chunk.
		if size%2 == 1 {
			_, _ = r.Seek(1, io.SeekCurrent)
		}
	}

	if !(sawFmt && sawData) {
		return Audio{}, errors.New("wav: missing fmt or data chunk")
	}
	return decodeWavData(format, payload)
}
// wavFormat holds the fields of a WAV "fmt " chunk that the decoder uses.
// It is filled in by parseWavFormat and consumed by decodeWavData.
type wavFormat struct {
// AudioFormat is the format tag read from bytes 0-1 of the chunk.
// decodeWavData accepts 1 (PCM) and 3 (IEEE float); 0xFFFE marks the
// extensible layout.
AudioFormat uint16
// NumChannels is the channel count read from bytes 2-3.
NumChannels uint16
// SampleRate is the sample rate read from bytes 4-7.
SampleRate uint32
// BitsPerSample is the sample width in bits read from bytes 14-15.
BitsPerSample uint16
// Extensible is set when AudioFormat is 0xFFFE and the chunk is at least
// 40 bytes long.
Extensible bool
// SubFormat is the 16-byte sub-format GUID copied from bytes 24-39 of an
// extensible chunk; it is left zero otherwise.
SubFormat [16]byte
}
// parseWavFormat decodes the body of a "fmt " chunk into fmtChunk.
//
// buf must hold at least the 16-byte base layout. The extensible fields are
// filled in only when the format tag is 0xFFFE and buf is at least 40 bytes
// long.
func parseWavFormat(buf []byte, fmtChunk *wavFormat) error {
	const (
		baseLen       = 16
		extensibleLen = 40
		extensibleTag = 0xFFFE
	)
	if len(buf) < baseLen {
		return errors.New("wav: short fmt chunk")
	}

	le := binary.LittleEndian
	fmtChunk.AudioFormat = le.Uint16(buf[0:2])
	fmtChunk.NumChannels = le.Uint16(buf[2:4])
	fmtChunk.SampleRate = le.Uint32(buf[4:8])
	fmtChunk.BitsPerSample = le.Uint16(buf[14:16])

	if fmtChunk.AudioFormat != extensibleTag || len(buf) < extensibleLen {
		return nil
	}
	fmtChunk.Extensible = true
	copy(fmtChunk.SubFormat[:], buf[24:extensibleLen])
	return nil
}
// decodeWavData converts the raw bytes of a "data" chunk into mono samples
// according to fmtChunk.
//
// For extensible formats the sub-format GUID selects PCM (1) or IEEE float
// (3); any other format tag is rejected. The channel count must be at least
// one and the bit depth non-zero. A nil result from the sample decoder is
// reported as an unsupported bit depth.
func decodeWavData(fmtChunk wavFormat, data []byte) (Audio, error) {
	const (
		tagPCM   = 1
		tagFloat = 3
	)

	tag := fmtChunk.AudioFormat
	if fmtChunk.Extensible {
		// PCM subformat GUID 00000001-0000-0010-8000-00aa00389b71; the float
		// GUID differs only in its first field.
		switch {
		case isGUID(fmtChunk.SubFormat, 0x00000001):
			tag = tagPCM
		case isGUID(fmtChunk.SubFormat, 0x00000003):
			tag = tagFloat
		}
	}
	if tag != tagPCM && tag != tagFloat {
		return Audio{}, fmt.Errorf("wav: unsupported format %d", tag)
	}

	numChannels := int(fmtChunk.NumChannels)
	if numChannels < 1 {
		return Audio{}, errors.New("wav: invalid channel count")
	}
	depth := int(fmtChunk.BitsPerSample)
	if depth == 0 {
		return Audio{}, errors.New("wav: invalid bits per sample")
	}

	decode := decodeWavPCM
	if tag == tagFloat {
		decode = decodeWavFloat
	}
	mono := decode(data, depth, numChannels)
	if mono == nil {
		return Audio{}, fmt.Errorf("wav: unsupported bit depth %d", depth)
	}
	return Audio{SampleRate: int(fmtChunk.SampleRate), Samples: mono}, nil
}
// decodeWavPCM converts interleaved little-endian integer PCM into mono
// samples by averaging the channels of each frame.
//
// Supported depths are 8 (unsigned, offset by 128), 16, 24 and 32 bits
// (signed). Each sample is divided by 2^(bits-1). Trailing bytes that do not
// fill a whole frame are ignored.
//
// It returns nil for an unsupported bit depth or a non-positive channel
// count, regardless of how much data is supplied; supported input with no
// complete frame yields an empty, non-nil slice.
func decodeWavPCM(data []byte, bits, channels int) []float64 {
	// Validate up front so that the result does not depend on whether the
	// loop below runs at least once.
	switch bits {
	case 8, 16, 24, 32:
	default:
		return nil
	}
	if channels <= 0 {
		return nil
	}

	bytesPerSample := bits / 8
	frameSize := bytesPerSample * channels
	scale := float64(int64(1) << (bits - 1))

	out := make([]float64, len(data)/frameSize)
	for i := range out {
		frame := data[i*frameSize : (i+1)*frameSize]
		var sum float64
		for ch := 0; ch < channels; ch++ {
			s := frame[ch*bytesPerSample : (ch+1)*bytesPerSample]
			var v int32
			switch bits {
			case 8:
				v = int32(s[0]) - 128
			case 16:
				v = int32(int16(binary.LittleEndian.Uint16(s)))
			case 24:
				v = int32(s[0]) | int32(s[1])<<8 | int32(s[2])<<16
				// Sign-extend from 24 to 32 bits.
				if v&0x800000 != 0 {
					v |= ^0xffffff
				}
			case 32:
				v = int32(binary.LittleEndian.Uint32(s))
			}
			sum += float64(v) / scale
		}
		out[i] = sum / float64(channels)
	}
	return out
}
// decodeWavFloat converts interleaved little-endian IEEE float samples into
// mono samples by averaging the channels of each frame.
//
// Supported widths are 32 and 64 bits. Trailing bytes that do not fill a
// whole frame are ignored.
//
// It returns nil for an unsupported width or a non-positive channel count,
// regardless of how much data is supplied; supported input with no complete
// frame yields an empty, non-nil slice.
func decodeWavFloat(data []byte, bits, channels int) []float64 {
	// Validate up front so that the result does not depend on whether the
	// loop below runs at least once.
	if bits != 32 && bits != 64 {
		return nil
	}
	if channels <= 0 {
		return nil
	}

	bytesPerSample := bits / 8
	frameSize := bytesPerSample * channels

	out := make([]float64, len(data)/frameSize)
	for i := range out {
		frame := data[i*frameSize : (i+1)*frameSize]
		var sum float64
		for ch := 0; ch < channels; ch++ {
			s := frame[ch*bytesPerSample : (ch+1)*bytesPerSample]
			if bits == 32 {
				sum += float64(math.Float32frombits(binary.LittleEndian.Uint32(s)))
			} else {
				sum += math.Float64frombits(binary.LittleEndian.Uint64(s))
			}
		}
		out[i] = sum / float64(channels)
	}
	return out
}
// isGUID reports whether b is the GUID xxxxxxxx-0000-0010-8000-00aa00389b71
// whose first field equals sub. The first three fields are compared in
// little-endian byte order, the remaining eight bytes as written.
func isGUID(b [16]byte, sub uint32) bool {
	var want [16]byte
	binary.LittleEndian.PutUint32(want[0:4], sub)
	binary.LittleEndian.PutUint16(want[4:6], 0x0000)
	binary.LittleEndian.PutUint16(want[6:8], 0x0010)
	copy(want[8:], []byte{0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71})
	return b == want
}

View File

@ -0,0 +1,47 @@
package audio
import (
"bytes"
"testing"
)
func TestDecodeWAVExtensiblePCM(t *testing.T) {
payload := []byte{0, 0, 0, 0}
buf := &bytes.Buffer{}
riffSize := 4 + (8 + 40) + (8 + len(payload))
buf.WriteString("RIFF")
writeU32(buf, uint32(riffSize))
buf.WriteString("WAVE")
buf.WriteString("fmt ")
writeU32(buf, 40)
writeU16(buf, 0xFFFE)
writeU16(buf, 1)
writeU32(buf, 44100)
writeU32(buf, 44100*2)
writeU16(buf, 2)
writeU16(buf, 16)
writeU16(buf, 22)
writeU16(buf, 16)
writeU32(buf, 1)
buf.Write([]byte{
0x01, 0x00, 0x00, 0x00,
0x00, 0x00,
0x10, 0x00,
0x80, 0x00,
0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71,
})
buf.WriteString("data")
writeU32(buf, uint32(len(payload)))
buf.Write(payload)
pcm, err := DecodeBytes(buf.Bytes(), Options{})
if err != nil {
t.Fatalf("DecodeBytes: %v", err)
}
if len(pcm.Samples) == 0 {
t.Fatalf("empty samples")
}
}

View File

@ -0,0 +1,118 @@
package audio
import (
"bytes"
"testing"
)
func TestDecodeWAVWithExtraChunk(t *testing.T) {
payload := []byte{0, 0, 0, 0}
buf := &bytes.Buffer{}
buf.WriteString("RIFF")
writeU32(buf, uint32(4+(8+16)+(8+5)+(8+len(payload))))
buf.WriteString("WAVE")
buf.WriteString("fmt ")
writeU32(buf, 16)
writeU16(buf, 1)
writeU16(buf, 1)
writeU32(buf, 44100)
writeU32(buf, 44100*2)
writeU16(buf, 2)
writeU16(buf, 16)
buf.WriteString("JUNK")
writeU32(buf, 5)
buf.Write([]byte{1, 2, 3, 4, 5})
buf.WriteByte(0)
buf.WriteString("data")
writeU32(buf, uint32(len(payload)))
buf.Write(payload)
pcm, err := DecodeBytes(buf.Bytes(), Options{})
if err != nil {
t.Fatalf("DecodeBytes: %v", err)
}
if len(pcm.Samples) == 0 {
t.Fatalf("empty samples")
}
}
func TestDecodeWAVMissingData(t *testing.T) {
buf := &bytes.Buffer{}
buf.WriteString("RIFF")
writeU32(buf, 4+(8+16))
buf.WriteString("WAVE")
buf.WriteString("fmt ")
writeU32(buf, 16)
writeU16(buf, 1)
writeU16(buf, 1)
writeU32(buf, 44100)
writeU32(buf, 44100*2)
writeU16(buf, 2)
writeU16(buf, 16)
if _, err := DecodeBytes(buf.Bytes(), Options{}); err == nil {
t.Fatalf("expected error for missing data")
}
}
// TestDecodeWAVHeaderOnly expects decodeWAV to fail on a file that consists
// of the RIFF/WAVE header alone.
func TestDecodeWAVHeaderOnly(t *testing.T) {
	var wav bytes.Buffer
	wav.WriteString("RIFF")
	writeU32(&wav, 4)
	wav.WriteString("WAVE")

	_, err := decodeWAV(bytes.NewReader(wav.Bytes()))
	if err == nil {
		t.Fatalf("expected error")
	}
}
func TestDecodeWAVInvalidChannels(t *testing.T) {
buf := &bytes.Buffer{}
buf.WriteString("RIFF")
writeU32(buf, 4+(8+16)+(8+2))
buf.WriteString("WAVE")
buf.WriteString("fmt ")
writeU32(buf, 16)
writeU16(buf, 1)
writeU16(buf, 0)
writeU32(buf, 44100)
writeU32(buf, 44100*2)
writeU16(buf, 2)
writeU16(buf, 16)
buf.WriteString("data")
writeU32(buf, 2)
buf.Write([]byte{0, 0})
if _, err := DecodeBytes(buf.Bytes(), Options{}); err == nil {
t.Fatalf("expected error for channels")
}
}
func TestDecodeWAVFloatUnsupportedBits(t *testing.T) {
buf := &bytes.Buffer{}
buf.WriteString("RIFF")
writeU32(buf, 4+(8+16)+(8+3))
buf.WriteString("WAVE")
buf.WriteString("fmt ")
writeU32(buf, 16)
writeU16(buf, 3)
writeU16(buf, 1)
writeU32(buf, 44100)
writeU32(buf, 44100*3)
writeU16(buf, 3)
writeU16(buf, 24)
buf.WriteString("data")
writeU32(buf, 3)
buf.Write([]byte{0, 0, 0})
if _, err := DecodeBytes(buf.Bytes(), Options{}); err == nil {
t.Fatalf("expected error for float bit depth")
}
}

View File

@ -0,0 +1,77 @@
package audio
import (
"bytes"
"encoding/binary"
"testing"
)
func TestDecodeWAVFloat32(t *testing.T) {
buf := &bytes.Buffer{}
buf.WriteString("RIFF")
writeU32(buf, 4+(8+16)+(8+8))
buf.WriteString("WAVE")
buf.WriteString("fmt ")
writeU32(buf, 16)
writeU16(buf, 3)
writeU16(buf, 1)
writeU32(buf, 44100)
writeU32(buf, 44100*4)
writeU16(buf, 4)
writeU16(buf, 32)
buf.WriteString("data")
writeU32(buf, 8)
_ = binary.Write(buf, binary.LittleEndian, float32(0.5))
_ = binary.Write(buf, binary.LittleEndian, float32(-0.25))
pcm, err := DecodeBytes(buf.Bytes(), Options{})
if err != nil {
t.Fatalf("DecodeBytes: %v", err)
}
if len(pcm.Samples) != 2 {
t.Fatalf("samples = %d", len(pcm.Samples))
}
}
func TestDecodeWAVFloat64(t *testing.T) {
buf := &bytes.Buffer{}
buf.WriteString("RIFF")
writeU32(buf, 4+(8+16)+(8+16))
buf.WriteString("WAVE")
buf.WriteString("fmt ")
writeU32(buf, 16)
writeU16(buf, 3)
writeU16(buf, 1)
writeU32(buf, 44100)
writeU32(buf, 44100*8)
writeU16(buf, 8)
writeU16(buf, 64)
buf.WriteString("data")
writeU32(buf, 16)
_ = binary.Write(buf, binary.LittleEndian, float64(0.5))
_ = binary.Write(buf, binary.LittleEndian, float64(-0.25))
pcm, err := DecodeBytes(buf.Bytes(), Options{})
if err != nil {
t.Fatalf("DecodeBytes: %v", err)
}
if len(pcm.Samples) != 2 {
t.Fatalf("samples = %d", len(pcm.Samples))
}
}
// writeU16 appends v to buf as two bytes, least significant byte first.
func writeU16(buf *bytes.Buffer, v uint16) {
	lo, hi := byte(v), byte(v>>8)
	buf.Write([]byte{lo, hi})
}
// writeU32 appends v to buf as four bytes, least significant byte first.
func writeU32(buf *bytes.Buffer, v uint32) {
	for shift := uint(0); shift < 32; shift += 8 {
		buf.WriteByte(byte(v >> shift))
	}
}

View File

@ -0,0 +1,101 @@
package audio
import (
"bytes"
"encoding/binary"
"testing"
)
// TestDecodeWAVPCM8 decodes four 8-bit PCM samples.
func TestDecodeWAVPCM8(t *testing.T) {
	wav := makeWAVPCM(8, []int32{0, 64, -64, 0}, 44100)
	decoded, err := DecodeBytes(wav, Options{})
	if err != nil {
		t.Fatalf("DecodeBytes: %v", err)
	}
	if count := len(decoded.Samples); count != 4 {
		t.Fatalf("samples = %d", count)
	}
}
// TestDecodeWAVPCM24 decodes four 24-bit PCM samples.
func TestDecodeWAVPCM24(t *testing.T) {
	wav := makeWAVPCM(24, []int32{0, 100000, -100000, 0}, 44100)
	decoded, err := DecodeBytes(wav, Options{})
	if err != nil {
		t.Fatalf("DecodeBytes: %v", err)
	}
	if count := len(decoded.Samples); count != 4 {
		t.Fatalf("samples = %d", count)
	}
}
// TestDecodeWAVPCM32 decodes four 32-bit PCM samples.
func TestDecodeWAVPCM32(t *testing.T) {
	wav := makeWAVPCM(32, []int32{0, 100000, -100000, 0}, 44100)
	decoded, err := DecodeBytes(wav, Options{})
	if err != nil {
		t.Fatalf("DecodeBytes: %v", err)
	}
	if count := len(decoded.Samples); count != 4 {
		t.Fatalf("samples = %d", count)
	}
}
// TestDecodeWAVUnsupportedFormat expects an error for format tag 7, which is
// neither PCM nor IEEE float.
func TestDecodeWAVUnsupportedFormat(t *testing.T) {
	wav := makeWAVCustom(7, 16, []byte{0, 0}, 44100)
	_, err := DecodeBytes(wav, Options{})
	if err == nil {
		t.Fatalf("expected error")
	}
}
// TestDecodeWAVUnsupportedBits expects an error for PCM data declared as
// 12 bits per sample.
func TestDecodeWAVUnsupportedBits(t *testing.T) {
	wav := makeWAVCustom(1, 12, []byte{0, 0}, 44100)
	_, err := DecodeBytes(wav, Options{})
	if err == nil {
		t.Fatalf("expected error")
	}
}
// makeWAVPCM encodes samples as mono integer PCM of the given bit depth and
// wraps them in a WAV container via makeWAVCustom. Depths other than 8, 16,
// 24 and 32 produce an empty payload.
func makeWAVPCM(bits int, samples []int32, sampleRate int) []byte {
	var payload bytes.Buffer
	for _, sample := range samples {
		switch bits {
		case 8:
			// 8-bit WAV samples are stored unsigned with an offset of 128.
			payload.WriteByte(byte(int(sample) + 128))
		case 16:
			_ = binary.Write(&payload, binary.LittleEndian, int16(sample))
		case 24:
			u := uint32(sample)
			payload.Write([]byte{byte(u), byte(u >> 8), byte(u >> 16)})
		case 32:
			_ = binary.Write(&payload, binary.LittleEndian, sample)
		}
	}
	return makeWAVCustom(1, bits, payload.Bytes(), sampleRate)
}
// makeWAVCustom builds a mono WAV file with a 16-byte "fmt " chunk carrying
// the given format tag, bit depth and sample rate, followed by a "data"
// chunk containing payload as is.
func makeWAVCustom(format uint16, bits int, payload []byte, sampleRate int) []byte {
	bytesPerSample := bits / 8

	out := new(bytes.Buffer)
	out.WriteString("RIFF")
	writeU32(out, uint32(4+(8+16)+(8+len(payload))))
	out.WriteString("WAVE")

	out.WriteString("fmt ")
	writeU32(out, 16)
	writeU16(out, format)
	writeU16(out, 1) // channels
	writeU32(out, uint32(sampleRate))
	writeU32(out, uint32(sampleRate*bytesPerSample)) // byte rate
	writeU16(out, uint16(bytesPerSample))            // block align
	writeU16(out, uint16(bits))

	out.WriteString("data")
	writeU32(out, uint32(len(payload)))
	out.Write(payload)
	return out.Bytes()
}

View File

@ -0,0 +1,38 @@
package audio
import (
"bytes"
"encoding/binary"
)
// makeWAV builds a 16-bit PCM WAV file from samples, which are written in
// the order given. A channel count below one is treated as one.
func makeWAV(samples []int16, sampleRate int, channels int) []byte {
	if channels < 1 {
		channels = 1
	}
	const bytesPerSample = 2
	payloadLen := len(samples) * bytesPerSample
	frameBytes := channels * bytesPerSample

	var out bytes.Buffer
	le := binary.LittleEndian
	u16 := func(v int) { _ = binary.Write(&out, le, uint16(v)) }
	u32 := func(v int) { _ = binary.Write(&out, le, uint32(v)) }

	out.WriteString("RIFF")
	u32(4 + (8 + 16) + (8 + payloadLen))
	out.WriteString("WAVE")

	out.WriteString("fmt ")
	u32(16)
	u16(1) // format tag: PCM
	u16(channels)
	u32(sampleRate)
	u32(sampleRate * frameBytes) // byte rate
	u16(frameBytes)              // block align
	u16(16)                      // bits per sample

	out.WriteString("data")
	u32(payloadLen)
	for i := range samples {
		_ = binary.Write(&out, le, samples[i])
	}
	return out.Bytes()
}

40
internal/dsp/fft.go Normal file
View File

@ -0,0 +1,40 @@
// Package dsp provides spectral analysis utilities.
package dsp
import "math"
// FFTInPlace replaces x with its discrete Fourier transform using an
// iterative radix-2 algorithm. len(x) must be a power of two; slices of
// length 0 or 1 are left unchanged.
func FFTInPlace(x []complex128) {
	n := len(x)
	if n < 2 {
		return
	}

	// Reorder the input into bit-reversed index order. rev tracks the
	// bit-reversed counterpart of idx and is advanced by a reversed increment.
	rev := 0
	for idx := 1; idx < n; idx++ {
		mask := n >> 1
		for rev&mask != 0 {
			rev &^= mask
			mask >>= 1
		}
		rev |= mask
		if idx < rev {
			x[idx], x[rev] = x[rev], x[idx]
		}
	}

	// Combine transforms of length span/2 into transforms of length span.
	for span := 2; span <= n; span *= 2 {
		half := span / 2
		theta := -2 * math.Pi / float64(span)
		step := complex(math.Cos(theta), math.Sin(theta))
		for base := 0; base < n; base += span {
			twiddle := complex(1, 0)
			for k := 0; k < half; k++ {
				lo, hi := base+k, base+k+half
				even := x[lo]
				odd := twiddle * x[hi]
				x[lo] = even + odd
				x[hi] = even - odd
				twiddle *= step
			}
		}
	}
}

13
internal/dsp/fft_test.go Normal file
View File

@ -0,0 +1,13 @@
package dsp
import "testing"
// TestFFTImpulse checks that the transform of a unit impulse is one in every
// bin.
func TestFFTImpulse(t *testing.T) {
	spectrum := []complex128{1, 0, 0, 0}
	FFTInPlace(spectrum)
	for bin := range spectrum {
		re, im := real(spectrum[bin]), imag(spectrum[bin])
		if re < 0.99 || re > 1.01 || im != 0 {
			t.Fatalf("bin %d = %v", bin, spectrum[bin])
		}
	}
}

View File

@ -0,0 +1,98 @@
// Package dsp provides spectral analysis utilities.
package dsp
import (
"math"
)
// Spectrogram contains log-magnitude FFT frames.
//
// ComputeSpectrogram fills it in as described on each field.
type Spectrogram struct {
// Frames is the number of analysis frames.
Frames int
// Bins is the number of frequency bins per frame (windowSize/2 + 1).
Bins int
// Values holds Frames*Bins magnitudes in dB, frame by frame: the value of
// bin b in frame f is Values[f*Bins+b].
Values []float64
// Min is the smallest entry of Values.
Min float64
// Max is the largest entry of Values.
Max float64
// SampleRate is the sample rate used for the analysis.
SampleRate int
// WindowSize is the analysis window length in samples.
WindowSize int
// BinHz is the frequency spacing between adjacent bins
// (SampleRate / WindowSize).
BinHz float64
}
// HannWindow returns the n coefficients of a symmetric Hann window, whose
// first and last coefficients are zero. A window of length one is the single
// coefficient 1.
func HannWindow(n int) []float64 {
	coeffs := make([]float64, n)
	if n == 1 {
		coeffs[0] = 1
		return coeffs
	}
	for i := range coeffs {
		coeffs[i] = 0.5 - 0.5*math.Cos(2*math.Pi*float64(i)/float64(n-1))
	}
	return coeffs
}
// ComputeSpectrogram computes a log-magnitude spectrogram.
//
// The samples are cut into frames of windowSize samples spaced hopSize
// samples apart, multiplied by a Hann window, zero-padded past the end of
// the input and transformed with FFTInPlace. Every bin is stored as
// 20*log10(magnitude + 1e-9).
//
// Non-positive arguments are replaced by defaults: windowSize 2048, hopSize
// windowSize/4 (at least 1) and sampleRate 44100. At least one frame is
// always produced. windowSize is passed to FFTInPlace unchanged, which is
// documented for power-of-two lengths only.
func ComputeSpectrogram(samples []float64, sampleRate, windowSize, hopSize int) Spectrogram {
	const epsilon = 1e-9 // keeps log10 finite for silent bins

	if windowSize < 1 {
		windowSize = 2048
	}
	if hopSize < 1 {
		hopSize = windowSize / 4
		if hopSize < 1 {
			hopSize = 1
		}
	}
	if sampleRate < 1 {
		sampleRate = 44100
	}

	total := len(samples)
	frames := 1
	if total > windowSize {
		frames = 1 + (total-windowSize+hopSize-1)/hopSize
	}
	bins := windowSize/2 + 1

	values := make([]float64, frames*bins)
	window := HannWindow(windowSize)
	fftBuf := make([]complex128, windowSize)
	lowest, highest := math.Inf(1), math.Inf(-1)

	for f := 0; f < frames; f++ {
		// Number of real input samples available to this frame.
		offset := f * hopSize
		avail := total - offset
		if avail > windowSize {
			avail = windowSize
		}
		if avail < 0 {
			avail = 0
		}
		for i := 0; i < avail; i++ {
			fftBuf[i] = complex(samples[offset+i]*window[i], 0)
		}
		for i := avail; i < windowSize; i++ {
			fftBuf[i] = 0
		}
		FFTInPlace(fftBuf)

		row := values[f*bins : (f+1)*bins]
		for b := range row {
			re, im := real(fftBuf[b]), imag(fftBuf[b])
			mag := math.Sqrt(re*re + im*im)
			db := 20 * math.Log10(mag+epsilon)
			row[b] = db
			if db < lowest {
				lowest = db
			}
			if db > highest {
				highest = db
			}
		}
	}

	return Spectrogram{
		Frames:     frames,
		Bins:       bins,
		Values:     values,
		Min:        lowest,
		Max:        highest,
		SampleRate: sampleRate,
		WindowSize: windowSize,
		BinHz:      float64(sampleRate) / float64(windowSize),
	}
}

View File

@ -0,0 +1,51 @@
package dsp
import "testing"
// TestComputeSpectrogram runs a constant signal through ComputeSpectrogram
// and checks the basic shape and range fields of the result.
func TestComputeSpectrogram(t *testing.T) {
	constant := make([]float64, 4096)
	for i := range constant {
		constant[i] = 0.5
	}

	got := ComputeSpectrogram(constant, 44100, 1024, 256)
	switch {
	case got.Frames <= 0 || got.Bins <= 0:
		t.Fatalf("invalid spec size")
	case len(got.Values) != got.Frames*got.Bins:
		t.Fatalf("values len mismatch")
	case got.Min >= got.Max:
		t.Fatalf("min/max not set")
	case got.BinHz <= 0:
		t.Fatalf("invalid bin hz")
	}
}
// TestHannWindow checks the length-one special case and the zero endpoints
// of a four-point window.
func TestHannWindow(t *testing.T) {
	if single := HannWindow(1); len(single) != 1 || single[0] != 1 {
		t.Fatalf("hann size 1")
	}

	four := HannWindow(4)
	if len(four) != 4 {
		t.Fatalf("hann size 4")
	}
	first, last := four[0], four[3]
	if first != 0 || last != 0 {
		t.Fatalf("hann endpoints")
	}
}
// TestComputeSpectrogramDefaults passes zero for every numeric argument and
// checks the substituted defaults and the single resulting frame.
func TestComputeSpectrogramDefaults(t *testing.T) {
	silence := make([]float64, 100)
	got := ComputeSpectrogram(silence, 0, 0, 0)

	if rate := got.SampleRate; rate != 44100 {
		t.Fatalf("sample rate default = %d", rate)
	}
	if window := got.WindowSize; window != 2048 {
		t.Fatalf("window default = %d", window)
	}
	if frames := got.Frames; frames != 1 {
		t.Fatalf("frames = %d", frames)
	}
}

View File

@ -0,0 +1,92 @@
// Package render turns spectrograms into images.
package render
import (
"errors"
"image/color"
)
// Palette maps a normalized value to a color.
//
// The renderer in this package calls it with values clamped to [0, 1]; the
// palettes built by gradient additionally clamp anything outside that range
// to their first or last color.
type Palette func(t float64) color.RGBA
// stop is one control point of a color gradient.
type stop struct {
// pos is the position of the control point; the built-in palettes use
// increasing positions from 0 to 1.
pos float64
// c is the color at pos.
c color.RGBA
}
// PaletteByName returns the built-in palette called name.
//
// Known names are "classic", "magma", "inferno", "viridis" and "gray" (also
// spelled "grey"). Any other name yields an error.
func PaletteByName(name string) (Palette, error) {
	var stops []stop
	switch name {
	case "classic":
		stops = []stop{
			{0.0, rgb(0, 0, 0)},
			{0.2, rgb(0, 32, 96)},
			{0.45, rgb(0, 160, 200)},
			{0.7, rgb(255, 180, 0)},
			{1.0, rgb(255, 255, 255)},
		}
	case "magma":
		stops = []stop{
			{0.0, rgb(0, 0, 4)},
			{0.25, rgb(59, 12, 87)},
			{0.5, rgb(180, 54, 122)},
			{0.75, rgb(251, 140, 60)},
			{1.0, rgb(252, 253, 191)},
		}
	case "inferno":
		stops = []stop{
			{0.0, rgb(0, 0, 4)},
			{0.25, rgb(61, 9, 101)},
			{0.5, rgb(187, 55, 84)},
			{0.75, rgb(249, 142, 8)},
			{1.0, rgb(252, 255, 164)},
		}
	case "viridis":
		stops = []stop{
			{0.0, rgb(68, 1, 84)},
			{0.25, rgb(58, 82, 139)},
			{0.5, rgb(32, 144, 140)},
			{0.75, rgb(94, 201, 98)},
			{1.0, rgb(253, 231, 37)},
		}
	case "gray", "grey":
		stops = []stop{
			{0, rgb(0, 0, 0)},
			{1, rgb(255, 255, 255)},
		}
	default:
		return nil, errors.New("unknown palette")
	}
	return gradient(stops), nil
}
// gradient builds a Palette that interpolates linearly between consecutive
// stops. Values at or below 0 map to the first stop's color and values at or
// above 1 to the last stop's color. A value that falls into no segment also
// maps to the last color.
func gradient(stops []stop) Palette {
	last := len(stops) - 1
	return func(t float64) color.RGBA {
		switch {
		case t <= 0:
			return stops[0].c
		case t >= 1:
			return stops[last].c
		}
		for i := 1; i <= last; i++ {
			from, to := stops[i-1], stops[i]
			if t >= from.pos && t <= to.pos {
				width := to.pos - from.pos
				if width <= 0 {
					// Degenerate segment: nothing to interpolate.
					return to.c
				}
				return lerp(from.c, to.c, (t-from.pos)/width)
			}
		}
		return stops[last].c
	}
}
// lerp blends the red, green and blue channels of a and b, returning a at
// t=0 and b at t=1. Fractional channel values are truncated. The result is
// always fully opaque.
func lerp(a, b color.RGBA, t float64) color.RGBA {
	mix := func(from, to uint8) uint8 {
		return uint8(float64(from) + (float64(to)-float64(from))*t)
	}
	blended := color.RGBA{A: 255}
	blended.R = mix(a.R, b.R)
	blended.G = mix(a.G, b.G)
	blended.B = mix(a.B, b.B)
	return blended
}
// rgb returns the fully opaque color with the given channel values.
func rgb(r, g, b uint8) color.RGBA {
	opaque := color.RGBA{A: 255}
	opaque.R, opaque.G, opaque.B = r, g, b
	return opaque
}

102
internal/render/render.go Normal file
View File

@ -0,0 +1,102 @@
// Package render turns spectrograms into images.
package render
import (
"fmt"
"image"
"math"
"github.com/steipete/songsee/internal/dsp"
)
// Options configures spectrogram rendering.
type Options struct {
// Width and Height are the output image size in pixels; both must be
// positive.
Width int
Height int
// MinFreq and MaxFreq restrict the rendered frequency range. Each is
// converted to a bin index by dividing by the spectrogram's BinHz and is
// applied only when greater than zero; an empty or inverted range falls
// back to all bins.
MinFreq float64
MaxFreq float64
// Palette maps a normalized magnitude in [0, 1] to a color; it is required.
Palette Palette
// MinDB and MaxDB are the magnitudes mapped to 0 and 1 when ClampDB is
// set; otherwise the spectrogram's own Min and Max are used.
MinDB float64
MaxDB float64
ClampDB bool
// FlipVert mirrors the image vertically. Without it the highest rendered
// bin is drawn in row 0 and the lowest in the last row.
FlipVert bool
}
// Spectrogram renders a spectrogram into an RGBA image.
//
// Columns are mapped linearly onto frames and rows onto the selected bin
// range, each picking the nearest frame or bin. Row 0 shows the highest
// selected bin unless opts.FlipVert is set. Magnitudes are normalized to
// [0, 1] between the spectrogram's Min and Max, or between opts.MinDB and
// opts.MaxDB when opts.ClampDB is set, and then passed to opts.Palette.
//
// opts.MinFreq and opts.MaxFreq limit the bin range when they are positive
// and spec.BinHz is positive; a range that ends up empty or inverted falls
// back to all bins.
//
// An error is returned for a non-positive output size, a missing palette, a
// spectrogram without bins, or one whose Values slice is shorter than
// Frames*Bins. The last two previously caused an index-out-of-range panic.
func Spectrogram(spec dsp.Spectrogram, opts Options) (*image.RGBA, error) {
	if opts.Width <= 0 || opts.Height <= 0 {
		return nil, fmt.Errorf("invalid output size")
	}
	if opts.Palette == nil {
		return nil, fmt.Errorf("palette required")
	}

	bins := spec.Bins
	if bins <= 0 {
		return nil, fmt.Errorf("invalid spectrogram: no frequency bins")
	}
	frames := spec.Frames
	if frames < 1 {
		// Only frame 0 is ever sampled in that case.
		frames = 1
	}
	if len(spec.Values) < frames*bins {
		return nil, fmt.Errorf("invalid spectrogram: %d values for %d frames of %d bins",
			len(spec.Values), frames, bins)
	}

	minDB, maxDB := spec.Min, spec.Max
	if opts.ClampDB {
		minDB, maxDB = opts.MinDB, opts.MaxDB
	}
	if maxDB <= minDB {
		maxDB = minDB + 1
	}
	dbSpan := maxDB - minDB

	// Convert a frequency to a bin index. The range check happens before the
	// conversion because converting an out-of-range float to int yields an
	// unspecified value.
	toBin := func(freq float64) int {
		b := freq / spec.BinHz
		if b >= float64(bins) {
			return bins
		}
		return int(b)
	}
	minBin, maxBin := 0, bins-1
	if spec.BinHz > 0 {
		if opts.MinFreq > 0 {
			minBin = toBin(opts.MinFreq)
		}
		if opts.MaxFreq > 0 {
			maxBin = toBin(opts.MaxFreq)
		}
	}
	if minBin < 0 {
		minBin = 0
	}
	if maxBin >= bins {
		maxBin = bins - 1
	}
	if maxBin <= minBin {
		minBin = 0
		maxBin = bins - 1
	}
	binSpan := maxBin - minBin

	// The bin shown in a row is the same for every column, so compute the
	// mapping once.
	rowBin := make([]int, opts.Height)
	for y := range rowBin {
		pos := 0.0
		if opts.Height > 1 {
			pos = float64(y) / float64(opts.Height-1)
		}
		bin := minBin + int(math.Round((1-pos)*float64(binSpan)))
		if bin < minBin {
			bin = minBin
		}
		if bin > maxBin {
			bin = maxBin
		}
		rowBin[y] = bin
	}

	img := image.NewRGBA(image.Rect(0, 0, opts.Width, opts.Height))
	for x := 0; x < opts.Width; x++ {
		frame := 0
		if frames > 1 && opts.Width > 1 {
			frame = int(math.Round(float64(x) * float64(frames-1) / float64(opts.Width-1)))
		}
		column := spec.Values[frame*bins : (frame+1)*bins]
		for y, bin := range rowBin {
			norm := (column[bin] - minDB) / dbSpan
			if norm < 0 {
				norm = 0
			}
			if norm > 1 {
				norm = 1
			}
			ypos := y
			if opts.FlipVert {
				ypos = opts.Height - 1 - y
			}
			img.SetRGBA(x, ypos, opts.Palette(norm))
		}
	}
	return img, nil
}

View File

@ -0,0 +1,154 @@
package render
import (
"image/color"
"testing"
"github.com/steipete/songsee/internal/dsp"
)
// TestPaletteByName checks that every documented palette name resolves and
// that an unknown name is rejected.
func TestPaletteByName(t *testing.T) {
	for _, known := range []string{"classic", "magma", "inferno", "viridis", "gray", "grey"} {
		_, err := PaletteByName(known)
		if err != nil {
			t.Fatalf("palette %s: %v", known, err)
		}
	}

	_, err := PaletteByName("nope")
	if err == nil {
		t.Fatalf("expected error for unknown palette")
	}
}
// TestRenderSpectrogram renders a 2x2 spectrogram into a 4x4 image and
// checks the image size and that opposite corners differ.
func TestRenderSpectrogram(t *testing.T) {
	redRamp := func(v float64) color.RGBA {
		return color.RGBA{R: uint8(255 * v), A: 255}
	}
	input := dsp.Spectrogram{
		Frames: 2,
		Bins:   2,
		Values: []float64{-20, -5, -10, -1},
		Min:    -20,
		Max:    -1,
		BinHz:  100,
	}

	img, err := Spectrogram(input, Options{Width: 4, Height: 4, Palette: redRamp})
	if err != nil {
		t.Fatalf("RenderSpectrogram: %v", err)
	}
	if bounds := img.Bounds(); bounds.Dx() != 4 || bounds.Dy() != 4 {
		t.Fatalf("unexpected bounds")
	}
	if topLeft, bottomRight := img.RGBAAt(0, 0), img.RGBAAt(3, 3); topLeft == bottomRight {
		t.Fatalf("expected varying pixels")
	}
}
// TestRenderSpectrogramErrors checks that a zero width and a missing palette
// are both rejected.
func TestRenderSpectrogramErrors(t *testing.T) {
	input := dsp.Spectrogram{
		Frames: 1,
		Bins:   1,
		Values: []float64{0},
		Min:    0,
		Max:    1,
		BinHz:  100,
	}
	blank := func(float64) color.RGBA { return color.RGBA{} }

	_, sizeErr := Spectrogram(input, Options{Width: 0, Height: 1, Palette: blank})
	if sizeErr == nil {
		t.Fatalf("expected size error")
	}
	_, paletteErr := Spectrogram(input, Options{Width: 1, Height: 1})
	if paletteErr == nil {
		t.Fatalf("expected palette error")
	}
}
// TestRenderSpectrogramClampAndRange renders with a frequency range, an
// explicit dB range and vertical flipping, and checks the image size.
func TestRenderSpectrogramClampAndRange(t *testing.T) {
	blueRamp := func(v float64) color.RGBA {
		return color.RGBA{B: uint8(255 * v), A: 255}
	}
	input := dsp.Spectrogram{
		Frames: 3,
		Bins:   4,
		Values: []float64{-80, -40, -20, 0, -70, -35, -15, -2, -60, -30, -10, -1},
		Min:    -80,
		Max:    0,
		BinHz:  100,
	}
	settings := Options{
		Width:    3,
		Height:   2,
		MinFreq:  50,
		MaxFreq:  250,
		Palette:  blueRamp,
		MinDB:    -60,
		MaxDB:    -10,
		ClampDB:  true,
		FlipVert: true,
	}

	img, err := Spectrogram(input, settings)
	if err != nil {
		t.Fatalf("RenderSpectrogram: %v", err)
	}
	if bounds := img.Bounds(); bounds.Dx() != 3 || bounds.Dy() != 2 {
		t.Fatalf("unexpected bounds")
	}
}
// TestGradientEndpoints checks a black-to-red gradient at both ends, in the
// middle and outside the [0, 1] range.
func TestGradientEndpoints(t *testing.T) {
	ramp := gradient([]stop{{0, rgb(0, 0, 0)}, {1, rgb(255, 0, 0)}})

	start := ramp(0)
	if start.R != 0 || start.G != 0 || start.B != 0 {
		t.Fatalf("start color mismatch")
	}
	end := ramp(1)
	if end.R != 255 || end.G != 0 || end.B != 0 {
		t.Fatalf("end color mismatch")
	}
	if mid := ramp(0.5); mid.R == 0 || mid.R == 255 {
		t.Fatalf("mid color not interpolated")
	}
	if below := ramp(-1); below.R != 0 {
		t.Fatalf("clamp low")
	}
	if above := ramp(2); above.R != 255 {
		t.Fatalf("clamp high")
	}
}
// TestRenderSpectrogramSinglePixel renders a one-frame, one-bin spectrogram
// with equal Min and Max into a 1x1 image.
func TestRenderSpectrogramSinglePixel(t *testing.T) {
	green := func(_ float64) color.RGBA { return color.RGBA{G: 200, A: 255} }
	input := dsp.Spectrogram{
		Frames: 1,
		Bins:   1,
		Values: []float64{-10},
		Min:    -10,
		Max:    -10,
		BinHz:  100,
	}

	img, err := Spectrogram(input, Options{Width: 1, Height: 1, Palette: green})
	if err != nil {
		t.Fatalf("RenderSpectrogram: %v", err)
	}
	if bounds := img.Bounds(); bounds.Dx() != 1 || bounds.Dy() != 1 {
		t.Fatalf("unexpected bounds")
	}
}
// TestRenderSpectrogramRangeReset passes an inverted frequency range and
// expects rendering to succeed anyway.
func TestRenderSpectrogramRangeReset(t *testing.T) {
	flat := func(_ float64) color.RGBA { return color.RGBA{R: 50, A: 255} }
	input := dsp.Spectrogram{
		Frames: 2,
		Bins:   3,
		Values: []float64{-10, -5, -1, -10, -5, -1},
		Min:    -10,
		Max:    -1,
		BinHz:  100,
	}
	settings := Options{
		Width:   2,
		Height:  2,
		MinFreq: 1000,
		MaxFreq: 200,
		Palette: flat,
	}

	if _, err := Spectrogram(input, settings); err != nil {
		t.Fatalf("RenderSpectrogram: %v", err)
	}
}

BIN
testdata/sine.mp3 vendored Normal file

Binary file not shown.