feat: add songsee CLI and tests
This commit is contained in:
parent
b7a5d13990
commit
ebb7912ac3
BIN
cmd/songsee/-.jpg
Normal file
BIN
cmd/songsee/-.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 32 KiB |
261
cmd/songsee/main.go
Normal file
261
cmd/songsee/main.go
Normal file
@ -0,0 +1,261 @@
|
||||
// Package main provides the songsee CLI entrypoint.
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"image"
|
||||
"image/jpeg"
|
||||
"image/png"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/alecthomas/kong"
|
||||
"github.com/steipete/songsee/internal/audio"
|
||||
"github.com/steipete/songsee/internal/dsp"
|
||||
"github.com/steipete/songsee/internal/render"
|
||||
)
|
||||
|
||||
var version = "dev"
|
||||
|
||||
type cli struct {
|
||||
Input string `arg:"" help:"file path or '-' for stdin"`
|
||||
Output string `short:"o" help:"output image path"`
|
||||
Format string `help:"output format: jpg or png" default:"jpg"`
|
||||
Width int `help:"output width in pixels" default:"1920"`
|
||||
Height int `help:"output height in pixels" default:"1080"`
|
||||
WindowSize int `name:"window" help:"FFT window size in samples" default:"2048"`
|
||||
HopSize int `name:"hop" help:"hop size in samples" default:"512"`
|
||||
MinFreq float64 `name:"min-freq" help:"minimum frequency in Hz"`
|
||||
MaxFreq float64 `name:"max-freq" help:"maximum frequency in Hz (0 = Nyquist)"`
|
||||
StartSec float64 `name:"start" help:"start time in seconds"`
|
||||
Duration float64 `name:"duration" help:"duration in seconds (0 = full)"`
|
||||
SampleRate int `name:"sample-rate" help:"ffmpeg output sample rate" default:"44100"`
|
||||
Style string `help:"palette style: classic, magma, inferno, viridis, gray" default:"classic"`
|
||||
FFmpegPath string `name:"ffmpeg" help:"path to ffmpeg binary"`
|
||||
Quiet bool `short:"q" help:"suppress stdout output"`
|
||||
Verbose bool `short:"v" help:"verbose stderr output"`
|
||||
Version kong.VersionFlag `name:"version" help:"print version"`
|
||||
}
|
||||
|
||||
// exitPanic is the panic value raised by the exit hook handed to kong; it
// carries the exit code kong asked for so the caller can recover it and
// return the code instead of terminating the process.
type exitPanic struct{ code int }
|
||||
|
||||
func main() {
|
||||
os.Exit(run(os.Args[1:], os.Stdin, os.Stdout, os.Stderr))
|
||||
}
|
||||
|
||||
func run(args []string, stdin io.Reader, stdout, stderr io.Writer) int {
|
||||
formatSet := hasFlag(args, "--format")
|
||||
cfg := cli{}
|
||||
exitCode := -1
|
||||
|
||||
parser, err := kong.New(&cfg,
|
||||
kong.Name("songsee"),
|
||||
kong.Description("generate a classic spectrogram image"),
|
||||
kong.Vars{"version": version},
|
||||
kong.Writers(stdout, stderr),
|
||||
kong.Exit(func(code int) { panic(exitPanic{code: code}) }),
|
||||
)
|
||||
if err != nil {
|
||||
_, _ = fmt.Fprintln(stderr, "songsee:", err)
|
||||
return 1
|
||||
}
|
||||
|
||||
var ctx *kong.Context
|
||||
func() {
|
||||
defer func() {
|
||||
if recovered := recover(); recovered != nil {
|
||||
if exit, ok := recovered.(exitPanic); ok {
|
||||
exitCode = exit.code
|
||||
return
|
||||
}
|
||||
panic(recovered)
|
||||
}
|
||||
}()
|
||||
ctx, err = parser.Parse(args)
|
||||
}()
|
||||
if exitCode >= 0 {
|
||||
return exitCode
|
||||
}
|
||||
if err != nil {
|
||||
if parseErr, ok := err.(*kong.ParseError); ok {
|
||||
_, _ = fmt.Fprintln(stderr, "songsee:", parseErr)
|
||||
if parseErr.Context != nil {
|
||||
parseErr.Context.Stdout = stderr
|
||||
_ = parseErr.Context.PrintUsage(false)
|
||||
}
|
||||
return 2
|
||||
}
|
||||
_, _ = fmt.Fprintln(stderr, "songsee:", err)
|
||||
return 1
|
||||
}
|
||||
|
||||
input := cfg.Input
|
||||
if input == "" {
|
||||
if ctx != nil {
|
||||
ctx.Stdout = stderr
|
||||
_ = ctx.PrintUsage(false)
|
||||
}
|
||||
return 2
|
||||
}
|
||||
|
||||
if cfg.MaxFreq > 0 && cfg.MaxFreq <= cfg.MinFreq {
|
||||
return dieUsage(stderr, ctx, "--max-freq must be > --min-freq")
|
||||
}
|
||||
if cfg.Width <= 0 || cfg.Height <= 0 {
|
||||
return dieUsage(stderr, ctx, "--width and --height must be > 0")
|
||||
}
|
||||
if cfg.WindowSize <= 0 || cfg.HopSize <= 0 {
|
||||
return dieUsage(stderr, ctx, "--window and --hop must be > 0")
|
||||
}
|
||||
if !isPowerOfTwo(cfg.WindowSize) {
|
||||
return dieUsage(stderr, ctx, "--window must be a power of two")
|
||||
}
|
||||
if cfg.StartSec < 0 || cfg.Duration < 0 {
|
||||
return dieUsage(stderr, ctx, "--start and --duration must be >= 0")
|
||||
}
|
||||
|
||||
format := strings.ToLower(cfg.Format)
|
||||
if format != "jpg" && format != "jpeg" && format != "png" {
|
||||
return dieUsage(stderr, ctx, "--format must be jpg or png")
|
||||
}
|
||||
if format == "jpeg" {
|
||||
format = "jpg"
|
||||
}
|
||||
|
||||
output := cfg.Output
|
||||
if output == "" {
|
||||
if input == "-" {
|
||||
output = "songsee." + format
|
||||
} else {
|
||||
ext := strings.ToLower(filepath.Ext(input))
|
||||
base := strings.TrimSuffix(filepath.Base(input), ext)
|
||||
output = filepath.Join(filepath.Dir(input), base+"."+format)
|
||||
}
|
||||
} else {
|
||||
ext := strings.ToLower(filepath.Ext(output))
|
||||
switch ext {
|
||||
case ".png":
|
||||
format = "png"
|
||||
case ".jpg", ".jpeg":
|
||||
format = "jpg"
|
||||
default:
|
||||
if !formatSet {
|
||||
output = output + "." + format
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if cfg.Verbose {
|
||||
_, _ = fmt.Fprintf(stderr, "input: %s\n", input)
|
||||
_, _ = fmt.Fprintf(stderr, "output: %s (%s)\n", output, format)
|
||||
}
|
||||
|
||||
opts := audio.Options{SampleRate: cfg.SampleRate, FFmpegPath: cfg.FFmpegPath}
|
||||
var pcm audio.Audio
|
||||
if input == "-" {
|
||||
pcm, err = audio.DecodeReader(stdin, opts)
|
||||
} else {
|
||||
pcm, err = audio.DecodeFile(input, opts)
|
||||
}
|
||||
if err != nil {
|
||||
return die(stderr, err)
|
||||
}
|
||||
if len(pcm.Samples) == 0 {
|
||||
return die(stderr, errors.New("no samples decoded"))
|
||||
}
|
||||
if cfg.Verbose {
|
||||
_, _ = fmt.Fprintf(stderr, "decoded: %d samples @ %d Hz\n", len(pcm.Samples), pcm.SampleRate)
|
||||
}
|
||||
if cfg.StartSec > 0 || cfg.Duration > 0 {
|
||||
pcm, err = audio.Slice(pcm, cfg.StartSec, cfg.Duration)
|
||||
if err != nil {
|
||||
return die(stderr, err)
|
||||
}
|
||||
if cfg.Verbose {
|
||||
_, _ = fmt.Fprintf(stderr, "slice: %0.2fs + %0.2fs => %d samples\n", cfg.StartSec, cfg.Duration, len(pcm.Samples))
|
||||
}
|
||||
}
|
||||
|
||||
spec := dsp.ComputeSpectrogram(pcm.Samples, pcm.SampleRate, cfg.WindowSize, cfg.HopSize)
|
||||
style := strings.ToLower(strings.TrimSpace(cfg.Style))
|
||||
palette, err := render.PaletteByName(style)
|
||||
if err != nil {
|
||||
return dieUsage(stderr, ctx, "unknown style")
|
||||
}
|
||||
|
||||
img, err := render.Spectrogram(spec, render.Options{
|
||||
Width: cfg.Width,
|
||||
Height: cfg.Height,
|
||||
MinFreq: cfg.MinFreq,
|
||||
MaxFreq: cfg.MaxFreq,
|
||||
Palette: palette,
|
||||
})
|
||||
if err != nil {
|
||||
return die(stderr, err)
|
||||
}
|
||||
|
||||
if err := writeImage(output, format, img, stdout); err != nil {
|
||||
return die(stderr, err)
|
||||
}
|
||||
|
||||
if output != "-" && !cfg.Quiet {
|
||||
_, _ = fmt.Fprintln(stdout, output)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// writeImage encodes img as format ("png" or "jpg") and writes it to path, or
// to stdout when path is "-". JPEG output uses quality 95.
//
// The format is checked before the destination is opened, so an unknown
// format never creates or truncates a file. For file output, an error from
// closing the file is returned when encoding itself succeeded, because a
// failed close can mean the data never reached the disk.
func writeImage(path, format string, img image.Image, stdout io.Writer) (err error) {
	var encode func(w io.Writer) error
	switch format {
	case "png":
		encode = func(w io.Writer) error { return png.Encode(w, img) }
	case "jpg":
		encode = func(w io.Writer) error { return jpeg.Encode(w, img, &jpeg.Options{Quality: 95}) }
	default:
		return fmt.Errorf("unknown format %s", format)
	}

	if path == "-" {
		return encode(stdout)
	}

	file, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		// Keep the encode error if there is one; otherwise surface the close
		// error through the named result.
		if closeErr := file.Close(); err == nil {
			err = closeErr
		}
	}()
	return encode(file)
}
|
||||
|
||||
// die reports err on stderr, prefixed with the program name, and returns the
// exit code used for runtime failures.
func die(stderr io.Writer, err error) int {
	const failure = 1
	_, _ = fmt.Fprintln(stderr, "songsee:", err)
	return failure
}
|
||||
|
||||
func dieUsage(stderr io.Writer, ctx *kong.Context, msg string) int {
|
||||
_, _ = fmt.Fprintln(stderr, "songsee:", msg)
|
||||
if ctx != nil {
|
||||
ctx.Stdout = stderr
|
||||
_ = ctx.PrintUsage(false)
|
||||
}
|
||||
return 2
|
||||
}
|
||||
|
||||
// isPowerOfTwo reports whether v is a positive integer with exactly one bit
// set.
func isPowerOfTwo(v int) bool {
	if v <= 0 {
		return false
	}
	// Clearing the lowest set bit leaves zero only when it was the sole bit.
	return v&(v-1) == 0
}
|
||||
|
||||
// hasFlag reports whether the long flag name (e.g. "--format") appears in
// args, either as its own argument or in the "name=value" form.
//
// Scanning stops at the first bare "--": by convention everything after it is
// positional, so a file that happens to be called like the flag is not
// mistaken for it.
func hasFlag(args []string, name string) bool {
	withValue := name + "="
	for _, arg := range args {
		if arg == "--" {
			return false
		}
		if arg == name || strings.HasPrefix(arg, withValue) {
			return true
		}
	}
	return false
}
|
||||
580
cmd/songsee/main_test.go
Normal file
580
cmd/songsee/main_test.go
Normal file
@ -0,0 +1,580 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"image"
|
||||
"image/png"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestRunMP3E2E(t *testing.T) {
|
||||
input := testdataPath(t, "sine.mp3")
|
||||
outDir := t.TempDir()
|
||||
outPath := filepath.Join(outDir, "spectro.jpg")
|
||||
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{
|
||||
"--width", "320",
|
||||
"--height", "180",
|
||||
"--start", "0.2",
|
||||
"--duration", "0.5",
|
||||
"--style", "magma",
|
||||
"--output", outPath,
|
||||
input,
|
||||
}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d stderr=%s", exit, stderr.String())
|
||||
}
|
||||
if stdout.String() == "" {
|
||||
t.Fatalf("expected stdout output")
|
||||
}
|
||||
info, err := os.Stat(outPath)
|
||||
if err != nil {
|
||||
t.Fatalf("missing output: %v", err)
|
||||
}
|
||||
if info.Size() == 0 {
|
||||
t.Fatalf("empty output")
|
||||
}
|
||||
|
||||
file, err := os.Open(outPath)
|
||||
if err != nil {
|
||||
t.Fatalf("open output: %v", err)
|
||||
}
|
||||
defer func() { _ = file.Close() }()
|
||||
img, _, err := image.Decode(file)
|
||||
if err != nil {
|
||||
t.Fatalf("decode image: %v", err)
|
||||
}
|
||||
if img.Bounds().Dx() != 320 || img.Bounds().Dy() != 180 {
|
||||
t.Fatalf("size mismatch")
|
||||
}
|
||||
if flatImage(img) {
|
||||
t.Fatalf("image appears flat")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunFromStdinPNG(t *testing.T) {
|
||||
outDir := t.TempDir()
|
||||
outPath := filepath.Join(outDir, "spectro.png")
|
||||
|
||||
wav := makeWAV([]int16{0, 2000, -2000, 0, 1000, -1000}, 44100, 1)
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{
|
||||
"--format", "png",
|
||||
"--output", outPath,
|
||||
"-",
|
||||
}, bytes.NewReader(wav), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d stderr=%s", exit, stderr.String())
|
||||
}
|
||||
file, err := os.Open(outPath)
|
||||
if err != nil {
|
||||
t.Fatalf("open output: %v", err)
|
||||
}
|
||||
defer func() { _ = file.Close() }()
|
||||
if _, err := png.Decode(file); err != nil {
|
||||
t.Fatalf("decode png: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunVersion(t *testing.T) {
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--version"}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d", exit)
|
||||
}
|
||||
if stdout.String() == "" {
|
||||
t.Fatalf("expected version output")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunHelp(t *testing.T) {
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--help"}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d", exit)
|
||||
}
|
||||
if stdout.String() == "" && stderr.String() == "" {
|
||||
t.Fatalf("expected help output")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunInvalidWindow(t *testing.T) {
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--window", "1000", "-"}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 2 {
|
||||
t.Fatalf("expected usage exit, got %d", exit)
|
||||
}
|
||||
if stderr.String() == "" {
|
||||
t.Fatalf("expected stderr usage")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunUnknownFlag(t *testing.T) {
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--nope"}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 2 {
|
||||
t.Fatalf("expected usage exit, got %d", exit)
|
||||
}
|
||||
if stderr.String() == "" {
|
||||
t.Fatalf("expected stderr output")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunBadFormat(t *testing.T) {
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--format", "gif", "-"}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 2 {
|
||||
t.Fatalf("expected usage exit, got %d", exit)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunBadFreqRange(t *testing.T) {
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--min-freq", "100", "--max-freq", "50", "-"}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 2 {
|
||||
t.Fatalf("expected usage exit, got %d", exit)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunUnknownStyle(t *testing.T) {
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--style", "nope", "-"}, bytes.NewReader(makeWAV([]int16{0, 1}, 44100, 1)), stdout, stderr)
|
||||
if exit != 2 {
|
||||
t.Fatalf("expected usage exit, got %d", exit)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunBadSize(t *testing.T) {
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--width", "0", "-"}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 2 {
|
||||
t.Fatalf("expected usage exit, got %d", exit)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunBadWindowZero(t *testing.T) {
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--window", "0", "-"}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 2 {
|
||||
t.Fatalf("expected usage exit, got %d", exit)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunBadHopZero(t *testing.T) {
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--hop", "0", "-"}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 2 {
|
||||
t.Fatalf("expected usage exit, got %d", exit)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunNegativeStart(t *testing.T) {
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--start=-1", "-"}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 2 {
|
||||
t.Fatalf("expected usage exit, got %d", exit)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunMissingFile(t *testing.T) {
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"nope.wav"}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 1 {
|
||||
t.Fatalf("expected error exit, got %d", exit)
|
||||
}
|
||||
if stderr.String() == "" {
|
||||
t.Fatalf("expected stderr output")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunMissingInput(t *testing.T) {
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 2 {
|
||||
t.Fatalf("expected usage exit, got %d", exit)
|
||||
}
|
||||
if stderr.String() == "" {
|
||||
t.Fatalf("expected stderr output")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunNoSamplesDecoded(t *testing.T) {
|
||||
wav := makeWAV([]int16{}, 44100, 1)
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"-"}, bytes.NewReader(wav), stdout, stderr)
|
||||
if exit != 1 {
|
||||
t.Fatalf("expected error exit, got %d", exit)
|
||||
}
|
||||
if !bytes.Contains(stderr.Bytes(), []byte("no samples")) {
|
||||
t.Fatalf("expected no samples error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunSliceError(t *testing.T) {
|
||||
wav := makeWAV([]int16{0, 1, -1, 0}, 44100, 1)
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--start", "2", "--duration", "1", "-"}, bytes.NewReader(wav), stdout, stderr)
|
||||
if exit != 1 {
|
||||
t.Fatalf("expected error exit, got %d", exit)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunSliceVerbose(t *testing.T) {
|
||||
samples := make([]int16, 44100)
|
||||
wav := makeWAV(samples, 44100, 1)
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--verbose", "--start", "0", "--duration", "0.2", "--output", "-", "-"}, bytes.NewReader(wav), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d stderr=%s", exit, stderr.String())
|
||||
}
|
||||
if !bytes.Contains(stderr.Bytes(), []byte("slice:")) {
|
||||
t.Fatalf("expected slice output")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunStyleAffectsOutput(t *testing.T) {
|
||||
wav := makeWAV(genSineMixSamples(44100), 44100, 1)
|
||||
outClassic := runToBytes(t, wav, "classic")
|
||||
outMagma := runToBytes(t, wav, "magma")
|
||||
if bytes.Equal(outClassic, outMagma) {
|
||||
t.Fatalf("expected different output for different styles")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunOutputStdout(t *testing.T) {
|
||||
wav := makeWAV([]int16{0, 1000, -1000, 0, 500, -500}, 44100, 1)
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{
|
||||
"--format", "png",
|
||||
"--output", "-",
|
||||
"-",
|
||||
}, bytes.NewReader(wav), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d stderr=%s", exit, stderr.String())
|
||||
}
|
||||
if stdout.Len() == 0 {
|
||||
t.Fatalf("expected image bytes on stdout")
|
||||
}
|
||||
if _, err := png.Decode(bytes.NewReader(stdout.Bytes())); err != nil {
|
||||
t.Fatalf("decode stdout png: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunOutputAuto(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := filepath.Join(dir, "input.wav")
|
||||
if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
|
||||
t.Fatalf("write input: %v", err)
|
||||
}
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--format", "png", input}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d stderr=%s", exit, stderr.String())
|
||||
}
|
||||
outPath := filepath.Join(dir, "input.png")
|
||||
if _, err := os.Stat(outPath); err != nil {
|
||||
t.Fatalf("missing output: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunOutputExtOverride(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := filepath.Join(dir, "input.wav")
|
||||
if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
|
||||
t.Fatalf("write input: %v", err)
|
||||
}
|
||||
output := filepath.Join(dir, "out.png")
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--output", output, input}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d stderr=%s", exit, stderr.String())
|
||||
}
|
||||
if _, err := os.Stat(output); err != nil {
|
||||
t.Fatalf("missing output: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunOutputAppendDefault(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := filepath.Join(dir, "input.wav")
|
||||
if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
|
||||
t.Fatalf("write input: %v", err)
|
||||
}
|
||||
output := filepath.Join(dir, "out")
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--output", output, input}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d stderr=%s", exit, stderr.String())
|
||||
}
|
||||
if _, err := os.Stat(output + ".jpg"); err != nil {
|
||||
t.Fatalf("missing output: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunOutputJpgExt(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := filepath.Join(dir, "input.wav")
|
||||
if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
|
||||
t.Fatalf("write input: %v", err)
|
||||
}
|
||||
output := filepath.Join(dir, "out.jpg")
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--output", output, input}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d stderr=%s", exit, stderr.String())
|
||||
}
|
||||
if _, err := os.Stat(output); err != nil {
|
||||
t.Fatalf("missing output: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunWriteImageError(t *testing.T) {
|
||||
wav := makeWAV([]int16{0, 1000, -1000, 0}, 44100, 1)
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--output", "/nope/dir/out.jpg", "-"}, bytes.NewReader(wav), stdout, stderr)
|
||||
if exit != 1 {
|
||||
t.Fatalf("expected error exit, got %d", exit)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunFormatFlagKeepsOutput(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := filepath.Join(dir, "input.wav")
|
||||
if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
|
||||
t.Fatalf("write input: %v", err)
|
||||
}
|
||||
output := filepath.Join(dir, "customout")
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--format", "png", "--output", output, input}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d stderr=%s", exit, stderr.String())
|
||||
}
|
||||
if _, err := os.Stat(output); err != nil {
|
||||
t.Fatalf("missing output: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunQuiet(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := filepath.Join(dir, "input.wav")
|
||||
if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
|
||||
t.Fatalf("write input: %v", err)
|
||||
}
|
||||
output := filepath.Join(dir, "out.jpg")
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--quiet", "--output", output, input}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d stderr=%s", exit, stderr.String())
|
||||
}
|
||||
if stdout.String() != "" {
|
||||
t.Fatalf("expected quiet stdout")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDie(t *testing.T) {
|
||||
stderr := &bytes.Buffer{}
|
||||
if code := die(stderr, errSentinel{}); code != 1 {
|
||||
t.Fatalf("expected code 1")
|
||||
}
|
||||
if stderr.String() == "" {
|
||||
t.Fatalf("expected stderr output")
|
||||
}
|
||||
}
|
||||
|
||||
// errSentinel is a minimal error implementation with a fixed message, used as
// a test input.
type errSentinel struct{}

// Error implements the error interface.
func (errSentinel) Error() string {
	const msg = "boom"
	return msg
}
|
||||
|
||||
func TestRunInputDashDefaultOutput(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
cwd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("Getwd: %v", err)
|
||||
}
|
||||
if err := os.Chdir(tmp); err != nil {
|
||||
t.Fatalf("Chdir: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = os.Chdir(cwd) })
|
||||
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--format", "png", "-"}, bytes.NewReader(makeWAV([]int16{0, 1, -1}, 44100, 1)), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d stderr=%s", exit, stderr.String())
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(tmp, "songsee.png")); err != nil {
|
||||
t.Fatalf("missing output: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunFormatJPEG(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := filepath.Join(dir, "input.wav")
|
||||
if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
|
||||
t.Fatalf("write input: %v", err)
|
||||
}
|
||||
output := filepath.Join(dir, "out")
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--format", "jpeg", "--output", output, input}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d stderr=%s", exit, stderr.String())
|
||||
}
|
||||
if _, err := os.Stat(output); err != nil {
|
||||
t.Fatalf("missing output: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteImageUnknownFormat(t *testing.T) {
|
||||
buf := &bytes.Buffer{}
|
||||
err := writeImage("-", "gif", image.NewRGBA(image.Rect(0, 0, 1, 1)), buf)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunVerbose(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
input := filepath.Join(dir, "input.wav")
|
||||
if err := os.WriteFile(input, makeWAV([]int16{0, 1, -1, 0}, 44100, 1), 0o644); err != nil {
|
||||
t.Fatalf("write input: %v", err)
|
||||
}
|
||||
output := filepath.Join(dir, "out.jpg")
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--verbose", "--output", output, input}, bytes.NewReader(nil), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d stderr=%s", exit, stderr.String())
|
||||
}
|
||||
if !bytes.Contains(stderr.Bytes(), []byte("decoded:")) {
|
||||
t.Fatalf("expected verbose output")
|
||||
}
|
||||
}
|
||||
|
||||
// testdataPath returns the path of the fixture called name inside the
// "testdata" directory located two levels above the current working
// directory. The test fails immediately if the working directory cannot be
// determined or the fixture does not exist.
func testdataPath(t *testing.T, name string) string {
	t.Helper()

	cwd, err := os.Getwd()
	if err != nil {
		t.Fatalf("Getwd: %v", err)
	}

	// Walk up two directories from the working directory to find testdata.
	fixture := filepath.Join(filepath.Dir(filepath.Dir(cwd)), "testdata", name)
	if _, statErr := os.Stat(fixture); statErr != nil {
		t.Fatalf("missing testdata: %v", statErr)
	}
	return fixture
}
|
||||
|
||||
// flatImage reports whether img is essentially uniform: it takes the mean of
// the R, G and B components (as returned by RGBA) of every pixel and returns
// true when the spread between the darkest and brightest mean is below 1000.
func flatImage(img image.Image) bool {
	var (
		darkest   = uint32(0xFFFFFFFF)
		brightest = uint32(0)
	)
	rect := img.Bounds()
	for row := rect.Min.Y; row < rect.Max.Y; row++ {
		for col := rect.Min.X; col < rect.Max.X; col++ {
			r, g, b, _ := img.At(col, row).RGBA()
			mean := (r + g + b) / 3
			if mean < darkest {
				darkest = mean
			}
			if mean > brightest {
				brightest = mean
			}
		}
	}
	spread := brightest - darkest
	return spread < 1000
}
|
||||
|
||||
func makeWAV(samples []int16, sampleRate int, channels int) []byte {
|
||||
if channels < 1 {
|
||||
channels = 1
|
||||
}
|
||||
dataLen := len(samples) * 2
|
||||
riffSize := 4 + (8 + 16) + (8 + dataLen)
|
||||
|
||||
buf := &bytes.Buffer{}
|
||||
buf.WriteString("RIFF")
|
||||
writeU32(buf, uint32(riffSize))
|
||||
buf.WriteString("WAVE")
|
||||
|
||||
buf.WriteString("fmt ")
|
||||
writeU32(buf, 16)
|
||||
writeU16(buf, 1)
|
||||
writeU16(buf, uint16(channels))
|
||||
writeU32(buf, uint32(sampleRate))
|
||||
byteRate := sampleRate * channels * 2
|
||||
writeU32(buf, uint32(byteRate))
|
||||
blockAlign := channels * 2
|
||||
writeU16(buf, uint16(blockAlign))
|
||||
writeU16(buf, 16)
|
||||
|
||||
buf.WriteString("data")
|
||||
writeU32(buf, uint32(dataLen))
|
||||
for _, s := range samples {
|
||||
writeU16(buf, uint16(s))
|
||||
}
|
||||
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
// writeU16 appends v to buf as two bytes, least significant byte first.
func writeU16(buf *bytes.Buffer, v uint16) {
	buf.Write([]byte{byte(v), byte(v >> 8)})
}
|
||||
|
||||
// writeU32 appends v to buf as four bytes, least significant byte first.
func writeU32(buf *bytes.Buffer, v uint32) {
	buf.Write([]byte{byte(v), byte(v >> 8), byte(v >> 16), byte(v >> 24)})
}
|
||||
|
||||
func runToBytes(t *testing.T, wav []byte, style string) []byte {
|
||||
t.Helper()
|
||||
stdout := &bytes.Buffer{}
|
||||
stderr := &bytes.Buffer{}
|
||||
exit := run([]string{"--format", "png", "--style", style, "--output", "-", "-"}, bytes.NewReader(wav), stdout, stderr)
|
||||
if exit != 0 {
|
||||
t.Fatalf("exit %d stderr=%s", exit, stderr.String())
|
||||
}
|
||||
return stdout.Bytes()
|
||||
}
|
||||
|
||||
// genSineMixSamples returns n samples of a two-component sine mix (weights
// 0.5 and 0.4, the second at twice the frequency of the first) scaled by
// 15000. The phase runs from 0 towards 1 across the whole slice, so the
// components complete 440 and 880 cycles over the n samples.
func genSineMixSamples(n int) []int16 {
	samples := make([]int16, n)
	for i := range samples {
		t := float64(i) / float64(n)
		v := 0.5*math.Sin(2*math.Pi*440*t) + 0.4*math.Sin(2*math.Pi*880*t)
		samples[i] = int16(v * 15000)
	}
	return samples
}
|
||||
7
go.mod
Normal file
7
go.mod
Normal file
@ -0,0 +1,7 @@
|
||||
module github.com/steipete/songsee
|
||||
|
||||
go 1.25
|
||||
|
||||
require github.com/hajimehoshi/go-mp3 v0.3.4
|
||||
|
||||
require github.com/alecthomas/kong v1.13.0
|
||||
12
go.sum
Normal file
12
go.sum
Normal file
@ -0,0 +1,12 @@
|
||||
github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0=
|
||||
github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
|
||||
github.com/alecthomas/kong v1.13.0 h1:5e/7XC3ugvhP1DQBmTS+WuHtCbcv44hsohMgcvVxSrA=
|
||||
github.com/alecthomas/kong v1.13.0/go.mod h1:wrlbXem1CWqUV5Vbmss5ISYhsVPkBb1Yo7YKJghju2I=
|
||||
github.com/alecthomas/repr v0.5.2 h1:SU73FTI9D1P5UNtvseffFSGmdNci/O6RsqzeXJtP0Qs=
|
||||
github.com/alecthomas/repr v0.5.2/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
|
||||
github.com/hajimehoshi/go-mp3 v0.3.4 h1:NUP7pBYH8OguP4diaTZ9wJbUbk3tC0KlfzsEpWmYj68=
|
||||
github.com/hajimehoshi/go-mp3 v0.3.4/go.mod h1:fRtZraRFcWb0pu7ok0LqyFhCUrPeMsGRSVop0eemFmo=
|
||||
github.com/hajimehoshi/oto/v2 v2.3.1/go.mod h1:seWLbgHH7AyUMYKfKYT9pg7PhUu9/SisyJvNTT+ASQo=
|
||||
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
|
||||
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
|
||||
golang.org/x/sys v0.0.0-20220712014510-0a85c31ab51e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
21
internal/audio/audio.go
Normal file
21
internal/audio/audio.go
Normal file
@ -0,0 +1,21 @@
|
||||
// Package audio handles decoding audio into mono float samples.
|
||||
package audio
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Audio is a decoded mono signal: samples in the [-1,1] range together with
// the rate they were sampled at.
type Audio struct {
	// SampleRate is the number of samples per second, in Hz.
	SampleRate int
	// Samples holds the mono sample values.
	Samples []float64
}
|
||||
|
||||
// Options tunes how audio is decoded.
type Options struct {
	// SampleRate is the requested sample rate; the CLI exposes it as the
	// ffmpeg output sample rate.
	SampleRate int
	// FFmpegPath is the path to the ffmpeg binary.
	FFmpegPath string
}
|
||||
|
||||
// ErrUnsupported is the error returned when none of the decoders can handle
// the input.
var ErrUnsupported = fmt.Errorf("unsupported audio format")
|
||||
249
internal/audio/audio_test.go
Normal file
249
internal/audio/audio_test.go
Normal file
@ -0,0 +1,249 @@
|
||||
package audio
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDecodeWAVBytes(t *testing.T) {
|
||||
samples := make([]int16, 1000)
|
||||
for i := range samples {
|
||||
samples[i] = int16(2000 * math.Sin(2*math.Pi*float64(i)/50))
|
||||
}
|
||||
data := makeWAV(samples, 44100, 1)
|
||||
pcm, err := DecodeBytes(data, Options{})
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeBytes: %v", err)
|
||||
}
|
||||
if pcm.SampleRate != 44100 {
|
||||
t.Fatalf("sample rate = %d", pcm.SampleRate)
|
||||
}
|
||||
if len(pcm.Samples) != len(samples) {
|
||||
t.Fatalf("samples = %d", len(pcm.Samples))
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeWAVIfNotWAV(t *testing.T) {
|
||||
_, ok, err := DecodeWAVIf(bytesReader([]byte("NOTWAVE12345")))
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeWAVIf error: %v", err)
|
||||
}
|
||||
if ok {
|
||||
t.Fatalf("expected ok=false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeWAVIfValid(t *testing.T) {
|
||||
data := makeWAV([]int16{0, 1000, -1000}, 44100, 1)
|
||||
pcm, ok, err := DecodeWAVIf(bytesReader(data))
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeWAVIf error: %v", err)
|
||||
}
|
||||
if !ok {
|
||||
t.Fatalf("expected ok=true")
|
||||
}
|
||||
if len(pcm.Samples) != 3 {
|
||||
t.Fatalf("samples = %d", len(pcm.Samples))
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeMP3File(t *testing.T) {
|
||||
path := testdataPath(t, "sine.mp3")
|
||||
pcm, err := DecodeFile(path, Options{})
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeFile: %v", err)
|
||||
}
|
||||
if pcm.SampleRate == 0 || len(pcm.Samples) == 0 {
|
||||
t.Fatalf("invalid decode result")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeMP3IfValid(t *testing.T) {
|
||||
path := testdataPath(t, "sine.mp3")
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatalf("ReadFile: %v", err)
|
||||
}
|
||||
pcm, ok, err := DecodeMP3If(bytesReader(data))
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeMP3If: %v", err)
|
||||
}
|
||||
if !ok {
|
||||
t.Fatalf("expected ok=true")
|
||||
}
|
||||
if len(pcm.Samples) == 0 {
|
||||
t.Fatalf("empty samples")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeFileUnknownExt(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "audio.bin")
|
||||
if err := os.WriteFile(path, makeWAV([]int16{0, 1, -1}, 44100, 1), 0o644); err != nil {
|
||||
t.Fatalf("write file: %v", err)
|
||||
}
|
||||
pcm, err := DecodeFile(path, Options{})
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeFile: %v", err)
|
||||
}
|
||||
if len(pcm.Samples) == 0 {
|
||||
t.Fatalf("empty samples")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeFileWAV(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "audio.wav")
|
||||
if err := os.WriteFile(path, makeWAV([]int16{0, 1, -1}, 44100, 1), 0o644); err != nil {
|
||||
t.Fatalf("write file: %v", err)
|
||||
}
|
||||
pcm, err := DecodeFile(path, Options{})
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeFile: %v", err)
|
||||
}
|
||||
if len(pcm.Samples) == 0 {
|
||||
t.Fatalf("empty samples")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeFileFFmpegFallbackError(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "audio.bin")
|
||||
if err := os.WriteFile(path, []byte("garbagegarbagegarbage"), 0o644); err != nil {
|
||||
t.Fatalf("write file: %v", err)
|
||||
}
|
||||
if _, err := DecodeFile(path, Options{}); err == nil {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeBytesFFmpegFallbackError(t *testing.T) {
|
||||
_, err := DecodeBytes([]byte("not audio"), Options{})
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for garbage data")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeMP3Bytes(t *testing.T) {
|
||||
path := testdataPath(t, "sine.mp3")
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatalf("ReadFile: %v", err)
|
||||
}
|
||||
pcm, err := DecodeBytes(data, Options{})
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeBytes: %v", err)
|
||||
}
|
||||
if len(pcm.Samples) == 0 {
|
||||
t.Fatalf("empty samples")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeReader(t *testing.T) {
|
||||
samples := []int16{0, 1000, -1000, 0}
|
||||
data := makeWAV(samples, 48000, 1)
|
||||
pcm, err := DecodeReader(bytesReader(data), Options{})
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeReader: %v", err)
|
||||
}
|
||||
if pcm.SampleRate != 48000 {
|
||||
t.Fatalf("sample rate = %d", pcm.SampleRate)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeReaderError(t *testing.T) {
|
||||
_, err := DecodeReader(errReader{}, Options{})
|
||||
if err == nil {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSlice(t *testing.T) {
|
||||
a := Audio{SampleRate: 10, Samples: []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}}
|
||||
out, err := Slice(a, 0.2, 0.5)
|
||||
if err != nil {
|
||||
t.Fatalf("Slice: %v", err)
|
||||
}
|
||||
if len(out.Samples) != 5 {
|
||||
t.Fatalf("slice samples = %d", len(out.Samples))
|
||||
}
|
||||
}
|
||||
|
||||
func TestSliceErrors(t *testing.T) {
|
||||
_, err := Slice(Audio{SampleRate: 10, Samples: []float64{1}}, -1, 1)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for negative start")
|
||||
}
|
||||
_, err = Slice(Audio{SampleRate: 0, Samples: []float64{1}}, 0, 1)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for sample rate")
|
||||
}
|
||||
_, err = Slice(Audio{SampleRate: 10, Samples: []float64{}}, 0, 1)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for empty samples")
|
||||
}
|
||||
_, err = Slice(Audio{SampleRate: 10, Samples: []float64{1}}, 2, 0)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for start")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSliceFullDuration(t *testing.T) {
|
||||
a := Audio{SampleRate: 10, Samples: []float64{0, 1, 2, 3}}
|
||||
out, err := Slice(a, 0, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("Slice: %v", err)
|
||||
}
|
||||
if len(out.Samples) != len(a.Samples) {
|
||||
t.Fatalf("expected full slice")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSliceDurationTooShort(t *testing.T) {
|
||||
a := Audio{SampleRate: 10, Samples: []float64{0, 1, 2}}
|
||||
if _, err := Slice(a, 0, 0.01); err == nil {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeMP3IfNotMP3(t *testing.T) {
|
||||
_, ok, err := DecodeMP3If(bytesReader([]byte("NOTMP3DATA")))
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeMP3If error: %v", err)
|
||||
}
|
||||
if ok {
|
||||
t.Fatalf("expected ok=false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeWAVUnsupported(t *testing.T) {
|
||||
_, err := decodeWAV(bytesReader([]byte("NOTWAVE12345")))
|
||||
if err == nil {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
// testdataPath returns the path of the named fixture inside the "testdata"
// directory located two levels above the current working directory. The test
// fails immediately if the working directory cannot be determined or the
// fixture does not exist.
func testdataPath(t *testing.T, name string) string {
	t.Helper()

	cwd, err := os.Getwd()
	if err != nil {
		t.Fatalf("Getwd: %v", err)
	}

	fixture := filepath.Join(filepath.Dir(filepath.Dir(cwd)), "testdata", name)
	if _, statErr := os.Stat(fixture); statErr != nil {
		t.Fatalf("missing testdata: %v", statErr)
	}
	return fixture
}
|
||||
|
||||
// bytesReader wraps b in a seekable in-memory reader for the decoder tests.
func bytesReader(b []byte) *bytes.Reader {
	reader := bytes.NewReader(b)
	return reader
}
|
||||
|
||||
// errReader is a reader stub used to exercise read-failure paths.
type errReader struct{}

// Read never produces data: it always reports zero bytes and os.ErrInvalid.
func (errReader) Read(_ []byte) (n int, err error) {
	return 0, os.ErrInvalid
}
|
||||
78
internal/audio/decode.go
Normal file
78
internal/audio/decode.go
Normal file
@ -0,0 +1,78 @@
|
||||
// Package audio handles decoding audio into mono float samples.
|
||||
package audio
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// DecodeFile reads an audio file, decoding WAV/MP3 and falling back to ffmpeg.
|
||||
func DecodeFile(path string, opts Options) (Audio, error) {
|
||||
file, err := os.Open(path)
|
||||
if err != nil {
|
||||
return Audio{}, err
|
||||
}
|
||||
defer func() { _ = file.Close() }()
|
||||
|
||||
ext := strings.ToLower(filepath.Ext(path))
|
||||
if ext == ".wav" || ext == ".wave" {
|
||||
if pcm, ok, err := DecodeWAVIf(file); ok {
|
||||
return pcm, err
|
||||
}
|
||||
}
|
||||
if ext == ".mp3" {
|
||||
if pcm, ok, err := DecodeMP3If(file); ok {
|
||||
return pcm, err
|
||||
}
|
||||
}
|
||||
|
||||
if pcm, ok, err := DecodeWAVIf(file); ok {
|
||||
return pcm, err
|
||||
}
|
||||
if pcm, ok, err := DecodeMP3If(file); ok {
|
||||
return pcm, err
|
||||
}
|
||||
|
||||
if opts.SampleRate == 0 {
|
||||
opts.SampleRate = 44100
|
||||
}
|
||||
pcm, err := DecodeWithFFmpeg(path, nil, opts.SampleRate, opts.FFmpegPath)
|
||||
if err != nil {
|
||||
return Audio{}, fmt.Errorf("%w; ffmpeg fallback failed: %v", ErrUnsupported, err)
|
||||
}
|
||||
return pcm, nil
|
||||
}
|
||||
|
||||
// DecodeBytes decodes audio data from a byte slice.
|
||||
func DecodeBytes(data []byte, opts Options) (Audio, error) {
|
||||
reader := bytes.NewReader(data)
|
||||
if pcm, ok, err := DecodeWAVIf(reader); ok {
|
||||
return pcm, err
|
||||
}
|
||||
reader.Reset(data)
|
||||
if pcm, ok, err := DecodeMP3If(reader); ok {
|
||||
return pcm, err
|
||||
}
|
||||
|
||||
if opts.SampleRate == 0 {
|
||||
opts.SampleRate = 44100
|
||||
}
|
||||
pcm, err := DecodeWithFFmpeg("", bytes.NewReader(data), opts.SampleRate, opts.FFmpegPath)
|
||||
if err != nil {
|
||||
return Audio{}, fmt.Errorf("%w; ffmpeg fallback failed: %v", ErrUnsupported, err)
|
||||
}
|
||||
return pcm, nil
|
||||
}
|
||||
|
||||
// DecodeReader decodes audio data from an io.Reader.
|
||||
func DecodeReader(r io.Reader, opts Options) (Audio, error) {
|
||||
data, err := io.ReadAll(r)
|
||||
if err != nil {
|
||||
return Audio{}, err
|
||||
}
|
||||
return DecodeBytes(data, opts)
|
||||
}
|
||||
67
internal/audio/ffmpeg.go
Normal file
67
internal/audio/ffmpeg.go
Normal file
@ -0,0 +1,67 @@
|
||||
// Package audio handles decoding audio into mono float samples.
|
||||
package audio
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os/exec"
|
||||
)
|
||||
|
||||
// DecodeWithFFmpeg uses ffmpeg to decode any input into mono float samples.
|
||||
func DecodeWithFFmpeg(path string, stdin io.Reader, sampleRate int, ffmpegPath string) (Audio, error) {
|
||||
if sampleRate <= 0 {
|
||||
sampleRate = 44100
|
||||
}
|
||||
ffmpeg, err := resolveFFmpeg(ffmpegPath)
|
||||
if err != nil {
|
||||
return Audio{}, err
|
||||
}
|
||||
|
||||
args := []string{"-hide_banner", "-loglevel", "error"}
|
||||
if stdin != nil {
|
||||
args = append(args, "-i", "pipe:0")
|
||||
} else {
|
||||
args = append(args, "-i", path)
|
||||
}
|
||||
args = append(args, "-f", "f32le", "-ac", "1", "-ar", fmt.Sprintf("%d", sampleRate), "-")
|
||||
|
||||
cmd := exec.Command(ffmpeg, args...)
|
||||
if stdin != nil {
|
||||
cmd.Stdin = stdin
|
||||
}
|
||||
var stderr bytes.Buffer
|
||||
cmd.Stderr = &stderr
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
if stderr.Len() > 0 {
|
||||
return Audio{}, fmt.Errorf("ffmpeg: %v: %s", err, stderr.String())
|
||||
}
|
||||
return Audio{}, err
|
||||
}
|
||||
|
||||
if len(out)%4 != 0 {
|
||||
return Audio{}, fmt.Errorf("ffmpeg: unexpected pcm length")
|
||||
}
|
||||
|
||||
samples := make([]float64, len(out)/4)
|
||||
for i := 0; i < len(samples); i++ {
|
||||
bits := binary.LittleEndian.Uint32(out[i*4 : i*4+4])
|
||||
samples[i] = float64(math.Float32frombits(bits))
|
||||
}
|
||||
|
||||
return Audio{SampleRate: sampleRate, Samples: samples}, nil
|
||||
}
|
||||
|
||||
// resolveFFmpeg returns the ffmpeg binary to execute. A non-empty path is
// returned verbatim without checking that it exists; an empty path triggers a
// lookup of "ffmpeg" on PATH. A failed lookup is reported with the underlying
// exec.LookPath error wrapped, so callers can still inspect the cause.
func resolveFFmpeg(path string) (string, error) {
	if path != "" {
		return path, nil
	}
	found, err := exec.LookPath("ffmpeg")
	if err != nil {
		return "", fmt.Errorf("ffmpeg not found in PATH: %w", err)
	}
	return found, nil
}
|
||||
74
internal/audio/ffmpeg_test.go
Normal file
74
internal/audio/ffmpeg_test.go
Normal file
@ -0,0 +1,74 @@
|
||||
package audio
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestResolveFFmpeg(t *testing.T) {
|
||||
path, err := resolveFFmpeg("")
|
||||
if err != nil {
|
||||
t.Fatalf("resolveFFmpeg: %v", err)
|
||||
}
|
||||
if path == "" {
|
||||
t.Fatalf("empty ffmpeg path")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveFFmpegExplicit(t *testing.T) {
|
||||
found, err := resolveFFmpeg("")
|
||||
if err != nil {
|
||||
t.Fatalf("resolveFFmpeg: %v", err)
|
||||
}
|
||||
path, err := resolveFFmpeg(found)
|
||||
if err != nil {
|
||||
t.Fatalf("resolveFFmpeg explicit: %v", err)
|
||||
}
|
||||
if path == "" {
|
||||
t.Fatalf("empty ffmpeg path")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveFFmpegMissing(t *testing.T) {
|
||||
t.Setenv("PATH", "")
|
||||
if _, err := resolveFFmpeg(""); err == nil {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeWithFFmpegFile(t *testing.T) {
|
||||
path := testdataPath(t, "sine.mp3")
|
||||
pcm, err := DecodeWithFFmpeg(path, nil, 22050, "")
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeWithFFmpeg: %v", err)
|
||||
}
|
||||
if pcm.SampleRate != 22050 {
|
||||
t.Fatalf("sample rate = %d", pcm.SampleRate)
|
||||
}
|
||||
if len(pcm.Samples) == 0 {
|
||||
t.Fatalf("empty samples")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeWithFFmpegStdin(t *testing.T) {
|
||||
path := testdataPath(t, "sine.mp3")
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatalf("ReadFile: %v", err)
|
||||
}
|
||||
pcm, err := DecodeWithFFmpeg("", bytes.NewReader(data), 44100, "")
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeWithFFmpeg stdin: %v", err)
|
||||
}
|
||||
if len(pcm.Samples) == 0 {
|
||||
t.Fatalf("empty samples")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeWithFFmpegBadPath(t *testing.T) {
|
||||
_, err := DecodeWithFFmpeg("missing.mp3", nil, 0, "/no/such/ffmpeg")
|
||||
if err == nil {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
}
|
||||
72
internal/audio/mp3.go
Normal file
72
internal/audio/mp3.go
Normal file
@ -0,0 +1,72 @@
|
||||
package audio
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"io"
|
||||
|
||||
"github.com/hajimehoshi/go-mp3"
|
||||
)
|
||||
|
||||
// DecodeMP3If tries to decode MP3 data, returning ok=false when not MP3.
|
||||
func DecodeMP3If(r io.ReadSeeker) (Audio, bool, error) {
|
||||
header := make([]byte, 4)
|
||||
if _, err := io.ReadFull(r, header); err != nil {
|
||||
return Audio{}, false, err
|
||||
}
|
||||
isMP3 := string(header[0:3]) == "ID3" || (header[0] == 0xFF && header[1]&0xE0 == 0xE0)
|
||||
_, _ = r.Seek(0, io.SeekStart)
|
||||
if !isMP3 {
|
||||
return Audio{}, false, nil
|
||||
}
|
||||
pcm, err := decodeMP3(r)
|
||||
if err != nil {
|
||||
return Audio{}, true, err
|
||||
}
|
||||
return pcm, true, nil
|
||||
}
|
||||
|
||||
func decodeMP3(r io.Reader) (Audio, error) {
|
||||
dec, err := mp3.NewDecoder(r)
|
||||
if err != nil {
|
||||
return Audio{}, err
|
||||
}
|
||||
pcm, err := io.ReadAll(dec)
|
||||
if err != nil {
|
||||
return Audio{}, err
|
||||
}
|
||||
if len(pcm)%2 != 0 {
|
||||
return Audio{}, errors.New("mp3: odd pcm length")
|
||||
}
|
||||
|
||||
channels := 1
|
||||
if len(pcm)%4 == 0 {
|
||||
channels = 2
|
||||
}
|
||||
frames := len(pcm) / (2 * channels)
|
||||
out := make([]float64, frames)
|
||||
|
||||
buf := bytes.NewReader(pcm)
|
||||
for i := 0; i < frames; i++ {
|
||||
var sum float64
|
||||
for ch := 0; ch < channels; ch++ {
|
||||
var sample int16
|
||||
if err := binaryRead(buf, &sample); err != nil {
|
||||
return Audio{}, err
|
||||
}
|
||||
sum += float64(sample) / 32768.0
|
||||
}
|
||||
out[i] = sum / float64(channels)
|
||||
}
|
||||
|
||||
return Audio{SampleRate: dec.SampleRate(), Samples: out}, nil
|
||||
}
|
||||
|
||||
// binaryRead reads two bytes from r and stores them in *v as a little-endian
// signed 16-bit value. On a read error *v is left untouched and the error is
// returned.
func binaryRead(r io.Reader, v *int16) error {
	raw := make([]byte, 2)
	_, err := io.ReadFull(r, raw)
	if err == nil {
		*v = int16(uint16(raw[0]) | uint16(raw[1])<<8)
	}
	return err
}
|
||||
30
internal/audio/mp3_errors_test.go
Normal file
30
internal/audio/mp3_errors_test.go
Normal file
@ -0,0 +1,30 @@
|
||||
package audio
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDecodeMP3Error(t *testing.T) {
|
||||
if _, err := decodeMP3(bytes.NewReader([]byte("not mp3"))); err == nil {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBinaryReadError(t *testing.T) {
|
||||
var s int16
|
||||
if err := binaryRead(bytes.NewReader([]byte{0x01}), &s); err == nil {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeMP3IfCorrupt(t *testing.T) {
|
||||
data := []byte{'I', 'D', '3', 0x03, 0x00}
|
||||
_, ok, err := DecodeMP3If(bytes.NewReader(data))
|
||||
if !ok {
|
||||
t.Fatalf("expected ok=true")
|
||||
}
|
||||
if err == nil {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
}
|
||||
33
internal/audio/slice.go
Normal file
33
internal/audio/slice.go
Normal file
@ -0,0 +1,33 @@
|
||||
package audio
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Slice returns a time-based slice of audio in seconds.
|
||||
func Slice(a Audio, startSec, durationSec float64) (Audio, error) {
|
||||
if startSec < 0 || durationSec < 0 {
|
||||
return Audio{}, fmt.Errorf("slice: start and duration must be >= 0")
|
||||
}
|
||||
if a.SampleRate <= 0 {
|
||||
return Audio{}, fmt.Errorf("slice: invalid sample rate")
|
||||
}
|
||||
if len(a.Samples) == 0 {
|
||||
return Audio{}, fmt.Errorf("slice: empty samples")
|
||||
}
|
||||
|
||||
start := int(startSec * float64(a.SampleRate))
|
||||
if start >= len(a.Samples) {
|
||||
return Audio{}, fmt.Errorf("slice: start beyond end")
|
||||
}
|
||||
end := len(a.Samples)
|
||||
if durationSec > 0 {
|
||||
end = start + int(durationSec*float64(a.SampleRate))
|
||||
if end > len(a.Samples) {
|
||||
end = len(a.Samples)
|
||||
}
|
||||
if end <= start {
|
||||
return Audio{}, fmt.Errorf("slice: duration too short")
|
||||
}
|
||||
}
|
||||
|
||||
return Audio{SampleRate: a.SampleRate, Samples: a.Samples[start:end]}, nil
|
||||
}
|
||||
229
internal/audio/wav.go
Normal file
229
internal/audio/wav.go
Normal file
@ -0,0 +1,229 @@
|
||||
package audio
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
)
|
||||
|
||||
// DecodeWAVIf tries to decode WAV data, returning ok=false when not WAV.
|
||||
func DecodeWAVIf(r io.ReadSeeker) (Audio, bool, error) {
|
||||
header := make([]byte, 12)
|
||||
if _, err := io.ReadFull(r, header); err != nil {
|
||||
return Audio{}, false, err
|
||||
}
|
||||
if string(header[0:4]) != "RIFF" || string(header[8:12]) != "WAVE" {
|
||||
_, _ = r.Seek(0, io.SeekStart)
|
||||
return Audio{}, false, nil
|
||||
}
|
||||
_, _ = r.Seek(0, io.SeekStart)
|
||||
pcm, err := decodeWAV(r)
|
||||
if err != nil {
|
||||
return Audio{}, true, err
|
||||
}
|
||||
return pcm, true, nil
|
||||
}
|
||||
|
||||
func decodeWAV(r io.ReadSeeker) (Audio, error) {
|
||||
var (
|
||||
fmtFound bool
|
||||
dataFound bool
|
||||
fmtChunk wavFormat
|
||||
data []byte
|
||||
)
|
||||
|
||||
header := make([]byte, 12)
|
||||
if _, err := io.ReadFull(r, header); err != nil {
|
||||
return Audio{}, err
|
||||
}
|
||||
if string(header[0:4]) != "RIFF" || string(header[8:12]) != "WAVE" {
|
||||
return Audio{}, ErrUnsupported
|
||||
}
|
||||
|
||||
for {
|
||||
chunkHeader := make([]byte, 8)
|
||||
_, err := io.ReadFull(r, chunkHeader)
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return Audio{}, err
|
||||
}
|
||||
chunkID := string(chunkHeader[0:4])
|
||||
chunkSize := int(binary.LittleEndian.Uint32(chunkHeader[4:8]))
|
||||
|
||||
switch chunkID {
|
||||
case "fmt ":
|
||||
fmtFound = true
|
||||
buf := make([]byte, chunkSize)
|
||||
if _, err := io.ReadFull(r, buf); err != nil {
|
||||
return Audio{}, err
|
||||
}
|
||||
if err := parseWavFormat(buf, &fmtChunk); err != nil {
|
||||
return Audio{}, err
|
||||
}
|
||||
case "data":
|
||||
dataFound = true
|
||||
data = make([]byte, chunkSize)
|
||||
if _, err := io.ReadFull(r, data); err != nil {
|
||||
return Audio{}, err
|
||||
}
|
||||
default:
|
||||
// Skip unknown chunk.
|
||||
if _, err := r.Seek(int64(chunkSize), io.SeekCurrent); err != nil {
|
||||
return Audio{}, err
|
||||
}
|
||||
}
|
||||
if chunkSize%2 == 1 {
|
||||
_, _ = r.Seek(1, io.SeekCurrent)
|
||||
}
|
||||
}
|
||||
|
||||
if !fmtFound || !dataFound {
|
||||
return Audio{}, errors.New("wav: missing fmt or data chunk")
|
||||
}
|
||||
return decodeWavData(fmtChunk, data)
|
||||
}
|
||||
|
||||
// wavFormat holds the fields of a WAV "fmt " chunk that the decoder uses.
type wavFormat struct {
	// AudioFormat is the format tag. Tags 1 and 3 are decoded (PCM and IEEE
	// float); 0xFFFE marks an extensible header whose real format is given by
	// SubFormat.
	AudioFormat uint16
	// NumChannels is the channel count; channels are averaged into mono.
	NumChannels uint16
	// SampleRate is the sample rate as stored in the chunk.
	SampleRate uint32
	// BitsPerSample is the width of one sample of one channel.
	BitsPerSample uint16
	// Extensible is set when AudioFormat is 0xFFFE and the chunk is at least
	// 40 bytes long.
	Extensible bool
	// SubFormat is the 16-byte sub-format GUID copied from bytes 24..40 of an
	// extensible fmt chunk.
	SubFormat [16]byte
}
|
||||
|
||||
func parseWavFormat(buf []byte, fmtChunk *wavFormat) error {
|
||||
if len(buf) < 16 {
|
||||
return errors.New("wav: short fmt chunk")
|
||||
}
|
||||
fmtChunk.AudioFormat = binary.LittleEndian.Uint16(buf[0:2])
|
||||
fmtChunk.NumChannels = binary.LittleEndian.Uint16(buf[2:4])
|
||||
fmtChunk.SampleRate = binary.LittleEndian.Uint32(buf[4:8])
|
||||
fmtChunk.BitsPerSample = binary.LittleEndian.Uint16(buf[14:16])
|
||||
if fmtChunk.AudioFormat == 0xFFFE && len(buf) >= 40 {
|
||||
fmtChunk.Extensible = true
|
||||
copy(fmtChunk.SubFormat[:], buf[24:40])
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func decodeWavData(fmtChunk wavFormat, data []byte) (Audio, error) {
|
||||
format := fmtChunk.AudioFormat
|
||||
if fmtChunk.Extensible {
|
||||
// PCM subformat GUID 00000001-0000-0010-8000-00aa00389b71
|
||||
if isGUID(fmtChunk.SubFormat, 0x00000001) {
|
||||
format = 1
|
||||
} else if isGUID(fmtChunk.SubFormat, 0x00000003) {
|
||||
format = 3
|
||||
}
|
||||
}
|
||||
|
||||
switch format {
|
||||
case 1, 3:
|
||||
// PCM or IEEE float.
|
||||
default:
|
||||
return Audio{}, fmt.Errorf("wav: unsupported format %d", format)
|
||||
}
|
||||
|
||||
channels := int(fmtChunk.NumChannels)
|
||||
if channels < 1 {
|
||||
return Audio{}, errors.New("wav: invalid channel count")
|
||||
}
|
||||
|
||||
sampleRate := int(fmtChunk.SampleRate)
|
||||
bits := int(fmtChunk.BitsPerSample)
|
||||
if bits == 0 {
|
||||
return Audio{}, errors.New("wav: invalid bits per sample")
|
||||
}
|
||||
|
||||
var samples []float64
|
||||
if format == 3 {
|
||||
samples = decodeWavFloat(data, bits, channels)
|
||||
} else {
|
||||
samples = decodeWavPCM(data, bits, channels)
|
||||
}
|
||||
if samples == nil {
|
||||
return Audio{}, fmt.Errorf("wav: unsupported bit depth %d", bits)
|
||||
}
|
||||
|
||||
return Audio{SampleRate: sampleRate, Samples: samples}, nil
|
||||
}
|
||||
|
||||
// decodeWavPCM converts interleaved little-endian integer PCM into mono float
// samples. 8-bit samples are treated as unsigned with a bias of 128; 16-, 24-
// and 32-bit samples as signed. Every sample is divided by 2^(bits-1) and the
// channels of each frame are averaged. Trailing bytes that do not fill a whole
// frame are ignored.
//
// It returns nil when bits is not 8, 16, 24 or 32, or when channels is less
// than 1, regardless of how much data is supplied. For a supported layout the
// result is non-nil even when data holds no complete frame.
func decodeWavPCM(data []byte, bits, channels int) []float64 {
	// Validate the layout up front so the outcome does not depend on whether
	// any sample data is present.
	switch bits {
	case 8, 16, 24, 32:
	default:
		return nil
	}
	if channels < 1 {
		return nil
	}

	bytesPerSample := bits / 8
	frameSize := bytesPerSample * channels
	scale := float64(int64(1) << (bits - 1))

	out := make([]float64, len(data)/frameSize)
	for i := range out {
		frame := data[i*frameSize : (i+1)*frameSize]
		var sum float64
		for off := 0; off < frameSize; off += bytesPerSample {
			var v int32
			switch bits {
			case 8:
				v = int32(frame[off]) - 128
			case 16:
				v = int32(int16(binary.LittleEndian.Uint16(frame[off:])))
			case 24:
				// The int8 conversion of the top byte sign-extends the value.
				v = int32(frame[off]) | int32(frame[off+1])<<8 | int32(int8(frame[off+2]))<<16
			case 32:
				v = int32(binary.LittleEndian.Uint32(frame[off:]))
			}
			sum += float64(v) / scale
		}
		out[i] = sum / float64(channels)
	}
	return out
}
|
||||
|
||||
// decodeWavFloat converts interleaved little-endian IEEE float samples
// (32- or 64-bit) into mono float samples by averaging the channels of each
// frame. Trailing bytes that do not fill a whole frame are ignored.
//
// It returns nil when bits is not 32 or 64, or when channels is less than 1,
// regardless of how much data is supplied. For a supported layout the result
// is non-nil even when data holds no complete frame.
func decodeWavFloat(data []byte, bits, channels int) []float64 {
	// Validate the layout up front so the outcome does not depend on whether
	// any sample data is present.
	if bits != 32 && bits != 64 {
		return nil
	}
	if channels < 1 {
		return nil
	}

	bytesPerSample := bits / 8
	frameSize := bytesPerSample * channels

	out := make([]float64, len(data)/frameSize)
	for i := range out {
		frame := data[i*frameSize : (i+1)*frameSize]
		var sum float64
		for off := 0; off < frameSize; off += bytesPerSample {
			if bits == 32 {
				sum += float64(math.Float32frombits(binary.LittleEndian.Uint32(frame[off:])))
			} else {
				sum += math.Float64frombits(binary.LittleEndian.Uint64(frame[off:]))
			}
		}
		out[i] = sum / float64(channels)
	}
	return out
}
|
||||
|
||||
// isGUID reports whether b is the sub-format GUID
// xxxxxxxx-0000-0010-8000-00aa00389b71 whose first field equals sub. The
// first three fields are stored little-endian; the remaining eight bytes are
// compared as written.
func isGUID(b [16]byte, sub uint32) bool {
	want := [16]byte{
		0, 0, 0, 0, // first field, filled in below
		0x00, 0x00,
		0x10, 0x00,
		0x80, 0x00,
		0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71,
	}
	binary.LittleEndian.PutUint32(want[0:4], sub)
	return b == want
}
|
||||
47
internal/audio/wav_extensible_test.go
Normal file
47
internal/audio/wav_extensible_test.go
Normal file
@ -0,0 +1,47 @@
|
||||
package audio
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestDecodeWAVExtensiblePCM hand-builds a WAV file with a 40-byte extensible
// "fmt " chunk whose sub-format GUID selects PCM, and checks that DecodeBytes
// produces samples from it.
func TestDecodeWAVExtensiblePCM(t *testing.T) {
	payload := []byte{0, 0, 0, 0}
	buf := &bytes.Buffer{}
	// "WAVE" tag + fmt chunk (8-byte header + 40) + data chunk (8-byte header + payload).
	riffSize := 4 + (8 + 40) + (8 + len(payload))

	buf.WriteString("RIFF")
	writeU32(buf, uint32(riffSize))
	buf.WriteString("WAVE")

	buf.WriteString("fmt ")
	writeU32(buf, 40)      // fmt chunk size
	writeU16(buf, 0xFFFE)  // format tag: extensible
	writeU16(buf, 1)       // channels
	writeU32(buf, 44100)   // sample rate
	writeU32(buf, 44100*2) // byte rate
	writeU16(buf, 2)       // block align
	writeU16(buf, 16)      // bits per sample
	// The next three fields are not read by the parser; presumably extension
	// size, valid bits and channel mask per the extensible layout — TODO confirm.
	writeU16(buf, 22)
	writeU16(buf, 16)
	writeU32(buf, 1)
	// Sub-format GUID 00000001-0000-0010-8000-00aa00389b71 (PCM).
	buf.Write([]byte{
		0x01, 0x00, 0x00, 0x00,
		0x00, 0x00,
		0x10, 0x00,
		0x80, 0x00,
		0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71,
	})

	buf.WriteString("data")
	writeU32(buf, uint32(len(payload)))
	buf.Write(payload)

	pcm, err := DecodeBytes(buf.Bytes(), Options{})
	if err != nil {
		t.Fatalf("DecodeBytes: %v", err)
	}
	if len(pcm.Samples) == 0 {
		t.Fatalf("empty samples")
	}
}
|
||||
118
internal/audio/wav_extra_test.go
Normal file
118
internal/audio/wav_extra_test.go
Normal file
@ -0,0 +1,118 @@
|
||||
package audio
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestDecodeWAVWithExtraChunk places an odd-sized unknown "JUNK" chunk (plus
// its pad byte) between the fmt and data chunks and checks that the decoder
// skips it and still finds the sample data.
func TestDecodeWAVWithExtraChunk(t *testing.T) {
	payload := []byte{0, 0, 0, 0}
	buf := &bytes.Buffer{}
	buf.WriteString("RIFF")
	writeU32(buf, uint32(4+(8+16)+(8+5)+(8+len(payload))))
	buf.WriteString("WAVE")

	buf.WriteString("fmt ")
	writeU32(buf, 16)      // fmt chunk size
	writeU16(buf, 1)       // format tag: PCM
	writeU16(buf, 1)       // channels
	writeU32(buf, 44100)   // sample rate
	writeU32(buf, 44100*2) // byte rate
	writeU16(buf, 2)       // block align
	writeU16(buf, 16)      // bits per sample

	buf.WriteString("JUNK")
	writeU32(buf, 5) // odd size, so one pad byte follows the payload
	buf.Write([]byte{1, 2, 3, 4, 5})
	buf.WriteByte(0) // pad byte

	buf.WriteString("data")
	writeU32(buf, uint32(len(payload)))
	buf.Write(payload)

	pcm, err := DecodeBytes(buf.Bytes(), Options{})
	if err != nil {
		t.Fatalf("DecodeBytes: %v", err)
	}
	if len(pcm.Samples) == 0 {
		t.Fatalf("empty samples")
	}
}
|
||||
|
||||
// TestDecodeWAVMissingData builds a WAV file that has a fmt chunk but no data
// chunk and expects DecodeBytes to fail.
func TestDecodeWAVMissingData(t *testing.T) {
	buf := &bytes.Buffer{}
	buf.WriteString("RIFF")
	writeU32(buf, 4+(8+16))
	buf.WriteString("WAVE")

	buf.WriteString("fmt ")
	writeU32(buf, 16)      // fmt chunk size
	writeU16(buf, 1)       // format tag: PCM
	writeU16(buf, 1)       // channels
	writeU32(buf, 44100)   // sample rate
	writeU32(buf, 44100*2) // byte rate
	writeU16(buf, 2)       // block align
	writeU16(buf, 16)      // bits per sample

	if _, err := DecodeBytes(buf.Bytes(), Options{}); err == nil {
		t.Fatalf("expected error for missing data")
	}
}
|
||||
|
||||
func TestDecodeWAVHeaderOnly(t *testing.T) {
|
||||
buf := &bytes.Buffer{}
|
||||
buf.WriteString("RIFF")
|
||||
writeU32(buf, 4)
|
||||
buf.WriteString("WAVE")
|
||||
if _, err := decodeWAV(bytes.NewReader(buf.Bytes())); err == nil {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
// TestDecodeWAVInvalidChannels builds a PCM WAV file whose fmt chunk declares
// zero channels and expects DecodeBytes to fail.
func TestDecodeWAVInvalidChannels(t *testing.T) {
	buf := &bytes.Buffer{}
	buf.WriteString("RIFF")
	writeU32(buf, 4+(8+16)+(8+2))
	buf.WriteString("WAVE")

	buf.WriteString("fmt ")
	writeU32(buf, 16)      // fmt chunk size
	writeU16(buf, 1)       // format tag: PCM
	writeU16(buf, 0)       // channels: zero, which is the invalid value under test
	writeU32(buf, 44100)   // sample rate
	writeU32(buf, 44100*2) // byte rate
	writeU16(buf, 2)       // block align
	writeU16(buf, 16)      // bits per sample

	buf.WriteString("data")
	writeU32(buf, 2)
	buf.Write([]byte{0, 0})

	if _, err := DecodeBytes(buf.Bytes(), Options{}); err == nil {
		t.Fatalf("expected error for channels")
	}
}
|
||||
|
||||
// TestDecodeWAVFloatUnsupportedBits builds an IEEE-float WAV file with a
// 24-bit sample width, which the float decoder does not handle, and expects
// DecodeBytes to fail.
func TestDecodeWAVFloatUnsupportedBits(t *testing.T) {
	buf := &bytes.Buffer{}
	buf.WriteString("RIFF")
	writeU32(buf, 4+(8+16)+(8+3))
	buf.WriteString("WAVE")

	buf.WriteString("fmt ")
	writeU32(buf, 16)      // fmt chunk size
	writeU16(buf, 3)       // format tag: IEEE float
	writeU16(buf, 1)       // channels
	writeU32(buf, 44100)   // sample rate
	writeU32(buf, 44100*3) // byte rate
	writeU16(buf, 3)       // block align
	writeU16(buf, 24)      // bits per sample: the unsupported width under test

	buf.WriteString("data")
	writeU32(buf, 3)
	buf.Write([]byte{0, 0, 0})

	if _, err := DecodeBytes(buf.Bytes(), Options{}); err == nil {
		t.Fatalf("expected error for float bit depth")
	}
}
|
||||
77
internal/audio/wav_float_test.go
Normal file
77
internal/audio/wav_float_test.go
Normal file
@ -0,0 +1,77 @@
|
||||
package audio
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestDecodeWAVFloat32 builds a mono 32-bit IEEE-float WAV file holding two
// samples and checks that DecodeBytes returns two samples.
func TestDecodeWAVFloat32(t *testing.T) {
	buf := &bytes.Buffer{}
	buf.WriteString("RIFF")
	writeU32(buf, 4+(8+16)+(8+8))
	buf.WriteString("WAVE")

	buf.WriteString("fmt ")
	writeU32(buf, 16)      // fmt chunk size
	writeU16(buf, 3)       // format tag: IEEE float
	writeU16(buf, 1)       // channels
	writeU32(buf, 44100)   // sample rate
	writeU32(buf, 44100*4) // byte rate
	writeU16(buf, 4)       // block align
	writeU16(buf, 32)      // bits per sample

	buf.WriteString("data")
	writeU32(buf, 8) // two float32 samples
	_ = binary.Write(buf, binary.LittleEndian, float32(0.5))
	_ = binary.Write(buf, binary.LittleEndian, float32(-0.25))

	pcm, err := DecodeBytes(buf.Bytes(), Options{})
	if err != nil {
		t.Fatalf("DecodeBytes: %v", err)
	}
	if len(pcm.Samples) != 2 {
		t.Fatalf("samples = %d", len(pcm.Samples))
	}
}
|
||||
|
||||
// TestDecodeWAVFloat64 builds a mono 64-bit IEEE-float WAV file holding two
// samples and checks that DecodeBytes returns two samples.
func TestDecodeWAVFloat64(t *testing.T) {
	buf := &bytes.Buffer{}
	buf.WriteString("RIFF")
	writeU32(buf, 4+(8+16)+(8+16))
	buf.WriteString("WAVE")

	buf.WriteString("fmt ")
	writeU32(buf, 16)      // fmt chunk size
	writeU16(buf, 3)       // format tag: IEEE float
	writeU16(buf, 1)       // channels
	writeU32(buf, 44100)   // sample rate
	writeU32(buf, 44100*8) // byte rate
	writeU16(buf, 8)       // block align
	writeU16(buf, 64)      // bits per sample

	buf.WriteString("data")
	writeU32(buf, 16) // two float64 samples
	_ = binary.Write(buf, binary.LittleEndian, float64(0.5))
	_ = binary.Write(buf, binary.LittleEndian, float64(-0.25))

	pcm, err := DecodeBytes(buf.Bytes(), Options{})
	if err != nil {
		t.Fatalf("DecodeBytes: %v", err)
	}
	if len(pcm.Samples) != 2 {
		t.Fatalf("samples = %d", len(pcm.Samples))
	}
}
|
||||
|
||||
// writeU16 appends v to buf as two bytes, least-significant byte first.
func writeU16(buf *bytes.Buffer, v uint16) {
	buf.Write([]byte{byte(v), byte(v >> 8)})
}
|
||||
|
||||
// writeU32 appends v to buf as four bytes, least-significant byte first.
func writeU32(buf *bytes.Buffer, v uint32) {
	for shift := uint(0); shift < 32; shift += 8 {
		buf.WriteByte(byte(v >> shift))
	}
}
|
||||
101
internal/audio/wav_pcm_test.go
Normal file
101
internal/audio/wav_pcm_test.go
Normal file
@ -0,0 +1,101 @@
|
||||
package audio
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDecodeWAVPCM8(t *testing.T) {
|
||||
data := makeWAVPCM(8, []int32{0, 64, -64, 0}, 44100)
|
||||
pcm, err := DecodeBytes(data, Options{})
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeBytes: %v", err)
|
||||
}
|
||||
if len(pcm.Samples) != 4 {
|
||||
t.Fatalf("samples = %d", len(pcm.Samples))
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeWAVPCM24(t *testing.T) {
|
||||
data := makeWAVPCM(24, []int32{0, 100000, -100000, 0}, 44100)
|
||||
pcm, err := DecodeBytes(data, Options{})
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeBytes: %v", err)
|
||||
}
|
||||
if len(pcm.Samples) != 4 {
|
||||
t.Fatalf("samples = %d", len(pcm.Samples))
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeWAVPCM32(t *testing.T) {
|
||||
data := makeWAVPCM(32, []int32{0, 100000, -100000, 0}, 44100)
|
||||
pcm, err := DecodeBytes(data, Options{})
|
||||
if err != nil {
|
||||
t.Fatalf("DecodeBytes: %v", err)
|
||||
}
|
||||
if len(pcm.Samples) != 4 {
|
||||
t.Fatalf("samples = %d", len(pcm.Samples))
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeWAVUnsupportedFormat(t *testing.T) {
|
||||
data := makeWAVCustom(7, 16, []byte{0, 0}, 44100)
|
||||
if _, err := DecodeBytes(data, Options{}); err == nil {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeWAVUnsupportedBits(t *testing.T) {
|
||||
data := makeWAVCustom(1, 12, []byte{0, 0}, 44100)
|
||||
if _, err := DecodeBytes(data, Options{}); err == nil {
|
||||
t.Fatalf("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func makeWAVPCM(bits int, samples []int32, sampleRate int) []byte {
|
||||
data := &bytes.Buffer{}
|
||||
for _, s := range samples {
|
||||
switch bits {
|
||||
case 8:
|
||||
b := byte(int(s) + 128)
|
||||
data.WriteByte(b)
|
||||
case 16:
|
||||
_ = binary.Write(data, binary.LittleEndian, int16(s))
|
||||
case 24:
|
||||
v := uint32(int32(s))
|
||||
data.WriteByte(byte(v))
|
||||
data.WriteByte(byte(v >> 8))
|
||||
data.WriteByte(byte(v >> 16))
|
||||
case 32:
|
||||
_ = binary.Write(data, binary.LittleEndian, int32(s))
|
||||
}
|
||||
}
|
||||
return makeWAVCustom(1, bits, data.Bytes(), sampleRate)
|
||||
}
|
||||
|
||||
func makeWAVCustom(format uint16, bits int, payload []byte, sampleRate int) []byte {
|
||||
buf := &bytes.Buffer{}
|
||||
riffSize := 4 + (8 + 16) + (8 + len(payload))
|
||||
|
||||
buf.WriteString("RIFF")
|
||||
writeU32(buf, uint32(riffSize))
|
||||
buf.WriteString("WAVE")
|
||||
|
||||
buf.WriteString("fmt ")
|
||||
writeU32(buf, 16)
|
||||
writeU16(buf, format)
|
||||
writeU16(buf, 1)
|
||||
writeU32(buf, uint32(sampleRate))
|
||||
byteRate := sampleRate * (bits / 8)
|
||||
writeU32(buf, uint32(byteRate))
|
||||
blockAlign := bits / 8
|
||||
writeU16(buf, uint16(blockAlign))
|
||||
writeU16(buf, uint16(bits))
|
||||
|
||||
buf.WriteString("data")
|
||||
writeU32(buf, uint32(len(payload)))
|
||||
buf.Write(payload)
|
||||
|
||||
return buf.Bytes()
|
||||
}
|
||||
38
internal/audio/wav_test.go
Normal file
38
internal/audio/wav_test.go
Normal file
@ -0,0 +1,38 @@
|
||||
package audio
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
// makeWAV builds a 16-bit PCM RIFF/WAVE byte stream from samples.
//
// samples are written to the data chunk in order as little-endian int16
// values. channels below 1 is treated as 1; it only affects the header fields
// (channel count, byte rate, block align).
func makeWAV(samples []int16, sampleRate int, channels int) []byte {
	if channels < 1 {
		channels = 1
	}
	const bytesPerSample = 2
	payloadLen := len(samples) * bytesPerSample

	var out bytes.Buffer
	// put writes each value in little-endian order; writes to a bytes.Buffer
	// with fixed-size values do not fail, so the error is dropped.
	put := func(fields ...interface{}) {
		for _, field := range fields {
			_ = binary.Write(&out, binary.LittleEndian, field)
		}
	}

	// RIFF header: the size covers "WAVE" plus both chunks with their headers.
	out.WriteString("RIFF")
	put(uint32(4 + (8 + 16) + (8 + payloadLen)))
	out.WriteString("WAVE")

	out.WriteString("fmt ")
	put(
		uint32(16),                                 // fmt chunk size
		uint16(1),                                  // format tag
		uint16(channels),                           // channel count
		uint32(sampleRate),                         // sample rate
		uint32(sampleRate*channels*bytesPerSample), // byte rate
		uint16(channels*bytesPerSample),            // block align
		uint16(16),                                 // bits per sample
	)

	out.WriteString("data")
	put(uint32(payloadLen), samples)

	return out.Bytes()
}
|
||||
40
internal/dsp/fft.go
Normal file
40
internal/dsp/fft.go
Normal file
@ -0,0 +1,40 @@
|
||||
// Package dsp provides spectral analysis utilities.
|
||||
package dsp
|
||||
|
||||
import "math"
|
||||
|
||||
// FFTInPlace replaces x with its forward discrete Fourier transform using an
// iterative radix-2 algorithm (bit-reversal reordering followed by butterfly
// passes). The result is not scaled.
//
// len(x) must be a power of two. Slices of length 0 or 1 are returned
// unchanged. Any other length panics before x is modified, since the
// butterfly passes are only defined for power-of-two sizes.
func FFTInPlace(x []complex128) {
	n := len(x)
	if n <= 1 {
		return
	}
	// A power of two has exactly one bit set. Reject anything else up front:
	// the passes below would otherwise index past the end of x after having
	// already overwritten part of it.
	if n&(n-1) != 0 {
		panic("dsp: FFTInPlace requires a power-of-two length")
	}

	// Bit-reversal permutation. j tracks the bit-reversed counterpart of i by
	// performing a reversed-carry increment on every step.
	j := 0
	for i := 1; i < n; i++ {
		bit := n >> 1
		for ; j&bit != 0; bit >>= 1 {
			j &= ^bit
		}
		j |= bit
		if i < j {
			x[i], x[j] = x[j], x[i]
		}
	}

	// Butterfly passes over blocks of doubling size.
	for size := 2; size <= n; size <<= 1 {
		half := size / 2
		angle := -2 * math.Pi / float64(size)
		// wlen is the primitive size-th root of unity for the forward transform.
		wlen := complex(math.Cos(angle), math.Sin(angle))
		for i := 0; i < n; i += size {
			w := complex(1, 0)
			for k := 0; k < half; k++ {
				u := x[i+k]
				v := w * x[i+k+half]
				x[i+k] = u + v
				x[i+k+half] = u - v
				w *= wlen
			}
		}
	}
}
|
||||
13
internal/dsp/fft_test.go
Normal file
13
internal/dsp/fft_test.go
Normal file
@ -0,0 +1,13 @@
|
||||
package dsp
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestFFTImpulse(t *testing.T) {
|
||||
x := []complex128{1, 0, 0, 0}
|
||||
FFTInPlace(x)
|
||||
for i, v := range x {
|
||||
if real(v) < 0.99 || real(v) > 1.01 || imag(v) != 0 {
|
||||
t.Fatalf("bin %d = %v", i, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
98
internal/dsp/spectrogram.go
Normal file
98
internal/dsp/spectrogram.go
Normal file
@ -0,0 +1,98 @@
|
||||
// Package dsp provides spectral analysis utilities.
|
||||
package dsp
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
// Spectrogram holds the log-magnitude (dB) FFT frames produced by
// ComputeSpectrogram together with the parameters needed to interpret them.
type Spectrogram struct {
	// Frames is the number of analysis frames.
	Frames int
	// Bins is the number of frequency bins stored per frame.
	Bins int
	// Values holds Frames*Bins dB values, frame after frame; the value for
	// frame f and bin b is at index f*Bins+b.
	Values []float64
	// Min is the smallest value found in Values.
	Min float64
	// Max is the largest value found in Values.
	Max float64
	// SampleRate is the sample rate, in Hz, used for the analysis.
	SampleRate int
	// WindowSize is the analysis window length in samples.
	WindowSize int
	// BinHz is the frequency spacing between adjacent bins
	// (SampleRate / WindowSize).
	BinHz float64
}
|
||||
|
||||
// HannWindow returns a symmetric Hann window of length n, i.e.
// w[i] = 0.5 - 0.5*cos(2*pi*i/(n-1)).
//
// A window of length 1 is the single coefficient 1, which avoids dividing by
// zero. For n <= 0 an empty slice is returned instead of panicking on a
// negative allocation size.
func HannWindow(n int) []float64 {
	if n <= 0 {
		return []float64{}
	}
	w := make([]float64, n)
	if n == 1 {
		w[0] = 1
		return w
	}
	denom := float64(n - 1)
	for i := range w {
		w[i] = 0.5 - 0.5*math.Cos(2*math.Pi*float64(i)/denom)
	}
	return w
}
|
||||
|
||||
// ComputeSpectrogram computes a log-magnitude spectrogram.
|
||||
func ComputeSpectrogram(samples []float64, sampleRate, windowSize, hopSize int) Spectrogram {
|
||||
if windowSize <= 0 {
|
||||
windowSize = 2048
|
||||
}
|
||||
if hopSize <= 0 {
|
||||
hopSize = windowSize / 4
|
||||
}
|
||||
if hopSize <= 0 {
|
||||
hopSize = 1
|
||||
}
|
||||
if sampleRate <= 0 {
|
||||
sampleRate = 44100
|
||||
}
|
||||
|
||||
frames := 1
|
||||
if len(samples) > windowSize {
|
||||
frames = 1 + (len(samples)-windowSize+hopSize-1)/hopSize
|
||||
}
|
||||
bins := windowSize/2 + 1
|
||||
values := make([]float64, frames*bins)
|
||||
|
||||
window := HannWindow(windowSize)
|
||||
minVal := math.Inf(1)
|
||||
maxVal := math.Inf(-1)
|
||||
eps := 1e-9
|
||||
|
||||
frame := make([]complex128, windowSize)
|
||||
for f := 0; f < frames; f++ {
|
||||
start := f * hopSize
|
||||
for i := 0; i < windowSize; i++ {
|
||||
idx := start + i
|
||||
if idx < len(samples) {
|
||||
frame[i] = complex(samples[idx]*window[i], 0)
|
||||
} else {
|
||||
frame[i] = 0
|
||||
}
|
||||
}
|
||||
FFTInPlace(frame)
|
||||
for b := 0; b < bins; b++ {
|
||||
re := real(frame[b])
|
||||
im := imag(frame[b])
|
||||
mag := math.Sqrt(re*re + im*im)
|
||||
db := 20 * math.Log10(mag+eps)
|
||||
values[f*bins+b] = db
|
||||
if db < minVal {
|
||||
minVal = db
|
||||
}
|
||||
if db > maxVal {
|
||||
maxVal = db
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
binHz := float64(sampleRate) / float64(windowSize)
|
||||
return Spectrogram{
|
||||
Frames: frames,
|
||||
Bins: bins,
|
||||
Values: values,
|
||||
Min: minVal,
|
||||
Max: maxVal,
|
||||
SampleRate: sampleRate,
|
||||
WindowSize: windowSize,
|
||||
BinHz: binHz,
|
||||
}
|
||||
}
|
||||
51
internal/dsp/spectrogram_test.go
Normal file
51
internal/dsp/spectrogram_test.go
Normal file
@ -0,0 +1,51 @@
|
||||
package dsp
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestComputeSpectrogram(t *testing.T) {
|
||||
samples := make([]float64, 4096)
|
||||
for i := range samples {
|
||||
samples[i] = 0.5
|
||||
}
|
||||
spec := ComputeSpectrogram(samples, 44100, 1024, 256)
|
||||
if spec.Frames <= 0 || spec.Bins <= 0 {
|
||||
t.Fatalf("invalid spec size")
|
||||
}
|
||||
if len(spec.Values) != spec.Frames*spec.Bins {
|
||||
t.Fatalf("values len mismatch")
|
||||
}
|
||||
if spec.Min >= spec.Max {
|
||||
t.Fatalf("min/max not set")
|
||||
}
|
||||
if spec.BinHz <= 0 {
|
||||
t.Fatalf("invalid bin hz")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHannWindow(t *testing.T) {
|
||||
w1 := HannWindow(1)
|
||||
if len(w1) != 1 || w1[0] != 1 {
|
||||
t.Fatalf("hann size 1")
|
||||
}
|
||||
w := HannWindow(4)
|
||||
if len(w) != 4 {
|
||||
t.Fatalf("hann size 4")
|
||||
}
|
||||
if w[0] != 0 || w[3] != 0 {
|
||||
t.Fatalf("hann endpoints")
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeSpectrogramDefaults(t *testing.T) {
|
||||
samples := make([]float64, 100)
|
||||
spec := ComputeSpectrogram(samples, 0, 0, 0)
|
||||
if spec.SampleRate != 44100 {
|
||||
t.Fatalf("sample rate default = %d", spec.SampleRate)
|
||||
}
|
||||
if spec.WindowSize != 2048 {
|
||||
t.Fatalf("window default = %d", spec.WindowSize)
|
||||
}
|
||||
if spec.Frames != 1 {
|
||||
t.Fatalf("frames = %d", spec.Frames)
|
||||
}
|
||||
}
|
||||
92
internal/render/palette.go
Normal file
92
internal/render/palette.go
Normal file
@ -0,0 +1,92 @@
|
||||
// Package render turns spectrograms into images.
|
||||
package render
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"image/color"
|
||||
)
|
||||
|
||||
type (
	// Palette maps a normalized value to a color. The gradients built in this
	// package clamp values at or below 0 to their first color and values at
	// or above 1 to their last color.
	Palette func(t float64) color.RGBA

	// stop is one control point of a gradient: the color c applies at
	// normalized position pos.
	stop struct {
		pos float64
		c   color.RGBA
	}
)
|
||||
|
||||
// PaletteByName returns a palette for a given name.
|
||||
func PaletteByName(name string) (Palette, error) {
|
||||
switch name {
|
||||
case "classic":
|
||||
return gradient([]stop{
|
||||
{0.0, rgb(0, 0, 0)},
|
||||
{0.2, rgb(0, 32, 96)},
|
||||
{0.45, rgb(0, 160, 200)},
|
||||
{0.7, rgb(255, 180, 0)},
|
||||
{1.0, rgb(255, 255, 255)},
|
||||
}), nil
|
||||
case "magma":
|
||||
return gradient([]stop{
|
||||
{0.0, rgb(0, 0, 4)},
|
||||
{0.25, rgb(59, 12, 87)},
|
||||
{0.5, rgb(180, 54, 122)},
|
||||
{0.75, rgb(251, 140, 60)},
|
||||
{1.0, rgb(252, 253, 191)},
|
||||
}), nil
|
||||
case "inferno":
|
||||
return gradient([]stop{
|
||||
{0.0, rgb(0, 0, 4)},
|
||||
{0.25, rgb(61, 9, 101)},
|
||||
{0.5, rgb(187, 55, 84)},
|
||||
{0.75, rgb(249, 142, 8)},
|
||||
{1.0, rgb(252, 255, 164)},
|
||||
}), nil
|
||||
case "viridis":
|
||||
return gradient([]stop{
|
||||
{0.0, rgb(68, 1, 84)},
|
||||
{0.25, rgb(58, 82, 139)},
|
||||
{0.5, rgb(32, 144, 140)},
|
||||
{0.75, rgb(94, 201, 98)},
|
||||
{1.0, rgb(253, 231, 37)},
|
||||
}), nil
|
||||
case "gray", "grey":
|
||||
return gradient([]stop{{0, rgb(0, 0, 0)}, {1, rgb(255, 255, 255)}}), nil
|
||||
default:
|
||||
return nil, errors.New("unknown palette")
|
||||
}
|
||||
}
|
||||
|
||||
func gradient(stops []stop) Palette {
|
||||
return func(t float64) color.RGBA {
|
||||
if t <= 0 {
|
||||
return stops[0].c
|
||||
}
|
||||
if t >= 1 {
|
||||
return stops[len(stops)-1].c
|
||||
}
|
||||
for i := 0; i < len(stops)-1; i++ {
|
||||
if t >= stops[i].pos && t <= stops[i+1].pos {
|
||||
span := stops[i+1].pos - stops[i].pos
|
||||
if span <= 0 {
|
||||
return stops[i+1].c
|
||||
}
|
||||
local := (t - stops[i].pos) / span
|
||||
return lerp(stops[i].c, stops[i+1].c, local)
|
||||
}
|
||||
}
|
||||
return stops[len(stops)-1].c
|
||||
}
|
||||
}
|
||||
|
||||
// lerp blends the R, G and B channels of a and b linearly: t = 0 yields a's
// channels and t = 1 yields b's. Fractional results are truncated when
// converted back to uint8. Alpha is always 255.
func lerp(a, b color.RGBA, t float64) color.RGBA {
	mix := func(from, to uint8) uint8 {
		return uint8(float64(from) + (float64(to)-float64(from))*t)
	}
	return color.RGBA{
		R: mix(a.R, b.R),
		G: mix(a.G, b.G),
		B: mix(a.B, b.B),
		A: 255,
	}
}
|
||||
|
||||
// rgb returns the fully opaque color with the given red, green and blue
// components.
func rgb(r, g, b uint8) color.RGBA {
	opaque := color.RGBA{A: 255}
	opaque.R, opaque.G, opaque.B = r, g, b
	return opaque
}
|
||||
102
internal/render/render.go
Normal file
102
internal/render/render.go
Normal file
@ -0,0 +1,102 @@
|
||||
// Package render turns spectrograms into images.
|
||||
package render
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"image"
|
||||
"math"
|
||||
|
||||
"github.com/steipete/songsee/internal/dsp"
|
||||
)
|
||||
|
||||
// Options configures spectrogram rendering.
|
||||
type Options struct {
|
||||
Width int
|
||||
Height int
|
||||
MinFreq float64
|
||||
MaxFreq float64
|
||||
Palette Palette
|
||||
MinDB float64
|
||||
MaxDB float64
|
||||
ClampDB bool
|
||||
FlipVert bool
|
||||
}
|
||||
|
||||
// Spectrogram renders a spectrogram into an RGBA image.
|
||||
func Spectrogram(spec dsp.Spectrogram, opts Options) (*image.RGBA, error) {
|
||||
if opts.Width <= 0 || opts.Height <= 0 {
|
||||
return nil, fmt.Errorf("invalid output size")
|
||||
}
|
||||
if opts.Palette == nil {
|
||||
return nil, fmt.Errorf("palette required")
|
||||
}
|
||||
|
||||
minDB := spec.Min
|
||||
maxDB := spec.Max
|
||||
if opts.ClampDB {
|
||||
minDB = opts.MinDB
|
||||
maxDB = opts.MaxDB
|
||||
}
|
||||
if maxDB <= minDB {
|
||||
maxDB = minDB + 1
|
||||
}
|
||||
|
||||
minBin := 0
|
||||
maxBin := spec.Bins - 1
|
||||
if opts.MinFreq > 0 {
|
||||
minBin = int(opts.MinFreq / spec.BinHz)
|
||||
}
|
||||
if opts.MaxFreq > 0 {
|
||||
maxBin = int(opts.MaxFreq / spec.BinHz)
|
||||
}
|
||||
if minBin < 0 {
|
||||
minBin = 0
|
||||
}
|
||||
if maxBin >= spec.Bins {
|
||||
maxBin = spec.Bins - 1
|
||||
}
|
||||
if maxBin <= minBin {
|
||||
minBin = 0
|
||||
maxBin = spec.Bins - 1
|
||||
}
|
||||
binSpan := maxBin - minBin
|
||||
|
||||
img := image.NewRGBA(image.Rect(0, 0, opts.Width, opts.Height))
|
||||
frames := spec.Frames
|
||||
bins := spec.Bins
|
||||
for x := 0; x < opts.Width; x++ {
|
||||
frame := 0
|
||||
if frames > 1 && opts.Width > 1 {
|
||||
frame = int(math.Round(float64(x) * float64(frames-1) / float64(opts.Width-1)))
|
||||
}
|
||||
frameOffset := frame * bins
|
||||
for y := 0; y < opts.Height; y++ {
|
||||
pos := 0.0
|
||||
if opts.Height > 1 {
|
||||
pos = float64(y) / float64(opts.Height-1)
|
||||
}
|
||||
bin := minBin + int(math.Round((1-pos)*float64(binSpan)))
|
||||
if bin < minBin {
|
||||
bin = minBin
|
||||
}
|
||||
if bin > maxBin {
|
||||
bin = maxBin
|
||||
}
|
||||
val := spec.Values[frameOffset+bin]
|
||||
norm := (val - minDB) / (maxDB - minDB)
|
||||
if norm < 0 {
|
||||
norm = 0
|
||||
}
|
||||
if norm > 1 {
|
||||
norm = 1
|
||||
}
|
||||
c := opts.Palette(norm)
|
||||
ypos := y
|
||||
if opts.FlipVert {
|
||||
ypos = opts.Height - 1 - y
|
||||
}
|
||||
img.SetRGBA(x, ypos, c)
|
||||
}
|
||||
}
|
||||
return img, nil
|
||||
}
|
||||
154
internal/render/render_test.go
Normal file
154
internal/render/render_test.go
Normal file
@ -0,0 +1,154 @@
|
||||
package render
|
||||
|
||||
import (
|
||||
"image/color"
|
||||
"testing"
|
||||
|
||||
"github.com/steipete/songsee/internal/dsp"
|
||||
)
|
||||
|
||||
func TestPaletteByName(t *testing.T) {
|
||||
names := []string{"classic", "magma", "inferno", "viridis", "gray", "grey"}
|
||||
for _, name := range names {
|
||||
if _, err := PaletteByName(name); err != nil {
|
||||
t.Fatalf("palette %s: %v", name, err)
|
||||
}
|
||||
}
|
||||
if _, err := PaletteByName("nope"); err == nil {
|
||||
t.Fatalf("expected error for unknown palette")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenderSpectrogram(t *testing.T) {
|
||||
spec := dsp.Spectrogram{
|
||||
Frames: 2,
|
||||
Bins: 2,
|
||||
Values: []float64{-20, -5, -10, -1},
|
||||
Min: -20,
|
||||
Max: -1,
|
||||
BinHz: 100,
|
||||
}
|
||||
img, err := Spectrogram(spec, Options{
|
||||
Width: 4,
|
||||
Height: 4,
|
||||
Palette: func(t float64) color.RGBA { return color.RGBA{R: uint8(255 * t), A: 255} },
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("RenderSpectrogram: %v", err)
|
||||
}
|
||||
if img.Bounds().Dx() != 4 || img.Bounds().Dy() != 4 {
|
||||
t.Fatalf("unexpected bounds")
|
||||
}
|
||||
c1 := img.RGBAAt(0, 0)
|
||||
c2 := img.RGBAAt(3, 3)
|
||||
if c1 == c2 {
|
||||
t.Fatalf("expected varying pixels")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenderSpectrogramErrors(t *testing.T) {
|
||||
spec := dsp.Spectrogram{
|
||||
Frames: 1,
|
||||
Bins: 1,
|
||||
Values: []float64{0},
|
||||
Min: 0,
|
||||
Max: 1,
|
||||
BinHz: 100,
|
||||
}
|
||||
if _, err := Spectrogram(spec, Options{Width: 0, Height: 1, Palette: func(float64) color.RGBA { return color.RGBA{} }}); err == nil {
|
||||
t.Fatalf("expected size error")
|
||||
}
|
||||
if _, err := Spectrogram(spec, Options{Width: 1, Height: 1}); err == nil {
|
||||
t.Fatalf("expected palette error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenderSpectrogramClampAndRange(t *testing.T) {
|
||||
spec := dsp.Spectrogram{
|
||||
Frames: 3,
|
||||
Bins: 4,
|
||||
Values: []float64{-80, -40, -20, 0, -70, -35, -15, -2, -60, -30, -10, -1},
|
||||
Min: -80,
|
||||
Max: 0,
|
||||
BinHz: 100,
|
||||
}
|
||||
img, err := Spectrogram(spec, Options{
|
||||
Width: 3,
|
||||
Height: 2,
|
||||
MinFreq: 50,
|
||||
MaxFreq: 250,
|
||||
Palette: func(t float64) color.RGBA { return color.RGBA{B: uint8(255 * t), A: 255} },
|
||||
MinDB: -60,
|
||||
MaxDB: -10,
|
||||
ClampDB: true,
|
||||
FlipVert: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("RenderSpectrogram: %v", err)
|
||||
}
|
||||
if img.Bounds().Dx() != 3 || img.Bounds().Dy() != 2 {
|
||||
t.Fatalf("unexpected bounds")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGradientEndpoints(t *testing.T) {
|
||||
p := gradient([]stop{{0, rgb(0, 0, 0)}, {1, rgb(255, 0, 0)}})
|
||||
if c := p(0); c.R != 0 || c.G != 0 || c.B != 0 {
|
||||
t.Fatalf("start color mismatch")
|
||||
}
|
||||
if c := p(1); c.R != 255 || c.G != 0 || c.B != 0 {
|
||||
t.Fatalf("end color mismatch")
|
||||
}
|
||||
if c := p(0.5); c.R == 0 || c.R == 255 {
|
||||
t.Fatalf("mid color not interpolated")
|
||||
}
|
||||
if c := p(-1); c.R != 0 {
|
||||
t.Fatalf("clamp low")
|
||||
}
|
||||
if c := p(2); c.R != 255 {
|
||||
t.Fatalf("clamp high")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenderSpectrogramSinglePixel(t *testing.T) {
|
||||
spec := dsp.Spectrogram{
|
||||
Frames: 1,
|
||||
Bins: 1,
|
||||
Values: []float64{-10},
|
||||
Min: -10,
|
||||
Max: -10,
|
||||
BinHz: 100,
|
||||
}
|
||||
img, err := Spectrogram(spec, Options{
|
||||
Width: 1,
|
||||
Height: 1,
|
||||
Palette: func(_ float64) color.RGBA { return color.RGBA{G: 200, A: 255} },
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("RenderSpectrogram: %v", err)
|
||||
}
|
||||
if img.Bounds().Dx() != 1 || img.Bounds().Dy() != 1 {
|
||||
t.Fatalf("unexpected bounds")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenderSpectrogramRangeReset(t *testing.T) {
|
||||
spec := dsp.Spectrogram{
|
||||
Frames: 2,
|
||||
Bins: 3,
|
||||
Values: []float64{-10, -5, -1, -10, -5, -1},
|
||||
Min: -10,
|
||||
Max: -1,
|
||||
BinHz: 100,
|
||||
}
|
||||
_, err := Spectrogram(spec, Options{
|
||||
Width: 2,
|
||||
Height: 2,
|
||||
MinFreq: 1000,
|
||||
MaxFreq: 200,
|
||||
Palette: func(_ float64) color.RGBA { return color.RGBA{R: 50, A: 255} },
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("RenderSpectrogram: %v", err)
|
||||
}
|
||||
}
|
||||
BIN
testdata/sine.mp3
vendored
Normal file
BIN
testdata/sine.mp3
vendored
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user