Add brabble STT engine and wiring
This commit is contained in:
parent
ec7f1ed87a
commit
1c1cebc704
@ -78,6 +78,9 @@ type NodeConfig struct {
|
||||
QuickActions bool
|
||||
QuickPingMessage string
|
||||
RoutingPlugin string
|
||||
STTEngine string
|
||||
STTCommand string
|
||||
STTArgs string
|
||||
}
|
||||
|
||||
func main() {
|
||||
@ -134,6 +137,9 @@ func usage() {
|
||||
fmt.Fprintln(os.Stderr, " -ping-interval Ping interval (default 30s)")
|
||||
fmt.Fprintln(os.Stderr, " -quick-actions Enable built-in quick actions (default true)")
|
||||
fmt.Fprintln(os.Stderr, " -router Routing plugin name (default default)")
|
||||
fmt.Fprintln(os.Stderr, " -stt-engine STT engine (line, brabble)")
|
||||
fmt.Fprintln(os.Stderr, " -stt-command STT command for brabble (default brabble)")
|
||||
fmt.Fprintln(os.Stderr, " -stt-args STT args for brabble (space-separated)")
|
||||
fmt.Fprintln(os.Stderr, " -ping-message Message used for telegram ping quick action (default \"Ping.\")")
|
||||
fmt.Fprintln(os.Stderr, " -mdns Advertise mDNS presence (default true)")
|
||||
fmt.Fprintln(os.Stderr, " -mdns-service mDNS service type (default _clawdis-node._tcp)")
|
||||
@ -170,6 +176,9 @@ func parseFlags(cmd string, args []string) NodeConfig {
|
||||
quickActions := fs.Bool("quick-actions", true, "enable built-in quick actions")
|
||||
quickPingMessage := fs.String("ping-message", "Ping.", "message used for telegram ping quick action")
|
||||
router := fs.String("router", "default", "routing plugin name")
|
||||
sttEngine := fs.String("stt-engine", "line", "STT engine (line, brabble)")
|
||||
sttCommand := fs.String("stt-command", "brabble", "STT command for brabble")
|
||||
sttArgs := fs.String("stt-args", "", "STT args for brabble (space-separated)")
|
||||
mdnsEnabled := fs.Bool("mdns", true, "advertise mDNS presence")
|
||||
mdnsService := fs.String("mdns-service", "_clawdis-node._tcp", "mDNS service type")
|
||||
mdnsDomain := fs.String("mdns-domain", "local.", "mDNS domain")
|
||||
@ -218,6 +227,9 @@ func parseFlags(cmd string, args []string) NodeConfig {
|
||||
QuickActions: *quickActions,
|
||||
QuickPingMessage: strings.TrimSpace(*quickPingMessage),
|
||||
RoutingPlugin: strings.TrimSpace(*router),
|
||||
STTEngine: strings.TrimSpace(*sttEngine),
|
||||
STTCommand: strings.TrimSpace(*sttCommand),
|
||||
STTArgs: strings.TrimSpace(*sttArgs),
|
||||
}
|
||||
return cfg
|
||||
}
|
||||
@ -275,23 +287,42 @@ func runNode(cfg NodeConfig) error {
|
||||
}
|
||||
|
||||
var transcriptCh <-chan stt.Transcript
|
||||
if cfg.StdinMode || cfg.StdinPath != "" {
|
||||
var capture audio.Capture
|
||||
if cfg.StdinPath != "" {
|
||||
capture = audio.NewLineCaptureFromPath(cfg.StdinPath, logf)
|
||||
} else {
|
||||
capture = audio.NewLineCapture("stdin", os.Stdin, logf)
|
||||
engineName := strings.TrimSpace(cfg.STTEngine)
|
||||
if engineName == "" {
|
||||
engineName = "line"
|
||||
}
|
||||
switch engineName {
|
||||
case "line":
|
||||
if cfg.StdinMode || cfg.StdinPath != "" {
|
||||
var capture audio.Capture
|
||||
if cfg.StdinPath != "" {
|
||||
capture = audio.NewLineCaptureFromPath(cfg.StdinPath, logf)
|
||||
} else {
|
||||
capture = audio.NewLineCapture("stdin", os.Stdin, logf)
|
||||
}
|
||||
frames, err := capture.Start(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
engine := stt.NewLineEngine()
|
||||
transcriptCh, err = engine.Transcribe(ctx, frames, stt.Options{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
logf("stt: %s capture=%s", engine.Name(), capture.Name())
|
||||
}
|
||||
frames, err := capture.Start(ctx)
|
||||
case "brabble":
|
||||
cmd := strings.TrimSpace(cfg.STTCommand)
|
||||
args := splitArgs(cfg.STTArgs)
|
||||
engine := stt.NewBrabbleEngine(stt.BrabbleConfig{Command: cmd, Args: args}, logf)
|
||||
var err error
|
||||
transcriptCh, err = engine.Transcribe(ctx, nil, stt.Options{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
engine := stt.NewLineEngine()
|
||||
transcriptCh, err = engine.Transcribe(ctx, frames, stt.Options{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
logf("stt: %s capture=%s", engine.Name(), capture.Name())
|
||||
logf("stt: %s cmd=%s", engine.Name(), formatCommand(cmd, args))
|
||||
default:
|
||||
return fmt.Errorf("unknown stt engine: %s", engineName)
|
||||
}
|
||||
|
||||
var mdnsCleanup func()
|
||||
@ -1252,6 +1283,25 @@ func randomID(n int) string {
|
||||
return hex.EncodeToString(buf)
|
||||
}
|
||||
|
||||
// splitArgs breaks a whitespace-separated argument string into individual
// arguments. Any run of whitespace acts as a single separator; quoting is not
// interpreted. It returns nil when input contains no arguments.
func splitArgs(input string) []string {
	if parts := strings.Fields(input); len(parts) > 0 {
		return parts
	}
	return nil
}
|
||||
|
||||
// formatCommand renders a command and its arguments as a single
// space-separated string for log output. A blank cmd is shown as "brabble".
func formatCommand(cmd string, args []string) string {
	name := strings.TrimSpace(cmd)
	if name == "" {
		name = "brabble"
	}
	if len(args) > 0 {
		return fmt.Sprintf("%s %s", name, strings.Join(args, " "))
	}
	return name
}
|
||||
|
||||
func splitCSV(value string) []string {
|
||||
value = strings.TrimSpace(value)
|
||||
if value == "" {
|
||||
|
||||
165
modules/stt/brabble.go
Normal file
165
modules/stt/brabble.go
Normal file
@ -0,0 +1,165 @@
|
||||
package stt
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/clawdbot/clawgo/modules/audio"
|
||||
)
|
||||
|
||||
// BrabbleConfig describes the external process a BrabbleEngine launches.
type BrabbleConfig struct {
	// Command is the executable to run. It is trimmed before use and
	// "brabble" is substituted when it is blank.
	Command string
	// Args are the arguments handed to Command as given.
	Args []string
}
|
||||
|
||||
type BrabbleEngine struct {
|
||||
cfg BrabbleConfig
|
||||
logf func(string, ...any)
|
||||
}
|
||||
|
||||
func NewBrabbleEngine(cfg BrabbleConfig, logf func(string, ...any)) Engine {
|
||||
return &BrabbleEngine{cfg: cfg, logf: logf}
|
||||
}
|
||||
|
||||
// Name reports the engine identifier, which is always "brabble".
func (e *BrabbleEngine) Name() string {
	const name = "brabble"
	return name
}
|
||||
|
||||
func (e *BrabbleEngine) Transcribe(ctx context.Context, _ <-chan audio.Frame, _ Options) (<-chan Transcript, error) {
|
||||
cmdPath := strings.TrimSpace(e.cfg.Command)
|
||||
if cmdPath == "" {
|
||||
cmdPath = "brabble"
|
||||
}
|
||||
cmd := exec.CommandContext(ctx, cmdPath, e.cfg.Args...)
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stderr, err := cmd.StderrPipe()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := cmd.Start(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out := make(chan Transcript, 32)
|
||||
go func() {
|
||||
defer close(out)
|
||||
e.readLines(ctx, stdout, out)
|
||||
}()
|
||||
go e.logLines(ctx, stderr)
|
||||
go func() {
|
||||
_ = cmd.Wait()
|
||||
}()
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (e *BrabbleEngine) readLines(ctx context.Context, r io.Reader, out chan<- Transcript) {
|
||||
scanner := bufio.NewScanner(r)
|
||||
scanner.Buffer(make([]byte, 0, 64*1024), 8*1024*1024)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
tr, ok := parseBrabbleLine(line)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
select {
|
||||
case out <- tr:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil && !errors.Is(err, io.EOF) && e.logf != nil {
|
||||
e.logf("brabble read error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func (e *BrabbleEngine) logLines(ctx context.Context, r io.Reader) {
|
||||
if e.logf == nil {
|
||||
return
|
||||
}
|
||||
scanner := bufio.NewScanner(r)
|
||||
for scanner.Scan() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
e.logf("brabble: %s", line)
|
||||
}
|
||||
}
|
||||
|
||||
// brabbleEvent is the JSON object shape parseBrabbleLine decodes from a line
// of output that begins with "{".
type brabbleEvent struct {
	// Type and Event are labels; a value containing "partial" (in any case)
	// marks the transcript as not final.
	Type  string `json:"type"`
	Event string `json:"event"`
	// Text, Transcript and Utterance are alternative keys for the spoken
	// text; the first non-blank one, in this order, is used.
	Text       string `json:"text"`
	Transcript string `json:"transcript"`
	Utterance  string `json:"utterance"`
	// Final is a pointer so that an absent key can be told apart from an
	// explicit false; absent is treated as final.
	Final *bool `json:"final"`
	// Payload is a nested object consulted for the text keys only when the
	// top-level ones are all blank.
	Payload json.RawMessage `json:"payload"`
}
|
||||
|
||||
// brabblePayload is the nested "payload" object of a brabbleEvent. It carries
// the same alternative text keys as the top-level event.
type brabblePayload struct {
	Text       string `json:"text"`
	Transcript string `json:"transcript"`
	Utterance  string `json:"utterance"`
}
|
||||
|
||||
func parseBrabbleLine(line string) (Transcript, bool) {
|
||||
if strings.HasPrefix(line, "{") {
|
||||
var evt brabbleEvent
|
||||
if err := json.Unmarshal([]byte(line), &evt); err == nil {
|
||||
text := pickBrabbleText(evt.Text, evt.Transcript, evt.Utterance)
|
||||
if text == "" && len(evt.Payload) > 0 {
|
||||
var payload brabblePayload
|
||||
if err := json.Unmarshal(evt.Payload, &payload); err == nil {
|
||||
text = pickBrabbleText(payload.Text, payload.Transcript, payload.Utterance)
|
||||
}
|
||||
}
|
||||
if text == "" {
|
||||
return Transcript{}, false
|
||||
}
|
||||
final := true
|
||||
if evt.Final != nil {
|
||||
final = *evt.Final
|
||||
}
|
||||
if strings.Contains(strings.ToLower(evt.Type), "partial") || strings.Contains(strings.ToLower(evt.Event), "partial") {
|
||||
final = false
|
||||
}
|
||||
return Transcript{Text: text, Final: final, Timestamp: time.Now(), Source: "brabble"}, true
|
||||
}
|
||||
}
|
||||
return Transcript{Text: strings.TrimSpace(line), Final: true, Timestamp: time.Now(), Source: "brabble"}, true
|
||||
}
|
||||
|
||||
// pickBrabbleText returns the first argument that is non-empty after trimming
// surrounding whitespace, in trimmed form, or "" when every argument is blank.
func pickBrabbleText(parts ...string) string {
	for i := range parts {
		if trimmed := strings.TrimSpace(parts[i]); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
|
||||
|
||||
func (e *BrabbleEngine) String() string {
|
||||
cmd := strings.TrimSpace(e.cfg.Command)
|
||||
if cmd == "" {
|
||||
cmd = "brabble"
|
||||
}
|
||||
if len(e.cfg.Args) == 0 {
|
||||
return cmd
|
||||
}
|
||||
return fmt.Sprintf("%s %s", cmd, strings.Join(e.cfg.Args, " "))
|
||||
}
|
||||
39
modules/stt/brabble_test.go
Normal file
39
modules/stt/brabble_test.go
Normal file
@ -0,0 +1,39 @@
|
||||
package stt
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestParseBrabbleLineJSON(t *testing.T) {
|
||||
line := `{"event":"transcript","text":"hey razor"}`
|
||||
tr, ok := parseBrabbleLine(line)
|
||||
if !ok {
|
||||
t.Fatalf("expected ok")
|
||||
}
|
||||
if tr.Text != "hey razor" {
|
||||
t.Fatalf("expected text")
|
||||
}
|
||||
if !tr.Final {
|
||||
t.Fatalf("expected final")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseBrabbleLinePayload(t *testing.T) {
|
||||
line := `{"event":"transcript","payload":{"transcript":"hello"}}`
|
||||
tr, ok := parseBrabbleLine(line)
|
||||
if !ok {
|
||||
t.Fatalf("expected ok")
|
||||
}
|
||||
if tr.Text != "hello" {
|
||||
t.Fatalf("expected payload text")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseBrabbleLinePlain(t *testing.T) {
|
||||
line := "bring me on telegram"
|
||||
tr, ok := parseBrabbleLine(line)
|
||||
if !ok {
|
||||
t.Fatalf("expected ok")
|
||||
}
|
||||
if tr.Text != line {
|
||||
t.Fatalf("expected same line")
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user