fix: keep embedding snapshot state local (#38)

This commit is contained in:
Peter Steinberger 2026-04-22 14:47:30 +01:00
parent af3b9bf178
commit ed929a92eb
4 changed files with 19 additions and 16 deletions

View File

@@ -9,6 +9,7 @@ All notable changes to `discrawl` will be documented in this file.
- normalized message text is now sanitized before it reaches SQLite and FTS5, repairing malformed UTF-8 and stripping invisible/control-character noise that can poison search content
- local embedding providers now support OpenAI-compatible endpoints, Ollama, and llama.cpp, and `doctor` can probe the configured provider before you queue vectors
- `embed` now drains the queued embedding backlog in bounded batches, requeues safely on provider throttling, and drops stale stored vectors when messages no longer have embeddable content
- Git-backed snapshots now keep embedding queue state and generated vectors local to each archive, so subscribers no longer inherit misleading embedding backlog metadata. (#38) Thanks @GaosCode.
## 0.3.0 - 2026-04-21

View File

@@ -14,6 +14,7 @@ import (
"time"
"github.com/bwmarrin/discordgo"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/steipete/discrawl/internal/config"
@@ -497,12 +498,12 @@ func TestEmbedCommandDrainsBoundedBacklog(t *testing.T) {
dbPath := filepath.Join(dir, "discrawl.db")
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, "/embeddings", r.URL.Path)
assert.Equal(t, "/embeddings", r.URL.Path)
var req struct {
Input []string `json:"input"`
}
require.NoError(t, json.NewDecoder(r.Body).Decode(&req))
require.Len(t, req.Input, 1)
assert.NoError(t, json.NewDecoder(r.Body).Decode(&req))
assert.Len(t, req.Input, 1)
_, _ = w.Write([]byte(`{"data":[{"index":0,"embedding":[1,2]}]}`))
}))
defer server.Close()
@@ -745,7 +746,7 @@ func TestDoctorChecksEnabledLocalEmbeddingProvider(t *testing.T) {
dbPath := filepath.Join(dir, "discrawl.db")
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, "/api/embed", r.URL.Path)
assert.Equal(t, "/api/embed", r.URL.Path)
_, _ = w.Write([]byte(`{"model":"nomic-embed-text","embeddings":[[1,2,3]]}`))
}))
defer server.Close()

View File

@@ -7,6 +7,7 @@ import (
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/steipete/discrawl/internal/config"
@@ -16,12 +17,12 @@ func TestOllamaProviderEmbeds(t *testing.T) {
t.Parallel()
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, "/api/embed", r.URL.Path)
require.Equal(t, http.MethodPost, r.Method)
assert.Equal(t, "/api/embed", r.URL.Path)
assert.Equal(t, http.MethodPost, r.Method)
var req ollamaEmbedRequest
require.NoError(t, json.NewDecoder(r.Body).Decode(&req))
require.Equal(t, "nomic-embed-text", req.Model)
require.Equal(t, []string{"abcd", "xy"}, req.Input)
assert.NoError(t, json.NewDecoder(r.Body).Decode(&req))
assert.Equal(t, "nomic-embed-text", req.Model)
assert.Equal(t, []string{"abcd", "xy"}, req.Input)
_, _ = w.Write([]byte(`{"model":"nomic-embed-text","embeddings":[[1,2,3],[4,5,6]]}`))
}))
defer server.Close()
@@ -44,12 +45,12 @@ func TestOllamaProviderEmbeds(t *testing.T) {
func TestOpenAICompatibleProviderEmbedsAndUsesAuth(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, "/embeddings", r.URL.Path)
require.Equal(t, "Bearer secret", r.Header.Get("Authorization"))
assert.Equal(t, "/embeddings", r.URL.Path)
assert.Equal(t, "Bearer secret", r.Header.Get("Authorization"))
var req openAIEmbeddingRequest
require.NoError(t, json.NewDecoder(r.Body).Decode(&req))
require.Equal(t, "local-model", req.Model)
require.Equal(t, []string{"one", "two"}, req.Input)
assert.NoError(t, json.NewDecoder(r.Body).Decode(&req))
assert.Equal(t, "local-model", req.Model)
assert.Equal(t, []string{"one", "two"}, req.Input)
_, _ = w.Write([]byte(`{
"model":"local-model",
"data":[
@@ -136,7 +137,7 @@ func TestCheckProviderProbesLocalProvider(t *testing.T) {
t.Parallel()
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, "/api/embed", r.URL.Path)
assert.Equal(t, "/api/embed", r.URL.Path)
_, _ = w.Write([]byte(`{"model":"nomic-embed-text","embeddings":[[1,2]]}`))
}))
defer server.Close()

View File

@@ -211,7 +211,6 @@ func (s *Store) migrate(ctx context.Context) error {
if err := s.setSchemaVersion(ctx, storeSchemaVersion); err != nil {
return err
}
currentVersion = storeSchemaVersion
}
if version, err := s.schemaVersion(ctx); err != nil {
return err
@@ -501,6 +500,7 @@ func columnExists(ctx context.Context, tx *sql.Tx, table, column string) (bool,
}
return false, rows.Err()
}
func (s *Store) ensureFTSRowIDs(ctx context.Context) error {
var version sql.NullString
err := s.db.QueryRowContext(ctx, `