fix: keep embedding snapshot state local (#38)
This commit is contained in:
parent
af3b9bf178
commit
ed929a92eb
@ -9,6 +9,7 @@ All notable changes to `discrawl` will be documented in this file.
|
||||
- normalized message text is now sanitized before it reaches SQLite and FTS5, repairing malformed UTF-8 and stripping invisible/control-character noise that can poison search content
|
||||
- embedding generation can now use local providers (OpenAI-compatible endpoints, Ollama, and llama.cpp), and `doctor` can probe the configured provider before you queue vectors
|
||||
- `embed` now drains the queued embedding backlog in bounded batches, requeues safely on provider throttling, and drops stale stored vectors when messages no longer have embeddable content
|
||||
- Git-backed snapshots now keep embedding queue state and generated vectors local to each archive, so subscribers no longer inherit misleading embedding backlog metadata. (#38) Thanks @GaosCode.
|
||||
|
||||
## 0.3.0 - 2026-04-21
|
||||
|
||||
|
||||
@ -14,6 +14,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/bwmarrin/discordgo"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/steipete/discrawl/internal/config"
|
||||
@ -497,12 +498,12 @@ func TestEmbedCommandDrainsBoundedBacklog(t *testing.T) {
|
||||
dbPath := filepath.Join(dir, "discrawl.db")
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
require.Equal(t, "/embeddings", r.URL.Path)
|
||||
assert.Equal(t, "/embeddings", r.URL.Path)
|
||||
var req struct {
|
||||
Input []string `json:"input"`
|
||||
}
|
||||
require.NoError(t, json.NewDecoder(r.Body).Decode(&req))
|
||||
require.Len(t, req.Input, 1)
|
||||
assert.NoError(t, json.NewDecoder(r.Body).Decode(&req))
|
||||
assert.Len(t, req.Input, 1)
|
||||
_, _ = w.Write([]byte(`{"data":[{"index":0,"embedding":[1,2]}]}`))
|
||||
}))
|
||||
defer server.Close()
|
||||
@ -745,7 +746,7 @@ func TestDoctorChecksEnabledLocalEmbeddingProvider(t *testing.T) {
|
||||
dbPath := filepath.Join(dir, "discrawl.db")
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
require.Equal(t, "/api/embed", r.URL.Path)
|
||||
assert.Equal(t, "/api/embed", r.URL.Path)
|
||||
_, _ = w.Write([]byte(`{"model":"nomic-embed-text","embeddings":[[1,2,3]]}`))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
@ -7,6 +7,7 @@ import (
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/steipete/discrawl/internal/config"
|
||||
@ -16,12 +17,12 @@ func TestOllamaProviderEmbeds(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
require.Equal(t, "/api/embed", r.URL.Path)
|
||||
require.Equal(t, http.MethodPost, r.Method)
|
||||
assert.Equal(t, "/api/embed", r.URL.Path)
|
||||
assert.Equal(t, http.MethodPost, r.Method)
|
||||
var req ollamaEmbedRequest
|
||||
require.NoError(t, json.NewDecoder(r.Body).Decode(&req))
|
||||
require.Equal(t, "nomic-embed-text", req.Model)
|
||||
require.Equal(t, []string{"abcd", "xy"}, req.Input)
|
||||
assert.NoError(t, json.NewDecoder(r.Body).Decode(&req))
|
||||
assert.Equal(t, "nomic-embed-text", req.Model)
|
||||
assert.Equal(t, []string{"abcd", "xy"}, req.Input)
|
||||
_, _ = w.Write([]byte(`{"model":"nomic-embed-text","embeddings":[[1,2,3],[4,5,6]]}`))
|
||||
}))
|
||||
defer server.Close()
|
||||
@ -44,12 +45,12 @@ func TestOllamaProviderEmbeds(t *testing.T) {
|
||||
|
||||
func TestOpenAICompatibleProviderEmbedsAndUsesAuth(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
require.Equal(t, "/embeddings", r.URL.Path)
|
||||
require.Equal(t, "Bearer secret", r.Header.Get("Authorization"))
|
||||
assert.Equal(t, "/embeddings", r.URL.Path)
|
||||
assert.Equal(t, "Bearer secret", r.Header.Get("Authorization"))
|
||||
var req openAIEmbeddingRequest
|
||||
require.NoError(t, json.NewDecoder(r.Body).Decode(&req))
|
||||
require.Equal(t, "local-model", req.Model)
|
||||
require.Equal(t, []string{"one", "two"}, req.Input)
|
||||
assert.NoError(t, json.NewDecoder(r.Body).Decode(&req))
|
||||
assert.Equal(t, "local-model", req.Model)
|
||||
assert.Equal(t, []string{"one", "two"}, req.Input)
|
||||
_, _ = w.Write([]byte(`{
|
||||
"model":"local-model",
|
||||
"data":[
|
||||
@ -136,7 +137,7 @@ func TestCheckProviderProbesLocalProvider(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
require.Equal(t, "/api/embed", r.URL.Path)
|
||||
assert.Equal(t, "/api/embed", r.URL.Path)
|
||||
_, _ = w.Write([]byte(`{"model":"nomic-embed-text","embeddings":[[1,2]]}`))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
@ -211,7 +211,6 @@ func (s *Store) migrate(ctx context.Context) error {
|
||||
if err := s.setSchemaVersion(ctx, storeSchemaVersion); err != nil {
|
||||
return err
|
||||
}
|
||||
currentVersion = storeSchemaVersion
|
||||
}
|
||||
if version, err := s.schemaVersion(ctx); err != nil {
|
||||
return err
|
||||
@ -501,6 +500,7 @@ func columnExists(ctx context.Context, tx *sql.Tx, table, column string) (bool,
|
||||
}
|
||||
return false, rows.Err()
|
||||
}
|
||||
|
||||
func (s *Store) ensureFTSRowIDs(ctx context.Context) error {
|
||||
var version sql.NullString
|
||||
err := s.db.QueryRowContext(ctx, `
|
||||
|
||||
Loading…
Reference in New Issue
Block a user