fix(sync): log thread progress percentages

This commit is contained in:
Vincent Koc 2026-05-02 19:34:14 -07:00
parent ec8de7a53d
commit 5d8b59c79b
No known key found for this signature in database
4 changed files with 69 additions and 1 deletions

2
go.sum
View File

@ -62,6 +62,8 @@ github.com/vincentkoc/crawlkit v0.3.12 h1:2hs4DXk6LkI4sdbgnFU+mUNaC2gmhQfkMx5C+b
github.com/vincentkoc/crawlkit v0.3.12/go.mod h1:tSSR6CmUqKmfoxzxxRJGARm95sH+Acu63nhzrXkpXo0=
github.com/vincentkoc/crawlkit v0.3.13 h1:8QDpI1KXRhvxGlHpomHn641+S3aq7nGLtD8mMLZvCHo=
github.com/vincentkoc/crawlkit v0.3.13/go.mod h1:tSSR6CmUqKmfoxzxxRJGARm95sH+Acu63nhzrXkpXo0=
github.com/vincentkoc/crawlkit v0.3.14 h1:+bE9yPjfE2VRvquJEpHvh+35qcX70RiqisZv/7vChW0=
github.com/vincentkoc/crawlkit v0.3.14/go.mod h1:tSSR6CmUqKmfoxzxxRJGARm95sH+Acu63nhzrXkpXo0=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=

View File

@ -7,6 +7,7 @@ import (
"flag"
"fmt"
"io"
"log/slog"
"os"
"os/exec"
"path/filepath"
@ -1821,6 +1822,7 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy
Reporter: func(message string) {
fmt.Fprintln(a.Stderr, message)
},
Logger: progressLogger(a.Stderr),
})
if err != nil {
return syncer.Stats{}, err
@ -1828,6 +1830,17 @@ func (a *App) syncRepository(ctx context.Context, owner, repo string, options sy
return stats, nil
}
func progressLogger(w io.Writer) *slog.Logger {
return slog.New(slog.NewTextHandler(w, &slog.HandlerOptions{
ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
if attr.Key == slog.TimeKey {
return slog.Attr{}
}
return attr
},
}))
}
func (a *App) runInit(ctx context.Context, args []string) error {
fs := flag.NewFlagSet("init", flag.ContinueOnError)
fs.SetOutput(io.Discard)

View File

@ -6,6 +6,7 @@ import (
"encoding/hex"
"encoding/json"
"fmt"
"log/slog"
"strconv"
"strings"
"time"
@ -13,6 +14,7 @@ import (
"github.com/openclaw/gitcrawl/internal/documents"
gh "github.com/openclaw/gitcrawl/internal/github"
"github.com/openclaw/gitcrawl/internal/store"
"github.com/vincentkoc/crawlkit/progress"
)
type GitHubClient interface {
@ -45,6 +47,7 @@ type Options struct {
IncludeComments bool
IncludePRDetails bool
Reporter gh.Reporter
Logger *slog.Logger
}
type Stats struct {
@ -132,6 +135,15 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
MetadataOnly: !options.IncludeComments,
StartedAt: started,
}
tracker := progress.New(options.Logger, progress.Options{
Name: "sync",
Unit: "threads",
Total: int64(len(rows)),
Attrs: []any{
"repository", stats.Repository,
"state", state,
},
})
persist := func(st *store.Store) error {
for _, row := range rows {
thread := mapIssueToThread(repoID, row, s.now().Format(time.RFC3339Nano))
@ -169,6 +181,11 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
} else {
stats.IssuesSynced++
}
tracker.Add(1,
"number", thread.Number,
"kind", thread.Kind,
"thread_state", thread.State,
)
}
if len(numbers) == 0 && state == "open" && since != "" && options.Limit <= 0 {
closed, err := s.applyClosedOverlapSweep(ctx, st, repoID, options, since)
@ -193,13 +210,17 @@ func (s *Syncer) Sync(ctx context.Context, options Options) (Stats, error) {
}
if !options.IncludeComments {
if err := s.store.WithTx(ctx, persist); err != nil {
tracker.Finish(err)
return Stats{}, err
}
tracker.Finish(nil)
return stats, nil
}
if err := persist(s.store); err != nil {
tracker.Finish(err)
return Stats{}, err
}
tracker.Finish(nil)
return stats, nil
}

View File

@ -1,9 +1,12 @@
package syncer
import (
"bytes"
"context"
"encoding/json"
"log/slog"
"path/filepath"
"strings"
"testing"
"time"
@ -286,7 +289,13 @@ func TestSyncPersistsIssuesAndPullRequests(t *testing.T) {
s := New(fakeGitHub{}, st)
s.now = func() time.Time { return time.Date(2026, 4, 26, 0, 0, 0, 0, time.UTC) }
stats, err := s.Sync(ctx, Options{Owner: "openclaw", Repo: "gitcrawl", IncludeComments: true})
var progressLogs bytes.Buffer
stats, err := s.Sync(ctx, Options{
Owner: "openclaw",
Repo: "gitcrawl",
IncludeComments: true,
Logger: testProgressLogger(&progressLogs),
})
if err != nil {
t.Fatalf("sync: %v", err)
}
@ -321,6 +330,18 @@ func TestSyncPersistsIssuesAndPullRequests(t *testing.T) {
if documentCount != 1 {
t.Fatalf("document count: got %d want 1", documentCount)
}
for _, want := range []string{
`msg="sync progress"`,
`state=finished`,
`unit=threads`,
`percent=100.0`,
`completion=100.0%`,
`repository=openclaw/gitcrawl`,
} {
if !strings.Contains(progressLogs.String(), want) {
t.Fatalf("missing %q in progress logs:\n%s", want, progressLogs.String())
}
}
}
func TestSyncHydratesPullReviewComments(t *testing.T) {
@ -681,3 +702,14 @@ func TestMappingFallbackBranches(t *testing.T) {
t.Fatalf("thread = %+v", thread)
}
}
func testProgressLogger(out *bytes.Buffer) *slog.Logger {
return slog.New(slog.NewTextHandler(out, &slog.HandlerOptions{
ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
if attr.Key == slog.TimeKey {
return slog.Attr{}
}
return attr
},
}))
}