gitcrawl/internal/cli/app.go
2026-05-05 18:16:02 -07:00

2936 lines
91 KiB
Go

package cli
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
"log/slog"
"os"
"os/exec"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
"time"
clusterer "github.com/openclaw/gitcrawl/internal/cluster"
"github.com/openclaw/gitcrawl/internal/config"
gh "github.com/openclaw/gitcrawl/internal/github"
"github.com/openclaw/gitcrawl/internal/openai"
"github.com/openclaw/gitcrawl/internal/store"
"github.com/openclaw/gitcrawl/internal/syncer"
"github.com/openclaw/gitcrawl/internal/vector"
"github.com/vincentkoc/crawlkit/control"
)
// Default tuning values shared by the TUI and clustering commands.
const (
// defaultTUIMinSize is the minimum member count `tui` applies when --min-size is left empty.
defaultTUIMinSize = 5
// defaultTUIWorkingSetLimit is combined with the requested --limit (via maxInt) when the
// interactive TUI loads its working set of clusters.
defaultTUIWorkingSetLimit = 500
// defaultClusterMaxSize is the default value of --max-cluster-size for `cluster` and `refresh`.
defaultClusterMaxSize = 40
// defaultClusterFanout is the default value of -k (nearest-neighbor fanout per thread).
defaultClusterFanout = 16
// defaultClusterThreshold is the default value of --threshold (minimum cosine score).
defaultClusterThreshold = 0.80
// defaultCrossKindMinScore is the default value of --cross-kind-threshold
// (minimum score for issue/pull request edges).
defaultCrossKindMinScore = 0.93
// NOTE(review): the remaining constants are not referenced in this part of the file;
// judging by their names they tune edge scoring and reference detection — confirm at their use sites.
highConfidenceEdgeScore = 0.90
weakEdgeMinTitleOverlap = 0.18
deterministicRefScore = 0.94
bodyRefEvidencePrefixChars = 240
)
// threadReferencePattern matches, case-insensitively, three spellings of a thread
// reference; the number lands in a different capture group for each:
//   - group 1: "owner/repo#123"
//   - group 2: "issues/123" or "pull/123"
//   - group 3: "#123" with at least two digits
var threadReferencePattern = regexp.MustCompile(`(?i)(?:\b[\w.-]+/[\w.-]+#(\d+)|(?:issues|pull)/(\d+)|#(\d{2,}))`)
// titleTokenPattern matches runs of four or more ASCII letters or digits.
var titleTokenPattern = regexp.MustCompile(`[A-Za-z0-9]{4,}`)
// referenceEvidence holds two boolean flags about a reference.
// NOTE(review): it is not used in this part of the file; the field names suggest it
// records whether a thread reference was seen in the title and/or early in the body — confirm.
type referenceEvidence struct {
Title bool
EarlyBody bool
}
// App is the gitcrawl command-line application. Run parses the global flags
// and dispatches to the per-command handlers defined on this type.
type App struct {
// Stdout and Stderr are the output streams; New points them at os.Stdout and os.Stderr.
Stdout io.Writer
Stderr io.Writer
// configPath is the trimmed value of the global --config flag, set by Run.
configPath string
// format is the resolved output format, set by Run (New starts it at FormatText).
format OutputFormat
}
// initResult is a JSON-serializable payload of configured paths.
// NOTE(review): presumably the output of the `init` command — its producer is
// not visible in this part of the file.
type initResult struct {
ConfigPath string `json:"config_path"`
DBPath string `json:"db_path"`
CacheDir string `json:"cache_dir"`
VectorDir string `json:"vector_dir"`
// The portable-store fields are omitted from JSON when empty.
PortableStoreURL string `json:"portable_store_url,omitempty"`
PortableStoreDir string `json:"portable_store_dir,omitempty"`
PortableStore string `json:"portable_store,omitempty"`
}
// OutputFormat names how command results are rendered; it is selected with the
// global --format flag (text|json|log).
type OutputFormat string
// Supported output formats.
const (
FormatText OutputFormat = "text"
FormatJSON OutputFormat = "json"
FormatLog OutputFormat = "log"
)
// version is the string reported by the `version` command and the --version flag.
// NOTE(review): "dev" is presumably overridden at build time (e.g. via -ldflags) — confirm.
var version = "dev"
// New returns an App wired to the process's standard output and standard
// error streams, rendering results as text.
func New() *App {
	app := new(App)
	app.Stdout = os.Stdout
	app.Stderr = os.Stderr
	app.format = FormatText
	return app
}
// Run parses the global flags in args, records the config path and output
// format on the App, and dispatches the first remaining argument to the
// matching command handler.
//
// A help request (flag.ErrHelp, no command, "--help", "-h" or a bare "help")
// prints the general usage and returns nil; "help <command>" prints that
// command's usage. Global flag and output-format errors, the unsupported
// "serve" command and unknown commands are returned wrapped by usageErr.
// Commands that are recognized but not built yet return notImplemented.
func (a *App) Run(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("gitcrawl", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		configFlag  = flags.String("config", "", "config path")
		formatFlag  = flags.String("format", string(FormatText), "output format: text|json|log")
		jsonFlag    = flags.Bool("json", false, "write JSON output")
		showVersion = flags.Bool("version", false, "print version")
	)
	// Accepted so the flag parses; its value is not read here.
	flags.Bool("no-color", false, "disable color output")

	if parseErr := flags.Parse(args); parseErr != nil {
		if !errors.Is(parseErr, flag.ErrHelp) {
			return usageErr(parseErr)
		}
		a.printUsage()
		return nil
	}

	outputFormat, formatErr := resolveOutputFormat(*formatFlag, *jsonFlag)
	if formatErr != nil {
		return usageErr(formatErr)
	}
	a.configPath = strings.TrimSpace(*configFlag)
	a.format = outputFormat

	printVersion := func() error {
		return a.writeOutput("version", map[string]string{"version": version}, false)
	}
	if *showVersion {
		return printVersion()
	}

	remaining := flags.Args()
	if len(remaining) == 0 {
		a.printUsage()
		return nil
	}
	command, commandArgs := remaining[0], remaining[1:]

	switch command {
	case "--help", "-h":
		a.printUsage()
		return nil
	case "help":
		if len(commandArgs) > 0 {
			return a.printCommandUsage(commandArgs[0])
		}
		a.printUsage()
		return nil
	case "version":
		return printVersion()
	case "metadata":
		return a.runMetadata(commandArgs)
	case "serve":
		return usageErr(fmt.Errorf("serve is not supported in gitcrawl"))
	case "init":
		return a.runInit(ctx, commandArgs)
	case "doctor":
		return a.runDoctor(ctx, commandArgs)
	case "status":
		return a.runStatus(ctx, commandArgs)
	case "sync":
		return a.runSync(ctx, commandArgs)
	case "threads":
		return a.runThreads(ctx, commandArgs)
	case "close-thread":
		return a.runCloseThread(ctx, commandArgs)
	case "reopen-thread":
		return a.runReopenThread(ctx, commandArgs)
	case "close-cluster":
		return a.runCloseCluster(ctx, commandArgs)
	case "reopen-cluster":
		return a.runReopenCluster(ctx, commandArgs)
	case "exclude-cluster-member":
		return a.runExcludeClusterMember(ctx, commandArgs)
	case "include-cluster-member":
		return a.runIncludeClusterMember(ctx, commandArgs)
	case "set-cluster-canonical":
		return a.runSetClusterCanonical(ctx, commandArgs)
	case "runs":
		return a.runRuns(ctx, commandArgs)
	case "search":
		return a.runSearch(ctx, commandArgs)
	case "gh":
		return a.runGHShim(ctx, commandArgs)
	case "configure":
		return a.runConfigure(commandArgs)
	case "refresh":
		return a.runRefresh(ctx, commandArgs)
	case "embed":
		return a.runEmbed(ctx, commandArgs)
	case "clusters":
		return a.runClusters(ctx, commandArgs)
	case "durable-clusters":
		return a.runDurableClusters(ctx, commandArgs)
	case "cluster-detail", "cluster-explain":
		// Both names are served by the same handler.
		return a.runClusterDetail(ctx, commandArgs)
	case "neighbors":
		return a.runNeighbors(ctx, commandArgs)
	case "cluster":
		return a.runCluster(ctx, commandArgs)
	case "portable":
		return a.runPortable(ctx, commandArgs)
	case "tui":
		return a.runTUI(ctx, commandArgs)
	case "summarize", "key-summaries", "cluster-experiment", "merge-clusters", "split-cluster", "export-sync", "import-sync", "validate-sync", "portable-size", "sync-status", "optimize", "completion":
		return notImplemented(command)
	}
	return usageErr(fmt.Errorf("unknown command %q", command))
}
// runConfigure implements `gitcrawl configure`. It loads the config (falling
// back to config.Default when the file does not exist), applies any non-blank
// --summary-model, --embed-model and --embedding-basis values, saves the
// config when something changed or the file was missing, and reports the
// resulting settings.
func (a *App) runConfigure(args []string) error {
	flags := flag.NewFlagSet("configure", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	summaryFlag := flags.String("summary-model", "", "summary model")
	embedFlag := flags.String("embed-model", "", "embedding model")
	basisFlag := flags.String("embedding-basis", "", "embedding basis")
	jsonFlag := flags.Bool("json", false, "write JSON output")
	valueFlags := map[string]bool{"summary-model": true, "embed-model": true, "embedding-basis": true}
	if parseErr := flags.Parse(normalizeCommandArgs(args, valueFlags)); parseErr != nil {
		return usageErr(parseErr)
	}
	a.applyCommandJSON(*jsonFlag)

	cfg, loadErr := config.Load(a.configPath)
	missing := false
	switch {
	case loadErr == nil:
		// Use the loaded config as-is.
	case errors.Is(loadErr, os.ErrNotExist):
		missing = true
		cfg = config.Default()
	default:
		return loadErr
	}

	changed := false
	if value := strings.TrimSpace(*summaryFlag); value != "" {
		cfg.OpenAI.SummaryModel = value
		changed = true
	}
	if value := strings.TrimSpace(*embedFlag); value != "" {
		cfg.OpenAI.EmbedModel = value
		changed = true
	}
	if value := strings.TrimSpace(*basisFlag); value != "" {
		cfg.EmbeddingBasis = value
		changed = true
	}

	// A missing config file is written out even when no flag was given.
	shouldSave := changed || missing
	if shouldSave {
		if saveErr := config.Save(a.configPath, cfg); saveErr != nil {
			return saveErr
		}
	}

	payload := map[string]any{
		"config_path":     config.ResolvePath(a.configPath),
		"updated":         shouldSave,
		"summary_model":   cfg.OpenAI.SummaryModel,
		"embed_model":     cfg.OpenAI.EmbedModel,
		"embedding_basis": cfg.EmbeddingBasis,
	}
	return a.writeOutput("configure", payload, true)
}
// refreshResult is the payload written by the `refresh` command.
type refreshResult struct {
// Repository is the owner/name of the refreshed repository; each stage that
// runs overwrites it with the name it resolved.
Repository string `json:"repository"`
// Selected reports which stages ran, keyed by "sync", "embed" and "cluster".
Selected map[string]bool `json:"selected"`
// Sync, Embed and Cluster hold the per-stage results and are omitted for skipped stages.
Sync *syncer.Stats `json:"sync,omitempty"`
Embed *embedResult `json:"embed,omitempty"`
Cluster map[string]any `json:"cluster,omitempty"`
}
// runRefresh implements `gitcrawl refresh owner/repo`: it runs the sync,
// embed and cluster stages in that order, skipping any stage disabled with
// --no-sync, --no-embed or --no-cluster, and writes a refreshResult.
//
// Flag and argument problems are returned wrapped by usageErr; a failing
// stage aborts the refresh and its error is returned as-is. Progress markers
// ("[refresh] <stage>") are written to Stderr.
func (a *App) runRefresh(ctx context.Context, args []string) error {
	fs := flag.NewFlagSet("refresh", flag.ContinueOnError)
	fs.SetOutput(io.Discard)
	noSync := fs.Bool("no-sync", false, "skip GitHub sync stage")
	noEmbed := fs.Bool("no-embed", false, "skip embedding stage")
	noCluster := fs.Bool("no-cluster", false, "skip clustering stage")
	includeComments := fs.Bool("include-comments", false, "hydrate comments during sync")
	fs.Bool("include-code", false, "accepted for compatibility; code hydration is not implemented yet")
	since := fs.String("since", "", "GitHub since timestamp")
	state := fs.String("state", "", "GitHub issue state: open|closed|all; default open")
	limitRaw := fs.String("limit", "", "maximum sync or embedding rows")
	thresholdRaw := fs.String("threshold", fmt.Sprintf("%.2f", defaultClusterThreshold), "minimum cluster cosine score")
	minSizeRaw := fs.String("min-size", "1", "minimum cluster member count")
	maxClusterSizeRaw := fs.String("max-cluster-size", strconv.Itoa(defaultClusterMaxSize), "maximum members per generated cluster")
	fanoutRaw := fs.String("k", strconv.Itoa(defaultClusterFanout), "nearest-neighbor fanout per thread")
	crossKindThresholdRaw := fs.String("cross-kind-threshold", fmt.Sprintf("%.2f", defaultCrossKindMinScore), "minimum score for issue/pull request edges")
	jsonOut := fs.Bool("json", false, "write JSON output")
	if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"since": true, "state": true, "limit": true, "threshold": true, "min-size": true, "max-cluster-size": true, "k": true, "cross-kind-threshold": true})); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(*jsonOut)
	if fs.NArg() != 1 {
		return usageErr(fmt.Errorf("refresh requires owner/repo"))
	}
	if *noSync && *noEmbed && *noCluster {
		return usageErr(fmt.Errorf("refresh requires at least one selected stage"))
	}
	owner, repoName, err := parseOwnerRepo(fs.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	limit, err := parseOptionalPositiveInt(*limitRaw)
	if err != nil {
		return usageErr(err)
	}
	threshold, err := parseOptionalFloat(*thresholdRaw)
	if err != nil {
		return usageErr(err)
	}
	if threshold <= 0 || threshold > 1 {
		return usageErr(fmt.Errorf("refresh requires --threshold between 0 and 1"))
	}
	minSize, err := parseOptionalPositiveInt(*minSizeRaw)
	if err != nil {
		return usageErr(err)
	}
	if minSize <= 0 {
		minSize = 2
	}
	maxClusterSize, fanout, crossKindThreshold, err := parseClusterShapeOptions("refresh", *maxClusterSizeRaw, *fanoutRaw, *crossKindThresholdRaw)
	if err != nil {
		return err
	}

	result := refreshResult{
		Repository: owner + "/" + repoName,
		Selected: map[string]bool{
			"sync":    !*noSync,
			"embed":   !*noEmbed,
			"cluster": !*noCluster,
		},
	}

	if !*noSync {
		fmt.Fprintln(a.Stderr, "[refresh] sync")
		stats, err := a.syncRepository(ctx, owner, repoName, syncOptions{
			Since:           strings.TrimSpace(*since),
			State:           strings.TrimSpace(*state),
			Limit:           limit,
			IncludeComments: *includeComments,
		})
		if err != nil {
			return err
		}
		result.Repository = stats.Repository
		result.Sync = &stats
	}

	if !*noEmbed {
		fmt.Fprintln(a.Stderr, "[refresh] embed")
		embed, err := a.embedRepository(ctx, owner, repoName, embedOptions{Limit: limit, IncludeClosed: stateIncludesClosed(*state)})
		if err != nil {
			return err
		}
		result.Repository = embed.Repository
		result.Embed = &embed
	}

	if !*noCluster {
		fmt.Fprintln(a.Stderr, "[refresh] cluster")
		includeClosed := stateIncludesClosed(*state)
		// The stage runs in its own function so one deferred Close releases the
		// store on every exit path, before the final result is written.
		clusterStage := func() (string, map[string]any, error) {
			rt, err := a.openLocalRuntime(ctx)
			if err != nil {
				return "", nil, err
			}
			defer rt.Store.Close()
			repo, err := rt.repository(ctx, owner, repoName)
			if err != nil {
				return "", nil, err
			}
			vectors, err := rt.Store.ListThreadVectorsFiltered(ctx, store.ThreadVectorQuery{
				RepoID:        repo.ID,
				Model:         rt.Config.OpenAI.EmbedModel,
				Basis:         rt.Config.EmbeddingBasis,
				IncludeClosed: includeClosed,
			})
			if err != nil {
				return "", nil, err
			}
			if len(vectors) == 0 {
				// Nothing stored for the configured model/basis: fall back to any
				// stored vectors, still honoring the requested --state so closed
				// threads are not silently dropped (matches the `cluster` command).
				vectors, err = rt.Store.ListThreadVectorsFiltered(ctx, store.ThreadVectorQuery{
					RepoID:        repo.ID,
					IncludeClosed: includeClosed,
				})
				if err != nil {
					return "", nil, err
				}
			}
			clusterResult, err := clusterRepository(ctx, rt.Store, repo.ID, vectors, clusterBuildOptions{
				Threshold:          threshold,
				MinSize:            minSize,
				MaxClusterSize:     maxClusterSize,
				Fanout:             fanout,
				CrossKindThreshold: crossKindThreshold,
			})
			if err != nil {
				return "", nil, err
			}
			return repo.FullName, map[string]any{
				"threshold":     threshold,
				"cross_kind":    crossKindThreshold,
				"min_size":      minSize,
				"max_size":      maxClusterSize,
				"k":             fanout,
				"vector_count":  len(vectors),
				"edge_count":    clusterResult.EdgeCount,
				"cluster_count": clusterResult.ClusterCount,
				"member_count":  clusterResult.MemberCount,
				"run_id":        clusterResult.RunID,
			}, nil
		}
		fullName, clusterStats, err := clusterStage()
		if err != nil {
			return err
		}
		result.Repository = fullName
		result.Cluster = clusterStats
	}
	return a.writeOutput("refresh", result, true)
}
// runSearch implements `gitcrawl search owner/repo --query ...`. When the
// first argument is a GitHub search kind (per isGHSearchKind) the call is
// handed to runGHSearch instead.
//
// --mode must be keyword, semantic or hybrid (blank means keyword). The mode
// is echoed in the output; the lookup itself always goes through
// Store.SearchDocuments in the code below.
func (a *App) runSearch(ctx context.Context, args []string) error {
	if len(args) > 0 && isGHSearchKind(args[0]) {
		return a.runGHSearch(ctx, args)
	}

	flags := flag.NewFlagSet("search", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	queryFlag := flags.String("query", "", "search query")
	limitFlag := flags.String("limit", "", "maximum hit rows")
	modeFlag := flags.String("mode", "keyword", "search mode: keyword|semantic|hybrid")
	jsonFlag := flags.Bool("json", false, "write JSON output")
	valueFlags := map[string]bool{"query": true, "limit": true, "mode": true}
	if parseErr := flags.Parse(normalizeCommandArgs(args, valueFlags)); parseErr != nil {
		return usageErr(parseErr)
	}
	a.applyCommandJSON(*jsonFlag)

	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("search requires owner/repo"))
	}
	queryText := strings.TrimSpace(*queryFlag)
	if queryText == "" {
		return usageErr(fmt.Errorf("search requires --query"))
	}
	owner, repoName, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	limit, err := parseOptionalPositiveInt(*limitFlag)
	if err != nil {
		return usageErr(err)
	}

	searchMode := strings.TrimSpace(*modeFlag)
	switch searchMode {
	case "":
		searchMode = "keyword"
	case "keyword", "semantic", "hybrid":
		// Accepted as given.
	default:
		return usageErr(fmt.Errorf("unsupported search mode %q", searchMode))
	}

	rt, err := a.openLocalRuntimeReadOnly(ctx)
	if err != nil {
		return err
	}
	defer rt.Store.Close()

	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return err
	}
	hits, err := rt.Store.SearchDocuments(ctx, repo.ID, queryText, limit)
	if err != nil {
		return err
	}

	payload := map[string]any{
		"repository": repo.FullName,
		"query":      queryText,
		"mode":       searchMode,
		"hits":       hits,
	}
	return a.writeOutput("search", payload, true)
}
// runNeighbors implements `gitcrawl neighbors owner/repo --number N`. It
// looks up the stored vector for thread N, scores it against the other stored
// vectors with the same model, basis and dimensions, and reports up to
// --limit neighbors (default 10) scoring at least --threshold (default 0.2).
//
// The target vector is first looked up for the configured embed model and
// basis; if that fails, any stored vector for the thread is used. When both
// lookups fail the error from the first lookup is returned.
func (a *App) runNeighbors(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("neighbors", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	numberFlag := flags.String("number", "", "issue or pull request number")
	limitFlag := flags.String("limit", "", "maximum neighbor rows")
	thresholdFlag := flags.String("threshold", "", "minimum cosine score")
	jsonFlag := flags.Bool("json", false, "write JSON output")
	valueFlags := map[string]bool{"number": true, "limit": true, "threshold": true}
	if parseErr := flags.Parse(normalizeCommandArgs(args, valueFlags)); parseErr != nil {
		return usageErr(parseErr)
	}
	a.applyCommandJSON(*jsonFlag)

	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("neighbors requires owner/repo"))
	}
	owner, repoName, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	number, err := parseRequiredPositiveInt("number", *numberFlag)
	if err != nil {
		return usageErr(err)
	}
	limit, err := parseOptionalPositiveInt(*limitFlag)
	if err != nil {
		return usageErr(err)
	}
	threshold, err := parseOptionalFloat(*thresholdFlag)
	if err != nil {
		return usageErr(err)
	}
	if limit <= 0 {
		limit = 10
	}
	if threshold <= 0 {
		threshold = 0.2
	}

	rt, err := a.openLocalRuntimeReadOnly(ctx)
	if err != nil {
		return err
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return err
	}

	configured := store.ThreadVectorQuery{
		RepoID: repo.ID,
		Model:  rt.Config.OpenAI.EmbedModel,
		Basis:  rt.Config.EmbeddingBasis,
	}
	targetThread, targetVector, err := rt.Store.ThreadVectorByNumber(ctx, configured, number)
	if err != nil {
		// Retry without model/basis constraints; keep reporting the first error.
		var fallbackErr error
		targetThread, targetVector, fallbackErr = rt.Store.ThreadVectorByNumber(ctx, store.ThreadVectorQuery{RepoID: repo.ID}, number)
		if fallbackErr != nil {
			return err
		}
	}

	// Only vectors comparable with the target are scored.
	stored, err := rt.Store.ListThreadVectorsFiltered(ctx, store.ThreadVectorQuery{
		RepoID:     repo.ID,
		Model:      targetVector.Model,
		Basis:      targetVector.Basis,
		Dimensions: targetVector.Dimensions,
	})
	if err != nil {
		return err
	}
	items := make([]vector.Item, len(stored))
	for i := range stored {
		items[i] = vector.Item{ThreadID: stored[i].ThreadID, Vector: stored[i].Vector}
	}

	// Request twice the limit so the threshold filter still has spare rows.
	kept := make([]vector.Neighbor, 0, limit)
	ids := make([]int64, 0, limit)
	for _, candidate := range vector.Query(items, targetVector.Vector, limit*2, targetThread.ID) {
		if candidate.Score < threshold {
			continue
		}
		kept = append(kept, candidate)
		ids = append(ids, candidate.ThreadID)
		if len(kept) >= limit {
			break
		}
	}

	threads, err := rt.Store.ThreadsByIDs(ctx, repo.ID, ids)
	if err != nil {
		return err
	}
	neighbors := make([]map[string]any, 0, len(kept))
	for _, candidate := range kept {
		// Candidates without a matching thread row are left out of the output.
		thread, found := threads[candidate.ThreadID]
		if !found {
			continue
		}
		neighbors = append(neighbors, map[string]any{
			"thread_id": candidate.ThreadID,
			"number":    thread.Number,
			"kind":      thread.Kind,
			"title":     thread.Title,
			"score":     candidate.Score,
		})
	}

	payload := map[string]any{
		"repository": repo.FullName,
		"thread":     targetThread,
		"neighbors":  neighbors,
	}
	return a.writeOutput("neighbors", payload, true)
}
// runCluster implements `gitcrawl cluster owner/repo`. It loads the stored
// thread vectors for the requested (or configured) model and basis, optionally
// truncates them to --limit rows, builds clusters with clusterRepository and
// reports the resulting counts.
//
// When neither --model nor --basis is given and the configured pair has no
// vectors, it falls back to every stored vector for the repository.
func (a *App) runCluster(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("cluster", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		thresholdFlag = flags.String("threshold", fmt.Sprintf("%.2f", defaultClusterThreshold), "minimum cosine score")
		minSizeFlag   = flags.String("min-size", "1", "minimum cluster member count")
		maxSizeFlag   = flags.String("max-cluster-size", strconv.Itoa(defaultClusterMaxSize), "maximum members per generated cluster")
		fanoutFlag    = flags.String("k", strconv.Itoa(defaultClusterFanout), "nearest-neighbor fanout per thread")
		crossKindFlag = flags.String("cross-kind-threshold", fmt.Sprintf("%.2f", defaultCrossKindMinScore), "minimum score for issue/pull request edges")
		limitFlag     = flags.String("limit", "", "maximum vector rows to cluster")
		modelFlag     = flags.String("model", "", "embedding model")
		basisFlag     = flags.String("basis", "", "embedding basis")
		closedFlag    = flags.Bool("include-closed", false, "include closed issue and pull request vectors")
		jsonFlag      = flags.Bool("json", false, "write JSON output")
	)
	valueFlags := map[string]bool{"threshold": true, "min-size": true, "max-cluster-size": true, "k": true, "cross-kind-threshold": true, "limit": true, "model": true, "basis": true}
	if parseErr := flags.Parse(normalizeCommandArgs(args, valueFlags)); parseErr != nil {
		return usageErr(parseErr)
	}
	a.applyCommandJSON(*jsonFlag)

	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("cluster requires owner/repo"))
	}
	owner, repoName, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	threshold, err := parseOptionalFloat(*thresholdFlag)
	if err != nil {
		return usageErr(err)
	}
	if !(threshold > 0 && threshold <= 1) {
		return usageErr(fmt.Errorf("cluster requires --threshold between 0 and 1"))
	}
	minSize, err := parseOptionalPositiveInt(*minSizeFlag)
	if err != nil {
		return usageErr(err)
	}
	if minSize <= 0 {
		minSize = 2
	}
	maxClusterSize, fanout, crossKindThreshold, err := parseClusterShapeOptions("cluster", *maxSizeFlag, *fanoutFlag, *crossKindFlag)
	if err != nil {
		return err
	}
	limit, err := parseOptionalPositiveInt(*limitFlag)
	if err != nil {
		return usageErr(err)
	}

	rt, err := a.openLocalRuntime(ctx)
	if err != nil {
		return err
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return err
	}

	modelOverride := strings.TrimSpace(*modelFlag)
	basisOverride := strings.TrimSpace(*basisFlag)
	vectors, err := rt.Store.ListThreadVectorsFiltered(ctx, store.ThreadVectorQuery{
		RepoID:        repo.ID,
		Model:         firstNonEmpty(modelOverride, rt.Config.OpenAI.EmbedModel),
		Basis:         firstNonEmpty(basisOverride, rt.Config.EmbeddingBasis),
		IncludeClosed: *closedFlag,
	})
	if err != nil {
		return err
	}
	// Only fall back to "any model/basis" when the caller did not pin either one.
	usingDefaults := modelOverride == "" && basisOverride == ""
	if len(vectors) == 0 && usingDefaults {
		vectors, err = rt.Store.ListThreadVectorsFiltered(ctx, store.ThreadVectorQuery{RepoID: repo.ID, IncludeClosed: *closedFlag})
		if err != nil {
			return err
		}
	}
	if limit > 0 && len(vectors) > limit {
		vectors = vectors[:limit]
	}

	built, err := clusterRepository(ctx, rt.Store, repo.ID, vectors, clusterBuildOptions{
		Threshold:          threshold,
		MinSize:            minSize,
		MaxClusterSize:     maxClusterSize,
		Fanout:             fanout,
		CrossKindThreshold: crossKindThreshold,
	})
	if err != nil {
		return err
	}

	payload := map[string]any{
		"repository":    repo.FullName,
		"threshold":     threshold,
		"cross_kind":    crossKindThreshold,
		"min_size":      minSize,
		"max_size":      maxClusterSize,
		"k":             fanout,
		"vector_count":  len(vectors),
		"edge_count":    built.EdgeCount,
		"cluster_count": built.ClusterCount,
		"member_count":  built.MemberCount,
		"run_id":        built.RunID,
	}
	return a.writeOutput("cluster", payload, true)
}
// embedResult summarizes one embedding run. It is the payload of the `embed`
// command and is also stored as the run record's stats JSON.
type embedResult struct {
Repository string `json:"repository"`
Model string `json:"model"`
Basis string `json:"basis"`
// Selected is the number of embedding tasks listed for the run.
Selected int `json:"selected"`
// Embedded counts vectors that were upserted.
Embedded int `json:"embedded"`
// NOTE(review): embedRepository does not assign Skipped — confirm whether another producer does.
Skipped int `json:"skipped"`
// Failed is the number of rows covered by permanently failed batches.
Failed int `json:"failed,omitempty"`
// Retries counts batches that were requeued after a failure.
Retries int `json:"retries,omitempty"`
// Status is "success", "partial", "error" or "cancelled".
Status string `json:"status,omitempty"`
Failures []embedFailureStat `json:"failures,omitempty"`
// RunID is the ID returned when the run was recorded.
RunID int64 `json:"run_id"`
}
// embedFailureStat describes one embedding batch that failed permanently.
type embedFailureStat struct {
// BatchStart and BatchEnd are the zero-based, half-open bounds of the batch
// within the run's task list.
BatchStart int `json:"batch_start"`
BatchEnd int `json:"batch_end"`
// Attempts is how many times the batch was tried.
Attempts int `json:"attempts"`
// Status, Type and Code are copied from the OpenAI API error when the
// failure was one; otherwise they stay empty and are omitted from JSON.
Status int `json:"status,omitempty"`
Type string `json:"type,omitempty"`
Code string `json:"code,omitempty"`
// Message is the API error message when present, else the error's text.
Message string `json:"message"`
}
// runEmbed implements `gitcrawl embed owner/repo`. It validates the flags,
// delegates the work to embedRepository and writes the resulting embedResult.
// Flag and argument problems are returned wrapped by usageErr.
func (a *App) runEmbed(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("embed", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		numberFlag = flags.String("number", "", "embed one issue or pull request number")
		limitFlag  = flags.String("limit", "", "maximum rows to embed")
		forceFlag  = flags.Bool("force", false, "re-embed even when content hash is unchanged")
		closedFlag = flags.Bool("include-closed", false, "include closed issue and pull request rows")
		jsonFlag   = flags.Bool("json", false, "write JSON output")
	)
	valueFlags := map[string]bool{"number": true, "limit": true}
	if parseErr := flags.Parse(normalizeCommandArgs(args, valueFlags)); parseErr != nil {
		return usageErr(parseErr)
	}
	a.applyCommandJSON(*jsonFlag)

	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("embed requires owner/repo"))
	}
	owner, repoName, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	number, err := parseOptionalPositiveInt(*numberFlag)
	if err != nil {
		return usageErr(err)
	}
	limit, err := parseOptionalPositiveInt(*limitFlag)
	if err != nil {
		return usageErr(err)
	}

	opts := embedOptions{
		Number:        number,
		Limit:         limit,
		Force:         *forceFlag,
		IncludeClosed: *closedFlag,
	}
	result, err := a.embedRepository(ctx, owner, repoName, opts)
	if err != nil {
		return err
	}
	return a.writeOutput("embed", result, true)
}
// embedOptions selects which threads embedRepository embeds; every field is
// forwarded to store.EmbeddingTaskOptions.
type embedOptions struct {
// Number restricts the run to one issue or pull request number (--number).
Number int
// Limit caps the number of rows (--limit).
Limit int
// Force re-embeds even when the content hash is unchanged (--force).
Force bool
// IncludeClosed also selects closed issue and pull request rows.
IncludeClosed bool
}
// embedRepository embeds the pending threads of owner/repoName with the
// configured OpenAI embedding model and stores the vectors.
//
// Tasks are sent in batches of Config.OpenAI.BatchSize (64 when unset). A
// batch that fails with a retryable error is requeued once (two attempts in
// total); otherwise it is recorded as a failure and the run continues. A
// response whose vector count does not match the batch size is recorded as a
// failure as well, rather than being partially stored. Context cancellation
// or deadline expiry stops the loop.
//
// The run is always recorded through Store.RecordRun with status "success",
// "partial" (some batches failed), "error" (failures and nothing embedded) or
// "cancelled". The returned error is non-nil for "error" and "cancelled"
// runs, in which case the result still carries the collected statistics, and
// for setup, storage or run-recording problems, in which case the result is
// empty. Progress is written to Stderr.
func (a *App) embedRepository(ctx context.Context, owner, repoName string, options embedOptions) (embedResult, error) {
	rt, err := a.openLocalRuntime(ctx)
	if err != nil {
		return embedResult{}, err
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return embedResult{}, err
	}
	if rt.Config.EmbeddingBasis == "title_summary" {
		return embedResult{}, fmt.Errorf("embedding basis %q needs summarize support, which is not implemented yet; use `gitcrawl configure --embedding-basis title_original`", rt.Config.EmbeddingBasis)
	}
	token := config.ResolveOpenAIKey(rt.Config)
	if token.Value == "" {
		return embedResult{}, fmt.Errorf("missing OpenAI API key: set %s", rt.Config.OpenAI.APIKeyEnv)
	}
	tasks, err := rt.Store.ListEmbeddingTasks(ctx, store.EmbeddingTaskOptions{
		RepoID:        repo.ID,
		Basis:         rt.Config.EmbeddingBasis,
		Model:         rt.Config.OpenAI.EmbedModel,
		Number:        options.Number,
		Limit:         options.Limit,
		Force:         options.Force,
		IncludeClosed: options.IncludeClosed,
	})
	if err != nil {
		return embedResult{}, err
	}
	started := time.Now().UTC().Format(time.RFC3339Nano)
	batchSize := rt.Config.OpenAI.BatchSize
	if batchSize <= 0 {
		batchSize = 64
	}
	client := openai.New(openai.Options{APIKey: token.Value, BaseURL: openAIBaseURL(), Dimensions: rt.Config.OpenAI.EmbedDimensions, Retry: embedRetryOverride()})

	// pendingBatch is a half-open [start, end) window into tasks plus the
	// number of times it has been attempted.
	type pendingBatch struct {
		start, end int
		attempts   int
	}
	var queue []pendingBatch
	for start := 0; start < len(tasks); start += batchSize {
		end := start + batchSize
		if end > len(tasks) {
			end = len(tasks)
		}
		queue = append(queue, pendingBatch{start: start, end: end})
	}

	embedded := 0
	totalRetries := 0
	var failures []embedFailureStat
	cancelled := false
	var cancelErr error
	const maxBatchAttempts = 2
	for len(queue) > 0 {
		batch := queue[0]
		queue = queue[1:]
		batch.attempts++
		slice := tasks[batch.start:batch.end]
		texts := make([]string, 0, len(slice))
		for _, task := range slice {
			texts = append(texts, task.Text)
		}
		fmt.Fprintf(a.Stderr, "[embed] embedding %d-%d of %d (attempt %d)\n", batch.start+1, batch.end, len(tasks), batch.attempts)
		if batch.attempts == 1 {
			// Report truncation once per batch, not again on retry.
			if truncated := truncatedEmbeddingTaskCount(slice); truncated > 0 {
				fmt.Fprintf(a.Stderr, "[embed] truncated %d input(s) to embedding input budget (%d runes/%d bytes)\n", truncated, store.MaxEmbeddingTextRunes, store.MaxEmbeddingTextBytes)
			}
		}
		vectors, err := client.Embed(ctx, rt.Config.OpenAI.EmbedModel, texts)
		if err != nil {
			if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
				cancelled = true
				cancelErr = err
				break
			}
			// Errors that are not API errors are assumed to be retryable.
			retryable := true
			if apiErr := openai.AsAPIError(err); apiErr != nil {
				retryable = apiErr.Retryable()
			}
			if retryable && batch.attempts < maxBatchAttempts {
				totalRetries++
				fmt.Fprintf(a.Stderr, "[embed] batch %d-%d failed (%s), requeueing\n", batch.start+1, batch.end, summarizeEmbedErr(err))
				queue = append(queue, batch)
				continue
			}
			fmt.Fprintf(a.Stderr, "[embed] batch %d-%d failed permanently: %s\n", batch.start+1, batch.end, summarizeEmbedErr(err))
			failures = append(failures, makeEmbedFailureStat(batch.start, batch.end, batch.attempts, err))
			continue
		}
		if len(vectors) != len(slice) {
			// Vectors are paired with tasks by position, so a count mismatch
			// would either index past the batch or silently drop rows. Treat
			// the whole batch as failed instead.
			mismatchErr := fmt.Errorf("openai returned %d embeddings for %d inputs", len(vectors), len(slice))
			fmt.Fprintf(a.Stderr, "[embed] batch %d-%d failed permanently: %s\n", batch.start+1, batch.end, mismatchErr)
			failures = append(failures, makeEmbedFailureStat(batch.start, batch.end, batch.attempts, mismatchErr))
			continue
		}
		now := time.Now().UTC().Format(time.RFC3339Nano)
		for index, embedding := range vectors {
			task := slice[index]
			if err := rt.Store.UpsertThreadVector(ctx, store.ThreadVector{
				ThreadID:    task.ThreadID,
				Basis:       rt.Config.EmbeddingBasis,
				Model:       rt.Config.OpenAI.EmbedModel,
				Dimensions:  len(embedding),
				ContentHash: task.ContentHash,
				Vector:      embedding,
				Backend:     "openai",
				CreatedAt:   now,
				UpdatedAt:   now,
			}); err != nil {
				return embedResult{}, err
			}
			embedded++
		}
	}

	failedRows := 0
	for _, f := range failures {
		failedRows += f.BatchEnd - f.BatchStart
	}
	status := "success"
	switch {
	case cancelled:
		status = "cancelled"
	case len(failures) > 0 && embedded == 0:
		status = "error"
	case len(failures) > 0:
		status = "partial"
	}
	result := embedResult{
		Repository: repo.FullName,
		Model:      rt.Config.OpenAI.EmbedModel,
		Basis:      rt.Config.EmbeddingBasis,
		Selected:   len(tasks),
		Embedded:   embedded,
		Failed:     failedRows,
		Retries:    totalRetries,
		Status:     status,
		Failures:   failures,
	}
	// embedResult contains only strings, integers and plain structs, so
	// marshaling it cannot fail; the error is deliberately ignored.
	statsJSON, _ := json.Marshal(result)
	runRecord := store.RunRecord{
		RepoID:     repo.ID,
		Kind:       "embedding",
		Scope:      "repo",
		Status:     status,
		StartedAt:  started,
		FinishedAt: time.Now().UTC().Format(time.RFC3339Nano),
		StatsJSON:  string(statsJSON),
	}
	if cancelled && cancelErr != nil {
		runRecord.ErrorText = cancelErr.Error()
	} else if status == "error" && len(failures) > 0 {
		runRecord.ErrorText = failures[0].Message
	}
	recordCtx := ctx
	if cancelled {
		// ctx is already done; record the cancelled run on a short-lived
		// background context instead.
		var cancelRecord context.CancelFunc
		recordCtx, cancelRecord = context.WithTimeout(context.Background(), 5*time.Second)
		defer cancelRecord()
	}
	runID, recordErr := rt.Store.RecordRun(recordCtx, runRecord)
	if recordErr != nil && !cancelled {
		return embedResult{}, recordErr
	}
	result.RunID = runID
	if cancelled {
		return result, cancelErr
	}
	if status == "error" {
		return result, fmt.Errorf("openai embeddings failed: %s", failures[0].Message)
	}
	return result, nil
}
// summarizeEmbedErr renders err for progress output. OpenAI API errors become
// "status=<n>" followed by " type=<t>" and " code=<c>" when those fields are
// set; any other error is rendered with its Error text.
func summarizeEmbedErr(err error) string {
	apiErr := openai.AsAPIError(err)
	if apiErr == nil {
		return err.Error()
	}
	summary := fmt.Sprintf("status=%d", apiErr.Status)
	if apiErr.Type != "" {
		summary += " type=" + apiErr.Type
	}
	if apiErr.Code != "" {
		summary += " code=" + apiErr.Code
	}
	return summary
}
// makeEmbedFailureStat builds the failure record for the batch [start, end)
// after the given number of attempts. For OpenAI API errors the status, type
// and code are copied over and the API message, when non-empty, replaces the
// error's text.
func makeEmbedFailureStat(start, end, attempts int, err error) embedFailureStat {
	message := err.Error()
	apiErr := openai.AsAPIError(err)
	if apiErr == nil {
		return embedFailureStat{
			BatchStart: start,
			BatchEnd:   end,
			Attempts:   attempts,
			Message:    message,
		}
	}
	if apiErr.Message != "" {
		message = apiErr.Message
	}
	return embedFailureStat{
		BatchStart: start,
		BatchEnd:   end,
		Attempts:   attempts,
		Status:     apiErr.Status,
		Type:       apiErr.Type,
		Code:       apiErr.Code,
		Message:    message,
	}
}
// embedRetryOverride returns the openai.NoRetry configuration when the
// GITCRAWL_OPENAI_RETRY_DISABLED environment variable is "1" (surrounding
// whitespace ignored), and nil otherwise.
func embedRetryOverride() *openai.RetryConfig {
	disabled := strings.TrimSpace(os.Getenv("GITCRAWL_OPENAI_RETRY_DISABLED")) == "1"
	if !disabled {
		return nil
	}
	noRetry := openai.NoRetry()
	return &noRetry
}
// truncatedEmbeddingTaskCount returns how many of the given tasks have their
// TextTruncated flag set.
func truncatedEmbeddingTaskCount(tasks []store.EmbeddingTask) int {
	truncated := 0
	for i := range tasks {
		if !tasks[i].TextTruncated {
			continue
		}
		truncated++
	}
	return truncated
}
// openAIBaseURL returns the OpenAI base URL override from the environment:
// GITCRAWL_OPENAI_BASE_URL when it is non-blank, otherwise OPENAI_BASE_URL.
// Values are whitespace-trimmed; the result is empty when neither is set.
func openAIBaseURL() string {
	for _, name := range [...]string{"GITCRAWL_OPENAI_BASE_URL", "OPENAI_BASE_URL"} {
		if value := strings.TrimSpace(os.Getenv(name)); value != "" {
			return value
		}
	}
	return ""
}
// githubBaseURL returns the GitHub base URL override from the environment:
// GITCRAWL_GITHUB_BASE_URL when it is non-blank, otherwise GITHUB_BASE_URL.
// Values are whitespace-trimmed; the result is empty when neither is set.
func githubBaseURL() string {
	for _, name := range [...]string{"GITCRAWL_GITHUB_BASE_URL", "GITHUB_BASE_URL"} {
		if value := strings.TrimSpace(os.Getenv(name)); value != "" {
			return value
		}
	}
	return ""
}
// runClusters implements `gitcrawl clusters` by running the shared cluster
// listing in its non-durable (display) mode.
func (a *App) runClusters(ctx context.Context, args []string) error {
	const durable = false
	return a.runClusterList(ctx, "clusters", args, durable)
}
// runDurableClusters implements `gitcrawl durable-clusters` by running the
// shared cluster listing in its durable mode.
func (a *App) runDurableClusters(ctx context.Context, args []string) error {
	const durable = true
	return a.runClusterList(ctx, "durable-clusters", args, durable)
}
// clusterListIncludesClosed decides whether a cluster listing shows closed
// clusters. hideClosed always wins; otherwise durable listings show them only
// when includeClosed is set, and non-durable listings always show them.
func clusterListIncludesClosed(durable bool, includeClosed bool, hideClosed bool) bool {
	switch {
	case hideClosed:
		return false
	case durable:
		return includeClosed
	default:
		return true
	}
}
// runClusterList is the shared implementation of the `clusters` and
// `durable-clusters` commands. command is the name used in usage errors and
// in the output; durable selects Store.ListClusterSummaries over
// Store.ListDisplayClusterSummaries and changes how --include-closed is
// interpreted (see clusterListIncludesClosed).
func (a *App) runClusterList(ctx context.Context, command string, args []string, durable bool) error {
	flags := flag.NewFlagSet("clusters", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		minSizeFlag = flags.String("min-size", "", "minimum active member count")
		limitFlag   = flags.String("limit", "", "maximum cluster rows")
		sortFlag    = flags.String("sort", "size", "sort mode: recent|oldest|size")
		closedFlag  = flags.Bool("include-closed", false, "deprecated; clusters include closed rows by default")
		hideFlag    = flags.Bool("hide-closed", false, "hide locally closed clusters")
		jsonFlag    = flags.Bool("json", false, "write JSON output")
	)
	valueFlags := map[string]bool{"min-size": true, "limit": true, "sort": true}
	if parseErr := flags.Parse(normalizeCommandArgs(args, valueFlags)); parseErr != nil {
		return usageErr(parseErr)
	}
	a.applyCommandJSON(*jsonFlag)

	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("%s requires owner/repo", command))
	}
	owner, repoName, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	minSize, err := parseOptionalPositiveInt(*minSizeFlag)
	if err != nil {
		return usageErr(err)
	}
	limit, err := parseOptionalPositiveInt(*limitFlag)
	if err != nil {
		return usageErr(err)
	}
	sortOrder := strings.TrimSpace(*sortFlag)
	switch sortOrder {
	case "recent", "oldest", "size":
		// Supported.
	default:
		return usageErr(fmt.Errorf("unsupported sort %q", sortOrder))
	}

	rt, err := a.openLocalRuntimeReadOnly(ctx)
	if err != nil {
		return err
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return err
	}

	listing := store.ClusterSummaryOptions{
		RepoID:        repo.ID,
		IncludeClosed: clusterListIncludesClosed(durable, *closedFlag, *hideFlag),
		MinSize:       minSize,
		Limit:         limit,
		Sort:          sortOrder,
	}
	var clusters []store.ClusterSummary
	if !durable {
		clusters, err = rt.Store.ListDisplayClusterSummaries(ctx, listing)
	} else {
		clusters, err = rt.Store.ListClusterSummaries(ctx, listing)
	}
	if err != nil {
		return err
	}

	payload := map[string]any{
		"repository": repo.FullName,
		"clusters":   clusters,
	}
	return a.writeOutput(command, payload, true)
}
// runTUI implements the "tui" command. It parses the command flags, opens the
// local runtime, loads display cluster summaries for the requested (or
// inferred) repository, and then either starts the interactive cluster browser
// or writes the same payload through writeOutput.
//
// The interactive browser is used only when the output format is text and
// canRunInteractiveTUI reports true; in that mode the runtime is opened with
// openLocalRuntime, otherwise it is opened read-only. Non-interactive runs fall
// back to an empty payload when the database does not exist or when no
// repository could be inferred.
func (a *App) runTUI(ctx context.Context, args []string) error {
	fs := flag.NewFlagSet("tui", flag.ContinueOnError)
	fs.SetOutput(io.Discard)
	minSizeRaw := fs.String("min-size", "", "minimum active member count")
	limitRaw := fs.String("limit", "", "maximum cluster rows")
	sortRaw := fs.String("sort", "", "sort mode: recent|oldest|size")
	includeClosed := fs.Bool("include-closed", false, "deprecated; closed clusters are shown by default")
	hideClosed := fs.Bool("hide-closed", false, "hide locally closed clusters")
	jsonOut := fs.Bool("json", false, "write JSON output")
	if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"min-size": true, "limit": true, "sort": true})); err != nil {
		if errors.Is(err, flag.ErrHelp) {
			return a.printCommandUsage("tui")
		}
		return usageErr(err)
	}
	a.applyCommandJSON(*jsonOut)
	if fs.NArg() > 1 {
		return usageErr(fmt.Errorf("tui accepts at most one owner/repo"))
	}
	minSize, err := parseOptionalPositiveInt(*minSizeRaw)
	if err != nil {
		return usageErr(err)
	}
	if strings.TrimSpace(*minSizeRaw) == "" {
		minSize = defaultTUIMinSize
	}
	limit, err := parseOptionalPositiveInt(*limitRaw)
	if err != nil {
		return usageErr(err)
	}
	// --include-closed overrides --hide-closed. The result is computed once so
	// that every payload (including the empty fallbacks) reports the same
	// HideClosed value for the same flags.
	showClosed := !*hideClosed || *includeClosed
	interactive := a.format == FormatText && a.canRunInteractiveTUI()
	var rt localRuntime
	if interactive {
		rt, err = a.openLocalRuntime(ctx)
	} else {
		rt, err = a.openLocalRuntimeReadOnly(ctx)
	}
	if err != nil {
		if !interactive && errors.Is(err, os.ErrNotExist) {
			// No database yet: describe the default configuration with an
			// empty cluster list instead of failing.
			cfg := config.Default()
			if cfgErr := cfg.Normalize(); cfgErr != nil {
				return cfgErr
			}
			sortMode, sortErr := resolveTUISort(*sortRaw, cfg)
			if sortErr != nil {
				return sortErr
			}
			return a.writeOutput("tui", emptyClusterBrowserPayload(ctx, cfg, cfg.DBPath, sortMode, minSize, limit, !showClosed), true)
		}
		return err
	}
	defer rt.Store.Close()
	repo, inferred, err := a.resolveOptionalRepository(ctx, rt, fs.Args())
	if err != nil {
		// The database exists but holds no repository to infer.
		if !interactive && len(fs.Args()) == 0 && strings.Contains(err.Error(), "no local repositories found") {
			sortMode, sortErr := resolveTUISort(*sortRaw, rt.Config)
			if sortErr != nil {
				return sortErr
			}
			return a.writeOutput("tui", emptyClusterBrowserPayload(ctx, rt.Config, rt.SourceDBPath, sortMode, minSize, limit, !showClosed), true)
		}
		return err
	}
	sortMode, err := resolveTUISort(*sortRaw, rt.Config)
	if err != nil {
		return err
	}
	clusters, err := rt.Store.ListDisplayClusterSummaries(ctx, store.ClusterSummaryOptions{
		RepoID:        repo.ID,
		IncludeClosed: showClosed,
		MinSize:       minSize,
		Limit:         limit,
		Sort:          sortMode,
	})
	if err != nil {
		return err
	}
	if interactive {
		// The interactive browser also receives a wider working set (minimum
		// size 1) merged behind the primary rows.
		workingSet, err := rt.Store.ListDisplayClusterSummaries(ctx, store.ClusterSummaryOptions{
			RepoID:        repo.ID,
			IncludeClosed: showClosed,
			MinSize:       1,
			Limit:         maxInt(defaultTUIWorkingSetLimit, limit),
			Sort:          sortMode,
		})
		if err != nil {
			return err
		}
		clusters = mergeClusterSummaries(clusters, workingSet)
	}
	if clusters == nil {
		// Keep the payload's cluster list non-nil.
		clusters = []store.ClusterSummary{}
	}
	payload := clusterBrowserPayload{
		Repository:         repo.FullName,
		InferredRepository: inferred,
		Mode:               "cluster-browser",
		DBSource:           databaseSourceKind(rt.SourceDBPath),
		DBLocation:         databaseSourceLocation(ctx, rt.SourceDBPath),
		DBRefreshSource:    remoteRefreshSource(rt),
		DBRuntimePath:      remoteRuntimePath(rt),
		Sort:               sortMode,
		MinSize:            minSize,
		Limit:              limit,
		HideClosed:         !showClosed,
		EmbedModel:         rt.Config.OpenAI.EmbedModel,
		EmbeddingBasis:     rt.Config.EmbeddingBasis,
		Clusters:           clusters,
	}
	if !interactive {
		if a.format == FormatText {
			return usageErr(fmt.Errorf("tui requires an interactive terminal; run it from a TTY or pass --json for machine-readable cluster data"))
		}
		return a.writeOutput("tui", payload, true)
	}
	return a.runInteractiveTUI(ctx, rt.Store, repo.ID, payload)
}
// resolveTUISort chooses the cluster sort mode. The trimmed raw value wins; if
// it is empty the trimmed cfg.TUI.DefaultSort is used, and if that is empty
// too the mode is "size". Any value other than "recent", "oldest" or "size"
// yields a usage error.
func resolveTUISort(raw string, cfg config.Config) (string, error) {
	mode := strings.TrimSpace(raw)
	if mode == "" {
		mode = strings.TrimSpace(cfg.TUI.DefaultSort)
	}
	if mode == "" {
		mode = "size"
	}
	switch mode {
	case "recent", "oldest", "size":
		return mode, nil
	}
	return "", usageErr(fmt.Errorf("unsupported sort %q", mode))
}
// emptyClusterBrowserPayload builds a cluster-browser payload that carries the
// requested view settings and configuration details but no repository and an
// empty (non-nil) cluster list. A blank sourceDBPath falls back to cfg.DBPath.
func emptyClusterBrowserPayload(ctx context.Context, cfg config.Config, sourceDBPath, sort string, minSize, limit int, hideClosed bool) clusterBrowserPayload {
	dbPath := sourceDBPath
	if strings.TrimSpace(dbPath) == "" {
		dbPath = cfg.DBPath
	}
	sourceKind := databaseSourceKind(dbPath)
	sourceLocation := databaseSourceLocation(ctx, dbPath)
	return clusterBrowserPayload{
		Mode:           "cluster-browser",
		DBSource:       sourceKind,
		DBLocation:     sourceLocation,
		Sort:           sort,
		MinSize:        minSize,
		Limit:          limit,
		HideClosed:     hideClosed,
		EmbedModel:     cfg.OpenAI.EmbedModel,
		EmbeddingBasis: cfg.EmbeddingBasis,
		Clusters:       []store.ClusterSummary{},
	}
}
// databaseSourceKind reports "remote" when portableStoreRoot recognizes dbPath
// and "local" otherwise.
func databaseSourceKind(dbPath string) string {
	_, portable := portableStoreRoot(dbPath)
	if !portable {
		return "local"
	}
	return "remote"
}
// remoteRefreshSource returns rt.SourceDBPath when the runtime is marked as a
// remote source, and the empty string otherwise.
func remoteRefreshSource(rt localRuntime) string {
	if !rt.RemoteSource {
		return ""
	}
	return rt.SourceDBPath
}
// remoteRuntimePath returns rt.Config.DBPath when the runtime is marked as a
// remote source, and the empty string otherwise.
func remoteRuntimePath(rt localRuntime) string {
	if !rt.RemoteSource {
		return ""
	}
	return rt.Config.DBPath
}
// databaseSourceLocation returns a short label for the database at dbPath.
// When portableStoreRoot does not recognize the path the label is just the
// file name. Otherwise it is "<prefix>:<file name>", where the prefix is the
// GitHub owner/repo derived from the store's origin remote or, failing that,
// the base name of the store root.
func databaseSourceLocation(ctx context.Context, dbPath string) string {
	name := filepath.Base(dbPath)
	storeRoot, portable := portableStoreRoot(dbPath)
	if !portable {
		return name
	}
	prefix := githubRepoFromRemote(gitRemoteURL(ctx, storeRoot))
	if prefix == "" {
		prefix = filepath.Base(storeRoot)
	}
	return prefix + ":" + name
}
// gitRemoteURL runs "git -C dir remote get-url origin" and returns its
// trimmed standard output. Any failure to run the command yields "".
func gitRemoteURL(ctx context.Context, dir string) string {
	raw, err := exec.CommandContext(ctx, "git", "-C", dir, "remote", "get-url", "origin").Output()
	if err != nil {
		return ""
	}
	return strings.TrimSpace(string(raw))
}
// githubRepoFromRemote extracts "owner/repo" from a GitHub remote URL.
//
// Recognized forms are the SCP-style "git@github.com:owner/repo" and any value
// containing "github.com/". Surrounding whitespace, trailing slashes and a
// trailing ".git" are ignored. The last two path segments are used as owner
// and repository. The empty string is returned when the remote is not a GitHub
// remote or when either segment is missing or empty.
func githubRepoFromRemote(remote string) string {
	// Drop trailing slashes first so that ".git/" is still recognized as a
	// ".git" suffix.
	value := strings.TrimRight(strings.TrimSpace(remote), "/")
	value = strings.TrimSuffix(value, ".git")
	switch {
	case strings.HasPrefix(value, "git@github.com:"):
		value = strings.TrimPrefix(value, "git@github.com:")
	case strings.Contains(value, "github.com/"):
		value = value[strings.Index(value, "github.com/")+len("github.com/"):]
	default:
		return ""
	}
	parts := strings.Split(strings.Trim(value, "/"), "/")
	if len(parts) < 2 {
		return ""
	}
	owner, name := parts[len(parts)-2], parts[len(parts)-1]
	if owner == "" || name == "" {
		// A doubled slash would otherwise produce labels such as "/repo".
		return ""
	}
	return owner + "/" + name
}
// resolveOptionalRepository resolves the repository for a command whose
// owner/repo argument is optional. With an argument, args[0] is parsed and
// looked up through rt.repository and the boolean result is false. Without
// arguments, rt.defaultRepository is used and the boolean result is true to
// signal that the repository was inferred.
func (a *App) resolveOptionalRepository(ctx context.Context, rt localRuntime, args []string) (store.Repository, bool, error) {
	var none store.Repository
	if len(args) > 0 {
		owner, repoName, err := parseOwnerRepo(args[0])
		if err != nil {
			return none, false, usageErr(err)
		}
		explicit, err := rt.repository(ctx, owner, repoName)
		if err != nil {
			return none, false, err
		}
		return explicit, false, nil
	}
	inferred, err := rt.defaultRepository(ctx)
	if err != nil {
		return none, false, usageErr(fmt.Errorf("tui could not infer a repository: %w; run gitcrawl sync owner/repo or pass owner/repo explicitly", err))
	}
	return inferred, true, nil
}
// mergeClusterSummaries returns a new slice holding every element of primary
// followed by the elements of secondary whose ID has not been seen yet, in
// their original order. When primary is empty the result is a plain copy of
// secondary without any de-duplication.
func mergeClusterSummaries(primary, secondary []store.ClusterSummary) []store.ClusterSummary {
	if len(primary) == 0 {
		return append([]store.ClusterSummary(nil), secondary...)
	}
	merged := make([]store.ClusterSummary, 0, len(primary)+len(secondary))
	merged = append(merged, primary...)
	known := make(map[int64]struct{}, len(primary)+len(secondary))
	for i := range primary {
		known[primary[i].ID] = struct{}{}
	}
	for i := range secondary {
		id := secondary[i].ID
		if _, dup := known[id]; dup {
			continue
		}
		known[id] = struct{}{}
		merged = append(merged, secondary[i])
	}
	return merged
}
// runClusterDetail implements the "cluster-detail" command. It requires one
// owner/repo argument and an --id flag, opens the runtime read-only, loads the
// cluster through Store.ClusterDetail and writes the repository name, the
// cluster and its members. The body snippet length defaults to 280 when
// --body-chars is not given.
func (a *App) runClusterDetail(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("cluster-detail", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		idFlag          = flags.String("id", "", "cluster id")
		memberLimitFlag = flags.String("member-limit", "", "maximum member rows")
		bodyCharsFlag   = flags.String("body-chars", "", "maximum body snippet characters")
		closedFlag      = flags.Bool("include-closed", false, "include closed clusters and members")
		jsonFlag        = flags.Bool("json", false, "write JSON output")
	)
	valueFlags := map[string]bool{"id": true, "member-limit": true, "body-chars": true}
	if err := flags.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(*jsonFlag)
	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("cluster-detail requires owner/repo"))
	}

	// Validate every argument before touching the database.
	owner, repoName, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	clusterID, err := parseRequiredPositiveInt("id", *idFlag)
	if err != nil {
		return usageErr(err)
	}
	memberLimit, err := parseOptionalPositiveInt(*memberLimitFlag)
	if err != nil {
		return usageErr(err)
	}
	bodyChars, err := parseOptionalPositiveInt(*bodyCharsFlag)
	if err != nil {
		return usageErr(err)
	}
	if bodyChars <= 0 {
		bodyChars = 280
	}

	rt, err := a.openLocalRuntimeReadOnly(ctx)
	if err != nil {
		return err
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return err
	}
	query := store.ClusterDetailOptions{
		RepoID:        repo.ID,
		ClusterID:     int64(clusterID),
		IncludeClosed: *closedFlag,
		MemberLimit:   memberLimit,
		BodyChars:     bodyChars,
	}
	detail, err := rt.Store.ClusterDetail(ctx, query)
	if err != nil {
		return err
	}
	output := map[string]any{
		"repository": repo.FullName,
		"cluster":    detail.Cluster,
		"members":    detail.Members,
	}
	return a.writeOutput("cluster-detail", output, true)
}
// runRuns implements the "runs" command. It requires one owner/repo argument,
// opens the runtime read-only and writes the rows returned by Store.ListRuns
// for the trimmed --kind value (default "sync") and the optional --limit.
func (a *App) runRuns(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("runs", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		kindFlag  = flags.String("kind", "sync", "run kind: sync|summary|embedding|cluster")
		limitFlag = flags.String("limit", "", "maximum run rows")
		jsonFlag  = flags.Bool("json", false, "write JSON output")
	)
	valueFlags := map[string]bool{"kind": true, "limit": true}
	if err := flags.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(*jsonFlag)
	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("runs requires owner/repo"))
	}
	owner, repoName, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	limit, err := parseOptionalPositiveInt(*limitFlag)
	if err != nil {
		return usageErr(err)
	}

	rt, err := a.openLocalRuntimeReadOnly(ctx)
	if err != nil {
		return err
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return err
	}
	runKind := strings.TrimSpace(*kindFlag)
	runs, err := rt.Store.ListRuns(ctx, repo.ID, runKind, limit)
	if err != nil {
		return err
	}
	output := map[string]any{
		"repository": repo.FullName,
		"kind":       runKind,
		"runs":       runs,
	}
	return a.writeOutput("runs", output, true)
}
// runThreads implements the "threads" command. It requires one owner/repo
// argument, opens the runtime read-only and writes the rows returned by
// Store.ListThreadsFiltered for the optional --numbers list, --limit and
// --include-closed settings.
func (a *App) runThreads(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("threads", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		closedFlag  = flags.Bool("include-closed", false, "include locally closed rows")
		numbersFlag = flags.String("numbers", "", "comma-separated issue or pull request numbers")
		limitFlag   = flags.String("limit", "", "maximum thread rows")
		jsonFlag    = flags.Bool("json", false, "write JSON output")
	)
	valueFlags := map[string]bool{"numbers": true, "limit": true}
	if err := flags.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(*jsonFlag)
	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("threads requires owner/repo"))
	}
	owner, repoName, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	numbers, err := parseOptionalPositiveIntList(*numbersFlag)
	if err != nil {
		return usageErr(err)
	}
	limit, err := parseOptionalPositiveInt(*limitFlag)
	if err != nil {
		return usageErr(err)
	}

	rt, err := a.openLocalRuntimeReadOnly(ctx)
	if err != nil {
		return err
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return err
	}
	filter := store.ThreadListOptions{
		RepoID:        repo.ID,
		IncludeClosed: *closedFlag,
		Numbers:       numbers,
		Limit:         limit,
	}
	threads, err := rt.Store.ListThreadsFiltered(ctx, filter)
	if err != nil {
		return err
	}
	output := map[string]any{
		"repository": repo.FullName,
		"threads":    threads,
	}
	return a.writeOutput("threads", output, true)
}
// runCloseThread implements the "close-thread" command. It requires one
// owner/repo argument and a --number flag, opens the writable runtime and
// calls Store.CloseThreadLocally with the given reason. The reported reason is
// whitespace-trimmed; the value handed to the store is passed as given.
func (a *App) runCloseThread(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("close-thread", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		numberFlag = flags.String("number", "", "issue or pull request number")
		reasonFlag = flags.String("reason", "CLI manual close", "local close reason")
		jsonFlag   = flags.Bool("json", false, "write JSON output")
	)
	valueFlags := map[string]bool{"number": true, "reason": true}
	if err := flags.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(*jsonFlag)
	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("close-thread requires owner/repo"))
	}
	owner, repoName, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	number, err := parseOptionalPositiveInt(*numberFlag)
	switch {
	case err != nil:
		return usageErr(err)
	case number == 0:
		return usageErr(fmt.Errorf("close-thread requires --number"))
	}

	rt, err := a.openLocalRuntime(ctx)
	if err != nil {
		return err
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return err
	}
	if err = rt.Store.CloseThreadLocally(ctx, repo.ID, number, *reasonFlag); err != nil {
		return err
	}
	output := map[string]any{
		"repository": repo.FullName,
		"number":     number,
		"reason":     strings.TrimSpace(*reasonFlag),
		"closed":     true,
	}
	return a.writeOutput("close-thread", output, true)
}
// runReopenThread implements the "reopen-thread" command. It requires one
// owner/repo argument and a --number flag, opens the writable runtime and
// calls Store.ReopenThreadLocally for that thread.
func (a *App) runReopenThread(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("reopen-thread", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		numberFlag = flags.String("number", "", "issue or pull request number")
		jsonFlag   = flags.Bool("json", false, "write JSON output")
	)
	valueFlags := map[string]bool{"number": true}
	if err := flags.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(*jsonFlag)
	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("reopen-thread requires owner/repo"))
	}
	owner, repoName, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	number, err := parseOptionalPositiveInt(*numberFlag)
	switch {
	case err != nil:
		return usageErr(err)
	case number == 0:
		return usageErr(fmt.Errorf("reopen-thread requires --number"))
	}

	rt, err := a.openLocalRuntime(ctx)
	if err != nil {
		return err
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return err
	}
	if err = rt.Store.ReopenThreadLocally(ctx, repo.ID, number); err != nil {
		return err
	}
	output := map[string]any{
		"repository": repo.FullName,
		"number":     number,
		"reopened":   true,
	}
	return a.writeOutput("reopen-thread", output, true)
}
// runCloseCluster implements the "close-cluster" command. It requires one
// owner/repo argument and an --id flag, opens the writable runtime and calls
// Store.CloseClusterLocally with the given reason. The reported reason is
// whitespace-trimmed; the value handed to the store is passed as given.
func (a *App) runCloseCluster(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("close-cluster", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		idFlag     = flags.String("id", "", "cluster id")
		reasonFlag = flags.String("reason", "CLI manual close", "local close reason")
		jsonFlag   = flags.Bool("json", false, "write JSON output")
	)
	valueFlags := map[string]bool{"id": true, "reason": true}
	if err := flags.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(*jsonFlag)
	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("close-cluster requires owner/repo"))
	}
	owner, repoName, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	clusterID, err := parseOptionalPositiveInt(*idFlag)
	switch {
	case err != nil:
		return usageErr(err)
	case clusterID == 0:
		return usageErr(fmt.Errorf("close-cluster requires --id"))
	}

	rt, err := a.openLocalRuntime(ctx)
	if err != nil {
		return err
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return err
	}
	if err = rt.Store.CloseClusterLocally(ctx, repo.ID, int64(clusterID), *reasonFlag); err != nil {
		return err
	}
	output := map[string]any{
		"repository": repo.FullName,
		"id":         clusterID,
		"reason":     strings.TrimSpace(*reasonFlag),
		"closed":     true,
	}
	return a.writeOutput("close-cluster", output, true)
}
// runReopenCluster implements the "reopen-cluster" command. It requires one
// owner/repo argument and an --id flag, opens the writable runtime and calls
// Store.ReopenClusterLocally for that cluster.
func (a *App) runReopenCluster(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("reopen-cluster", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		idFlag   = flags.String("id", "", "cluster id")
		jsonFlag = flags.Bool("json", false, "write JSON output")
	)
	valueFlags := map[string]bool{"id": true}
	if err := flags.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(*jsonFlag)
	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("reopen-cluster requires owner/repo"))
	}
	owner, repoName, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	clusterID, err := parseOptionalPositiveInt(*idFlag)
	switch {
	case err != nil:
		return usageErr(err)
	case clusterID == 0:
		return usageErr(fmt.Errorf("reopen-cluster requires --id"))
	}

	rt, err := a.openLocalRuntime(ctx)
	if err != nil {
		return err
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return err
	}
	if err = rt.Store.ReopenClusterLocally(ctx, repo.ID, int64(clusterID)); err != nil {
		return err
	}
	output := map[string]any{
		"repository": repo.FullName,
		"id":         clusterID,
		"reopened":   true,
	}
	return a.writeOutput("reopen-cluster", output, true)
}
// runExcludeClusterMember implements the "exclude-cluster-member" command. It
// requires one owner/repo argument plus the cluster and thread identifiers
// parsed by parseClusterMemberCommandIDs, opens the writable runtime and
// writes the override returned by Store.ExcludeClusterMemberLocally.
func (a *App) runExcludeClusterMember(ctx context.Context, args []string) error {
	const command = "exclude-cluster-member"
	flags := flag.NewFlagSet(command, flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		idFlag     = flags.String("id", "", "cluster id")
		numberFlag = flags.String("number", "", "issue or pull request number")
		reasonFlag = flags.String("reason", "CLI manual exclude", "local override reason")
		jsonFlag   = flags.Bool("json", false, "write JSON output")
	)
	valueFlags := map[string]bool{"id": true, "number": true, "reason": true}
	if err := flags.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(*jsonFlag)
	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("exclude-cluster-member requires owner/repo"))
	}
	owner, repoName, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	clusterID, number, err := parseClusterMemberCommandIDs(command, *idFlag, *numberFlag)
	if err != nil {
		return usageErr(err)
	}

	rt, err := a.openLocalRuntime(ctx)
	if err != nil {
		return err
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return err
	}
	override, err := rt.Store.ExcludeClusterMemberLocally(ctx, repo.ID, int64(clusterID), number, *reasonFlag)
	if err != nil {
		return err
	}
	output := map[string]any{
		"repository": repo.FullName,
		"override":   override,
		"excluded":   true,
	}
	return a.writeOutput(command, output, true)
}
// runIncludeClusterMember implements the "include-cluster-member" command. It
// requires one owner/repo argument plus the cluster and thread identifiers
// parsed by parseClusterMemberCommandIDs, opens the writable runtime and
// writes the override returned by Store.IncludeClusterMemberLocally.
func (a *App) runIncludeClusterMember(ctx context.Context, args []string) error {
	const command = "include-cluster-member"
	flags := flag.NewFlagSet(command, flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		idFlag     = flags.String("id", "", "cluster id")
		numberFlag = flags.String("number", "", "issue or pull request number")
		reasonFlag = flags.String("reason", "CLI manual include", "local override reason")
		jsonFlag   = flags.Bool("json", false, "write JSON output")
	)
	valueFlags := map[string]bool{"id": true, "number": true, "reason": true}
	if err := flags.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(*jsonFlag)
	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("include-cluster-member requires owner/repo"))
	}
	owner, repoName, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	clusterID, number, err := parseClusterMemberCommandIDs(command, *idFlag, *numberFlag)
	if err != nil {
		return usageErr(err)
	}

	rt, err := a.openLocalRuntime(ctx)
	if err != nil {
		return err
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return err
	}
	override, err := rt.Store.IncludeClusterMemberLocally(ctx, repo.ID, int64(clusterID), number, *reasonFlag)
	if err != nil {
		return err
	}
	output := map[string]any{
		"repository": repo.FullName,
		"override":   override,
		"included":   true,
	}
	return a.writeOutput(command, output, true)
}
// runSetClusterCanonical implements the "set-cluster-canonical" command. It
// requires one owner/repo argument plus the cluster and thread identifiers
// parsed by parseClusterMemberCommandIDs, opens the writable runtime and
// writes the override returned by Store.SetClusterCanonicalLocally.
func (a *App) runSetClusterCanonical(ctx context.Context, args []string) error {
	const command = "set-cluster-canonical"
	flags := flag.NewFlagSet(command, flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		idFlag     = flags.String("id", "", "cluster id")
		numberFlag = flags.String("number", "", "issue or pull request number")
		reasonFlag = flags.String("reason", "CLI manual canonical", "local override reason")
		jsonFlag   = flags.Bool("json", false, "write JSON output")
	)
	valueFlags := map[string]bool{"id": true, "number": true, "reason": true}
	if err := flags.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(*jsonFlag)
	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("set-cluster-canonical requires owner/repo"))
	}
	owner, repoName, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	clusterID, number, err := parseClusterMemberCommandIDs(command, *idFlag, *numberFlag)
	if err != nil {
		return usageErr(err)
	}

	rt, err := a.openLocalRuntime(ctx)
	if err != nil {
		return err
	}
	defer rt.Store.Close()
	repo, err := rt.repository(ctx, owner, repoName)
	if err != nil {
		return err
	}
	override, err := rt.Store.SetClusterCanonicalLocally(ctx, repo.ID, int64(clusterID), number, *reasonFlag)
	if err != nil {
		return err
	}
	output := map[string]any{
		"repository": repo.FullName,
		"override":   override,
		"canonical":  true,
	}
	return a.writeOutput(command, output, true)
}
// runSync implements the "sync" command. It requires one owner/repo argument,
// validates --limit, --numbers and --with, and delegates to syncRepository.
// PR detail hydration is enabled by either --include-pr-details or
// --with pr-details. The --include-code flag is parsed but its value is not
// used.
func (a *App) runSync(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("sync", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		sinceFlag     = flags.String("since", "", "GitHub since timestamp")
		stateFlag     = flags.String("state", "", "GitHub issue state: open|closed|all; default open")
		numbersFlag   = flags.String("numbers", "", "comma-separated issue or pull request numbers")
		limitFlag     = flags.String("limit", "", "maximum issue/PR rows")
		jsonFlag      = flags.Bool("json", false, "write JSON output")
		commentsFlag  = flags.Bool("include-comments", false, "hydrate issue comments, PR reviews, and PR review comments")
		prDetailsFlag = flags.Bool("include-pr-details", false, "hydrate PR files, commits, checks, and workflow runs")
		withFlag      = flags.String("with", "", "extra hydration: pr-details")
	)
	flags.Bool("include-code", false, "accepted for compatibility; code hydration is not implemented yet")
	valueFlags := map[string]bool{"numbers": true, "since": true, "state": true, "limit": true, "with": true}
	if err := flags.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(*jsonFlag)
	if flags.NArg() != 1 {
		return usageErr(fmt.Errorf("sync requires owner/repo"))
	}
	owner, repo, err := parseOwnerRepo(flags.Arg(0))
	if err != nil {
		return usageErr(err)
	}
	limit, err := parseOptionalPositiveInt(*limitFlag)
	if err != nil {
		return usageErr(err)
	}
	numbers, err := parseOptionalPositiveIntList(*numbersFlag)
	if err != nil {
		return usageErr(err)
	}
	extras, err := parseSyncWith(*withFlag)
	if err != nil {
		return usageErr(err)
	}

	request := syncOptions{
		Since:            strings.TrimSpace(*sinceFlag),
		State:            strings.TrimSpace(*stateFlag),
		Limit:            limit,
		Numbers:          numbers,
		IncludeComments:  *commentsFlag,
		IncludePRDetails: *prDetailsFlag || extras["pr-details"],
	}
	stats, err := a.syncRepository(ctx, owner, repo, request)
	if err != nil {
		return err
	}
	return a.writeOutput("sync", stats, true)
}
// syncOptions carries the settings of a single sync invocation from the
// command layer to syncRepository.
type syncOptions struct {
	// Since and State are the --since and --state flag values.
	Since, State string
	// Limit is the maximum number of issue/PR rows; zero when not given.
	Limit int
	// Numbers restricts the sync to specific issue or pull request numbers.
	Numbers []int
	// IncludeComments and IncludePRDetails enable the optional hydration
	// steps.
	IncludeComments, IncludePRDetails bool
}
// parseSyncWith parses the comma-separated --with value into a set of enabled
// extras. Blank entries are ignored. "pr-details" is the only supported name;
// any other name returns a nil map and an error.
func parseSyncWith(value string) (map[string]bool, error) {
	enabled := make(map[string]bool)
	for _, raw := range strings.Split(value, ",") {
		name := strings.TrimSpace(raw)
		if name == "" {
			continue
		}
		if name != "pr-details" {
			return nil, fmt.Errorf("unsupported --with value %q", name)
		}
		enabled[name] = true
	}
	return enabled, nil
}
// syncRepository loads the configuration, resolves a GitHub token, ensures the
// runtime directories exist, opens the store at cfg.DBPath and runs a sync for
// owner/repo with the given options. Progress messages and log records are
// written to a.Stderr. A zero syncer.Stats is returned together with any
// error.
func (a *App) syncRepository(ctx context.Context, owner, repo string, options syncOptions) (syncer.Stats, error) {
	var none syncer.Stats
	cfg, err := config.Load(a.configPath)
	if err != nil {
		return none, err
	}
	token := a.resolveGitHubToken(ctx, cfg)
	if token.Value == "" {
		return none, fmt.Errorf("missing GitHub token: set %s or authenticate gh", cfg.GitHub.TokenEnv)
	}
	if err = config.EnsureRuntimeDirs(cfg); err != nil {
		return none, err
	}
	st, err := store.Open(ctx, cfg.DBPath)
	if err != nil {
		return none, err
	}
	defer st.Close()

	client := gh.New(gh.Options{Token: token.Value, BaseURL: githubBaseURL()})
	service := syncer.New(client, st)
	report := func(message string) {
		fmt.Fprintln(a.Stderr, message)
	}
	request := syncer.Options{
		Owner:            owner,
		Repo:             repo,
		State:            strings.TrimSpace(options.State),
		Since:            strings.TrimSpace(options.Since),
		Limit:            options.Limit,
		Numbers:          options.Numbers,
		IncludeComments:  options.IncludeComments,
		IncludePRDetails: options.IncludePRDetails,
		Reporter:         report,
		Logger:           progressLogger(a.Stderr),
	}
	stats, err := service.Sync(ctx, request)
	if err != nil {
		return none, err
	}
	return stats, nil
}
// progressLogger returns a text-format slog logger that writes to w and drops
// the time attribute from every record.
func progressLogger(w io.Writer) *slog.Logger {
	dropTime := func(_ []string, attr slog.Attr) slog.Attr {
		if attr.Key != slog.TimeKey {
			return attr
		}
		// An empty Attr tells the handler to omit the attribute.
		return slog.Attr{}
	}
	opts := &slog.HandlerOptions{ReplaceAttr: dropTime}
	return slog.New(slog.NewTextHandler(w, opts))
}
// runInit implements the "init" command. It starts from config.Default,
// optionally points the database path at --db or at a database inside a
// portable store checkout, saves the configuration, ensures the runtime
// directories exist and reports the resulting paths.
//
// --db and --portable-store are mutually exclusive. With --portable-store the
// store is cloned or updated into --store-dir (or a default directory next to
// the config file) and the database named by --portable-db must exist inside
// it. --portable-db is validated before the store is synchronized so that an
// invalid value fails without any network or filesystem side effect.
func (a *App) runInit(ctx context.Context, args []string) error {
	fs := flag.NewFlagSet("init", flag.ContinueOnError)
	fs.SetOutput(io.Discard)
	dbPath := fs.String("db", "", "database path")
	portableStore := fs.String("portable-store", "", "HTTPS git URL for a portable gitcrawl store")
	portableDB := fs.String("portable-db", "data/openclaw__openclaw.sync.db", "database path inside portable store")
	storeDir := fs.String("store-dir", "", "local portable store checkout directory")
	jsonOut := fs.Bool("json", false, "write JSON output")
	if err := fs.Parse(normalizeCommandArgs(args, map[string]bool{"db": true, "portable-store": true, "portable-db": true, "store-dir": true})); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(*jsonOut)
	dbOverride := strings.TrimSpace(*dbPath)
	portableStoreURL := strings.TrimSpace(*portableStore)
	if dbOverride != "" && portableStoreURL != "" {
		return usageErr(fmt.Errorf("use either --db or --portable-store, not both"))
	}
	cfg := config.Default()
	portableStoreDir := ""
	portableStoreAction := ""
	if portableStoreURL != "" {
		// The database path must stay inside the checkout: strip leading
		// slashes, clean it, and reject anything that is empty, absolute or
		// escapes through "..".
		relativeDB := filepath.Clean(filepath.FromSlash(strings.TrimLeft(strings.TrimSpace(*portableDB), "/")))
		if relativeDB == "." || relativeDB == ".." || filepath.IsAbs(relativeDB) || strings.HasPrefix(relativeDB, ".."+string(os.PathSeparator)) {
			return usageErr(fmt.Errorf("invalid --portable-db %q", *portableDB))
		}
		portableStoreDir = strings.TrimSpace(*storeDir)
		if portableStoreDir == "" {
			portableStoreDir = defaultPortableStoreDir(config.ResolvePath(a.configPath), portableStoreURL)
		}
		action, err := syncPortableStore(ctx, portableStoreURL, portableStoreDir)
		if err != nil {
			return err
		}
		portableStoreAction = action
		cfg.DBPath = filepath.Join(portableStoreDir, relativeDB)
		if _, err := os.Stat(cfg.DBPath); err != nil {
			return fmt.Errorf("portable database not found at %s: %w", cfg.DBPath, err)
		}
	}
	if dbOverride != "" {
		cfg.DBPath = dbOverride
	}
	if err := config.Save(a.configPath, cfg); err != nil {
		return err
	}
	if err := config.EnsureRuntimeDirs(cfg); err != nil {
		return err
	}
	return a.writeInitOutput(initResult{
		ConfigPath:       config.ResolvePath(a.configPath),
		DBPath:           cfg.DBPath,
		CacheDir:         cfg.CacheDir,
		VectorDir:        cfg.VectorDir,
		PortableStoreURL: portableStoreURL,
		PortableStoreDir: portableStoreDir,
		PortableStore:    portableStoreAction,
	})
}
// runPortable dispatches the "portable" subcommands: "prune" runs
// runPortablePrune with the remaining arguments, and "help", "--help" or "-h"
// print the command usage. A missing or unknown subcommand is a usage error.
func (a *App) runPortable(ctx context.Context, args []string) error {
	if len(args) == 0 {
		return usageErr(fmt.Errorf("portable requires a subcommand"))
	}
	sub, rest := args[0], args[1:]
	switch sub {
	case "prune":
		return a.runPortablePrune(ctx, rest)
	case "help", "--help", "-h":
		return a.printCommandUsage("portable")
	}
	return usageErr(fmt.Errorf("unknown portable subcommand %q", sub))
}
// runPortablePrune implements "portable prune". It takes no positional
// arguments, opens the writable runtime and calls Store.PrunePortablePayloads
// with the --body-chars limit (256 when the value parses to zero) and with
// vacuuming enabled unless --no-vacuum is set.
func (a *App) runPortablePrune(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("portable prune", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		bodyCharsFlag = flags.String("body-chars", "256", "maximum thread body characters to keep")
		noVacuumFlag  = flags.Bool("no-vacuum", false, "skip SQLite vacuum after pruning")
		jsonFlag      = flags.Bool("json", false, "write JSON output")
	)
	valueFlags := map[string]bool{"body-chars": true}
	if err := flags.Parse(normalizeCommandArgs(args, valueFlags)); err != nil {
		return usageErr(err)
	}
	a.applyCommandJSON(*jsonFlag)
	if flags.NArg() != 0 {
		return usageErr(fmt.Errorf("portable prune does not take positional arguments"))
	}
	keepChars, err := parseOptionalPositiveInt(*bodyCharsFlag)
	if err != nil {
		return usageErr(err)
	}
	if keepChars == 0 {
		keepChars = 256
	}

	rt, err := a.openLocalRuntime(ctx)
	if err != nil {
		return err
	}
	defer rt.Store.Close()
	request := store.PortablePruneOptions{
		BodyChars: keepChars,
		Vacuum:    !*noVacuumFlag,
	}
	stats, err := rt.Store.PrunePortablePayloads(ctx, request)
	if err != nil {
		return err
	}
	return a.writeOutput("portable prune", stats, true)
}
// defaultPortableStoreDir returns the default checkout directory for a
// portable store: "<config dir>/stores/<name>", where name is the last path
// segment of remoteURL without a trailing ".git", passed through safePathName.
// When that leaves nothing, the name "portable-store" is used.
func defaultPortableStoreDir(configPath, remoteURL string) string {
	leaf := strings.TrimSuffix(remoteURL, ".git")
	if slash := strings.LastIndex(leaf, "/"); slash >= 0 {
		leaf = leaf[slash+1:]
	}
	if leaf = safePathName(leaf); leaf == "" {
		leaf = "portable-store"
	}
	return filepath.Join(filepath.Dir(configPath), "stores", leaf)
}
// safePathName lower-cases value, replaces every rune other than a-z, 0-9,
// '-', '_' and '.' with '-', and strips leading and trailing '-' and '.'
// characters from the result.
func safePathName(value string) string {
	sanitize := func(r rune) rune {
		isLetter := r >= 'a' && r <= 'z'
		isDigit := r >= '0' && r <= '9'
		if isLetter || isDigit || r == '-' || r == '_' || r == '.' {
			return r
		}
		return '-'
	}
	return strings.Trim(strings.Map(sanitize, strings.ToLower(value)), "-.")
}
func syncPortableStore(ctx context.Context, remoteURL, dir string) (string, error) {
if strings.TrimSpace(remoteURL) == "" {
return "", fmt.Errorf("portable store URL is required")
}
if strings.TrimSpace(dir) == "" {
return "", fmt.Errorf("portable store directory is required")
}
gitDir := filepath.Join(dir, ".git")
if info, err := os.Stat(gitDir); err == nil && info.IsDir() {
if !gitWorktreeClean(ctx, dir) {
if resetErr := runGit(ctx, "", "-C", dir, "reset", "--hard", "HEAD"); resetErr != nil {
return "", resetErr
}
if retryErr := fastForwardGitCheckout(ctx, dir, false); retryErr != nil {
return "", retryErr
}
if err := removePortableSQLiteSidecars(dir); err != nil {
return "", err
}
return "reset-pulled", nil
}
if err := fastForwardGitCheckout(ctx, dir, false); err != nil {
if !isDirtyPortablePullError(err) {
return "", err
}
if resetErr := runGit(ctx, "", "-C", dir, "reset", "--hard", "HEAD"); resetErr != nil {
return "", err
}
if retryErr := fastForwardGitCheckout(ctx, dir, false); retryErr != nil {
return "", retryErr
}
if err := removePortableSQLiteSidecars(dir); err != nil {
return "", err
}
return "reset-pulled", nil
}
if err := removePortableSQLiteSidecars(dir); err != nil {
return "", err
}
return "pulled", nil
}
if entries, err := os.ReadDir(dir); err == nil && len(entries) > 0 {
return "", fmt.Errorf("portable store directory %s exists but is not a git checkout", dir)
} else if err != nil && !os.IsNotExist(err) {
return "", fmt.Errorf("read portable store directory: %w", err)
}
if err := os.MkdirAll(filepath.Dir(dir), 0o755); err != nil {
return "", fmt.Errorf("create portable store parent: %w", err)
}
if err := runGit(ctx, "", "clone", "--depth", "1", remoteURL, dir); err != nil {
return "", err
}
if err := removePortableSQLiteSidecars(dir); err != nil {
return "", err
}
return "cloned", nil
}
// removePortableSQLiteSidecars walks dir and deletes every file whose path
// ends in ".db-wal" or ".db-shm". Directories named ".git" are not descended
// into. A file that has already disappeared is not an error; any other walk or
// removal failure aborts the walk and is returned.
func removePortableSQLiteSidecars(dir string) error {
	visit := func(path string, entry os.DirEntry, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case entry.IsDir():
			if entry.Name() == ".git" {
				return filepath.SkipDir
			}
			return nil
		}
		isSidecar := strings.HasSuffix(path, ".db-wal") || strings.HasSuffix(path, ".db-shm")
		if !isSidecar {
			return nil
		}
		err := os.Remove(path)
		if err == nil || errors.Is(err, os.ErrNotExist) {
			return nil
		}
		return fmt.Errorf("remove portable sqlite sidecar %s: %w", path, err)
	}
	return filepath.WalkDir(dir, visit)
}
// isDirtyPortablePullError reports whether err's message contains one of the
// phrases "Your local changes" or "would be overwritten by merge".
func isDirtyPortablePullError(err error) bool {
	text := err.Error()
	for _, marker := range []string{"Your local changes", "would be overwritten by merge"} {
		if strings.Contains(text, marker) {
			return true
		}
	}
	return false
}
// fastForwardGitCheckout fetches from the checkout's remote and fast-forwards
// the checkout in dir with "git merge --ff-only".
//
// The remote is the one configured for the current branch, or "origin" when
// none can be determined. The merge target is the remote-tracking ref of the
// current branch when it exists, otherwise the branch the remote's HEAD points
// to. When quiet is set, "--quiet" is passed to both fetch and merge.
func fastForwardGitCheckout(ctx context.Context, dir string, quiet bool) error {
	branch := currentGitBranch(ctx, dir)
	var remote string
	if branch != "" {
		if configured, err := gitConfigValue(ctx, dir, "branch."+branch+".remote"); err == nil {
			remote = configured
		}
	}
	if strings.TrimSpace(remote) == "" {
		remote = "origin"
	}

	// gitArgs assembles "-C dir <subcommand...> [--quiet] <last>".
	gitArgs := func(last string, subcommand ...string) []string {
		out := append([]string{"-C", dir}, subcommand...)
		if quiet {
			out = append(out, "--quiet")
		}
		return append(out, last)
	}

	if err := runGit(ctx, "", gitArgs(remote, "fetch", "--prune")...); err != nil {
		return err
	}
	target := gitRemoteBranchRef(ctx, dir, remote, branch)
	if target == "" {
		head, err := gitOutput(ctx, "", "-C", dir, "symbolic-ref", "--quiet", "--short", "refs/remotes/"+remote+"/HEAD")
		if err != nil {
			return fmt.Errorf("resolve portable store upstream branch: %w", err)
		}
		if strings.TrimSpace(head) == "" {
			return fmt.Errorf("resolve portable store upstream branch: remote %q has no HEAD", remote)
		}
		target = head
	}
	return runGit(ctx, "", gitArgs(target, "merge", "--ff-only")...)
}
// currentGitBranch returns the short name that HEAD symbolically points to in
// dir, or "" when "git symbolic-ref" fails.
func currentGitBranch(ctx context.Context, dir string) string {
	name, err := gitOutput(ctx, "", "-C", dir, "symbolic-ref", "--quiet", "--short", "HEAD")
	if err == nil {
		return strings.TrimSpace(name)
	}
	return ""
}
// gitRemoteBranchRef returns "refs/remotes/<remote>/<branch>" when that ref
// exists in the checkout at dir. It returns "" when remote or branch is blank
// or when "git show-ref --verify" fails.
func gitRemoteBranchRef(ctx context.Context, dir, remote, branch string) string {
	if strings.TrimSpace(remote) != "" && strings.TrimSpace(branch) != "" {
		candidate := "refs/remotes/" + remote + "/" + branch
		if runGit(ctx, "", "-C", dir, "show-ref", "--verify", "--quiet", candidate) == nil {
			return candidate
		}
	}
	return ""
}
// gitConfigValue runs "git config --get key" in dir and returns the trimmed
// value together with any error from the command.
func gitConfigValue(ctx context.Context, dir, key string) (string, error) {
	raw, err := gitOutput(ctx, "", "-C", dir, "config", "--get", key)
	trimmed := strings.TrimSpace(raw)
	return trimmed, err
}
// runGit runs "git args..." in workdir with terminal prompts disabled and SSH
// in batch mode. On failure the returned error wraps the command error and
// includes the argument list and the trimmed combined output.
func runGit(ctx context.Context, workdir string, args ...string) error {
	env := append(os.Environ(),
		"GIT_TERMINAL_PROMPT=0",
		"GIT_SSH_COMMAND=ssh -o BatchMode=yes -o ConnectTimeout=10",
	)
	cmd := exec.CommandContext(ctx, "git", args...)
	cmd.Dir, cmd.Env = workdir, env
	combined, err := cmd.CombinedOutput()
	if err == nil {
		return nil
	}
	return fmt.Errorf("git %s failed: %w\n%s", strings.Join(args, " "), err, strings.TrimSpace(string(combined)))
}
// gitOutput executes git with args in workdir and returns its trimmed
// standard output.
//
// Only stdout is returned: callers parse the result as a ref name or config
// value, so warnings or trace lines that git writes to stderr must not leak
// into it. The environment disables terminal prompts and puts ssh in batch
// mode with a connect timeout.
//
// On failure the result is "" and the error wraps the process error,
// followed by whatever stderr (and stdout, if any) text git produced.
func gitOutput(ctx context.Context, workdir string, args ...string) (string, error) {
	cmd := exec.CommandContext(ctx, "git", args...)
	cmd.Dir = workdir
	cmd.Env = append(os.Environ(),
		"GIT_TERMINAL_PROMPT=0",
		"GIT_SSH_COMMAND=ssh -o BatchMode=yes -o ConnectTimeout=10",
	)
	// Output captures stdout only; because cmd.Stderr is left unset, stderr
	// is recorded on the *exec.ExitError when the command exits non-zero.
	stdout, err := cmd.Output()
	if err != nil {
		details := make([]string, 0, 2)
		var exitErr *exec.ExitError
		if errors.As(err, &exitErr) {
			if text := strings.TrimSpace(string(exitErr.Stderr)); text != "" {
				details = append(details, text)
			}
		}
		if text := strings.TrimSpace(string(stdout)); text != "" {
			details = append(details, text)
		}
		return "", fmt.Errorf("git %s failed: %w\n%s", strings.Join(args, " "), err, strings.Join(details, "\n"))
	}
	return strings.TrimSpace(string(stdout)), nil
}
// runDoctor implements the "doctor" command: it loads the config (falling
// back to normalized defaults when the config file does not exist), ensures
// the runtime directories, reads store counters when a local runtime can be
// opened read-only, and writes a readiness report that includes whether
// GitHub and OpenAI credentials resolve.
func (a *App) runDoctor(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("doctor", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	wantJSON := flags.Bool("json", false, "write JSON output")
	if parseErr := flags.Parse(normalizeCommandArgs(args, nil)); parseErr != nil {
		return usageErr(parseErr)
	}
	a.applyCommandJSON(*wantJSON)

	configExists := true
	cfg, loadErr := config.Load(a.configPath)
	switch {
	case loadErr == nil:
		// Config was read successfully; nothing to patch up.
	case errors.Is(loadErr, os.ErrNotExist):
		configExists = false
		cfg = config.Default()
		if err := cfg.Normalize(); err != nil {
			return err
		}
	default:
		return loadErr
	}
	if err := config.EnsureRuntimeDirs(cfg); err != nil {
		return err
	}

	// A missing runtime is tolerated: the report then shows zero counters.
	storeStatus := store.Status{DBPath: cfg.DBPath}
	rt, openErr := a.openLocalRuntimeReadOnly(ctx)
	switch {
	case openErr == nil:
		defer rt.Store.Close()
		var statusErr error
		if storeStatus, statusErr = rt.Store.Status(ctx); statusErr != nil {
			return statusErr
		}
		storeStatus.DBPath = cfg.DBPath
	case !errors.Is(openErr, os.ErrNotExist):
		return openErr
	}

	githubToken := config.ResolveGitHubToken(cfg)
	openAIKey := config.ResolveOpenAIKey(cfg)
	report := map[string]any{
		"version":              version,
		"config_path":          config.ResolvePath(a.configPath),
		"config_exists":        configExists,
		"db_path":              cfg.DBPath,
		"github_token_present": githubToken.Value != "",
		"github_token_source":  githubToken.Source,
		"openai_key_present":   openAIKey.Value != "",
		"openai_key_source":    openAIKey.Source,
		"repository_count":     storeStatus.RepositoryCount,
		"thread_count":         storeStatus.ThreadCount,
		"open_thread_count":    storeStatus.OpenThreadCount,
		"cluster_count":        storeStatus.ClusterCount,
		"last_sync_at":         formatOptionalTime(storeStatus.LastSyncAt),
		"summary_model":        cfg.OpenAI.SummaryModel,
		"embed_model":          cfg.OpenAI.EmbedModel,
		"embedding_basis":      cfg.EmbeddingBasis,
		"api_supported":        false,
	}
	return a.writeOutput("doctor", report, true)
}
// runMetadata implements the "metadata" command: it assembles the crawlkit
// control manifest (branding, default paths, capabilities, privacy flags and
// the command table) from the default config and writes it out. Positional
// arguments are rejected.
func (a *App) runMetadata(args []string) error {
	flags := flag.NewFlagSet("metadata", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	wantJSON := flags.Bool("json", false, "write JSON output")
	if parseErr := flags.Parse(normalizeCommandArgs(args, nil)); parseErr != nil {
		return usageErr(parseErr)
	}
	a.applyCommandJSON(*wantJSON)
	if flags.NArg() > 0 {
		return usageErr(fmt.Errorf("metadata takes flags only"))
	}

	defaults := config.Default()
	manifest := control.NewManifest("gitcrawl", "Git Crawl", "gitcrawl")
	manifest.Description = "Local-first GitHub issue and pull request crawler."
	manifest.Branding = control.Branding{
		SymbolName:  "point.3.connected.trianglepath.dotted",
		AccentColor: "#2da44e",
	}
	manifest.Paths = control.Paths{
		DefaultConfig:   config.ResolvePath(""),
		ConfigEnv:       config.DefaultConfigEnv,
		DefaultDatabase: defaults.DBPath,
		DefaultCache:    defaults.CacheDir,
		DefaultLogs:     defaults.LogDir,
	}
	manifest.Capabilities = []string{
		"metadata", "status", "doctor", "sync", "search",
		"tui", "portable", "clusters", "embeddings",
	}
	manifest.Privacy = control.Privacy{
		ContainsPrivateMessages: false,
		ExportsSecrets:          false,
		LocalOnlyScopes:         []string{"github", "sqlite", "portable"},
	}

	commands := make(map[string]control.Command, 9)
	commands["status"] = control.Command{Title: "Status", Argv: []string{"gitcrawl", "status", "--json"}, JSON: true}
	commands["doctor"] = control.Command{Title: "Doctor", Argv: []string{"gitcrawl", "doctor", "--json"}, JSON: true}
	commands["sync"] = control.Command{Title: "Sync repository", Argv: []string{"gitcrawl", "sync", "--json"}, JSON: true, Mutates: true}
	commands["search"] = control.Command{Title: "Search", Argv: []string{"gitcrawl", "search", "--json"}, JSON: true}
	commands["tui"] = control.Command{Title: "Terminal cluster browser", Argv: []string{"gitcrawl", "tui"}}
	commands["tui-json"] = control.Command{Title: "Terminal cluster data", Argv: []string{"gitcrawl", "tui", "--json"}, JSON: true}
	commands["portable"] = control.Command{Title: "Portable store tools", Argv: []string{"gitcrawl", "portable", "prune", "--json"}, JSON: true, Mutates: true}
	commands["clusters"] = control.Command{Title: "Clusters", Argv: []string{"gitcrawl", "clusters", "--json"}, JSON: true}
	commands["legacy-sync-api"] = control.Command{Title: "Legacy sync-status alias", Argv: []string{"gitcrawl", "sync-status"}, Legacy: true, Deprecated: true}
	manifest.Commands = commands

	return a.writeOutput("metadata", manifest, false)
}
// runStatus implements the "status" command: it loads the config (or
// normalized defaults when the config file is absent), reads counters from
// the database read-only when the database file exists, and writes a control
// status document. A missing database yields a status with zero counters.
func (a *App) runStatus(ctx context.Context, args []string) error {
	flags := flag.NewFlagSet("status", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	wantJSON := flags.Bool("json", false, "write JSON output")
	if parseErr := flags.Parse(normalizeCommandArgs(args, nil)); parseErr != nil {
		return usageErr(parseErr)
	}
	a.applyCommandJSON(*wantJSON)
	if flags.NArg() > 0 {
		return usageErr(fmt.Errorf("status takes flags only"))
	}

	cfg, loadErr := config.Load(a.configPath)
	switch {
	case loadErr == nil:
		// Config was read successfully.
	case errors.Is(loadErr, os.ErrNotExist):
		cfg = config.Default()
		if err := cfg.Normalize(); err != nil {
			return err
		}
	default:
		return loadErr
	}

	status := store.Status{DBPath: cfg.DBPath}
	_, statErr := os.Stat(cfg.DBPath)
	switch {
	case statErr == nil:
		st, err := store.OpenReadOnly(ctx, cfg.DBPath)
		if err != nil {
			return err
		}
		defer st.Close()
		if status, err = st.Status(ctx); err != nil {
			return err
		}
	case !errors.Is(statErr, os.ErrNotExist):
		return statErr
	}
	status.DBPath = cfg.DBPath

	resolvedConfig := config.ResolvePath(a.configPath)
	return a.writeOutput("status", controlStatus(resolvedConfig, cfg, status), false)
}
// controlStatus converts store counters into a crawlkit control.Status:
// a one-line summary, the four counters, the last sync time (UTC, RFC 3339,
// omitted when zero), and a single SQLite database entry together with the
// size of its "-wal" sidecar file.
func controlStatus(configPath string, cfg config.Config, status store.Status) control.Status {
	counts := []control.Count{
		control.NewCount("repositories", "Repositories", int64(status.RepositoryCount)),
		control.NewCount("threads", "Threads", int64(status.ThreadCount)),
		control.NewCount("open_threads", "Open threads", int64(status.OpenThreadCount)),
		control.NewCount("clusters", "Clusters", int64(status.ClusterCount)),
	}
	summary := fmt.Sprintf("%d threads across %d repositories", status.ThreadCount, status.RepositoryCount)

	result := control.NewStatus("gitcrawl", summary)
	result.State = "current"
	result.ConfigPath = configPath
	result.DatabasePath = status.DBPath
	result.Counts = counts
	if lastSync := status.LastSyncAt; !lastSync.IsZero() {
		result.LastSyncAt = lastSync.UTC().Format(time.RFC3339)
	}

	primary := control.SQLiteDatabase("primary", "GitHub archive", "archive", status.DBPath, true, counts)
	result.DatabaseBytes = primary.Bytes
	result.WALBytes = fileSize(status.DBPath + "-wal")
	result.Databases = []control.Database{primary}
	return result
}
// fileSize reports the size in bytes of the file at path, or 0 when the
// path cannot be stat'ed.
func fileSize(path string) int64 {
	if info, err := os.Stat(path); err == nil {
		return info.Size()
	}
	return 0
}
// applyCommandJSON switches the app's output format to JSON when a
// command-level --json flag was set; otherwise the format is left untouched.
func (a *App) applyCommandJSON(enabled bool) {
	if !enabled {
		return
	}
	a.format = FormatJSON
}
// formatOptionalTime renders value as RFC 3339 with nanoseconds, or returns
// "" for the zero time.
func formatOptionalTime(value time.Time) string {
	if !value.IsZero() {
		return value.Format(time.RFC3339Nano)
	}
	return ""
}
// resolveOutputFormat picks the output format. jsonOut forces JSON;
// otherwise value is matched case-insensitively (after trimming) against
// text, json and log, with an empty value meaning text. Anything else is an
// error.
func resolveOutputFormat(value string, jsonOut bool) (OutputFormat, error) {
	if jsonOut {
		return FormatJSON, nil
	}
	normalized := OutputFormat(strings.ToLower(strings.TrimSpace(value)))
	if normalized == "" {
		return FormatText, nil
	}
	for _, known := range []OutputFormat{FormatText, FormatJSON, FormatLog} {
		if normalized == known {
			return known, nil
		}
	}
	return "", fmt.Errorf("unsupported format %q: use text, json, or log", value)
}
// parseOwnerRepo splits "owner/repo" into its two trimmed halves. The input
// must contain exactly one slash and both halves must be non-blank.
func parseOwnerRepo(value string) (string, string, error) {
	ownerPart, repoPart, hasSlash := strings.Cut(value, "/")
	owner, repo := strings.TrimSpace(ownerPart), strings.TrimSpace(repoPart)
	if !hasSlash || strings.Contains(repoPart, "/") || owner == "" || repo == "" {
		return "", "", fmt.Errorf("expected owner/repo, got %q", value)
	}
	return owner, repo, nil
}
// parseOptionalPositiveInt parses an optional positive integer flag value.
//
// A blank value (empty or whitespace only) means "not provided" and yields
// 0 with no error. Surrounding whitespace is ignored, so the blank check and
// the numeric parse agree on what the value is. Anything that is not an
// integer greater than zero is an error that quotes the original input.
func parseOptionalPositiveInt(value string) (int, error) {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return 0, nil
	}
	parsed, err := strconv.Atoi(trimmed)
	if err != nil || parsed <= 0 {
		return 0, fmt.Errorf("expected positive integer, got %q", value)
	}
	return parsed, nil
}
// parseRequiredPositiveInt parses a mandatory positive integer flag. Parse
// errors are returned as-is; a blank value is reported as a missing --name
// flag.
func parseRequiredPositiveInt(name, value string) (int, error) {
	switch parsed, err := parseOptionalPositiveInt(value); {
	case err != nil:
		return 0, err
	case parsed == 0:
		return 0, fmt.Errorf("missing --%s", name)
	default:
		return parsed, nil
	}
}
// parseClusterMemberCommandIDs parses the --id (cluster) and --number
// (thread) values shared by the cluster-member commands. Both are required
// positive integers; command is used to prefix the "requires" errors. The
// cluster id is validated before the thread number.
func parseClusterMemberCommandIDs(command, clusterIDRaw, numberRaw string) (int, int, error) {
	required := [2]struct {
		raw  string
		flag string
	}{
		{raw: clusterIDRaw, flag: "--id"},
		{raw: numberRaw, flag: "--number"},
	}
	var parsed [2]int
	for i, field := range required {
		value, err := parseOptionalPositiveInt(field.raw)
		if err != nil {
			return 0, 0, err
		}
		if value == 0 {
			return 0, 0, fmt.Errorf("%s requires %s", command, field.flag)
		}
		parsed[i] = value
	}
	return parsed[0], parsed[1], nil
}
// clusterBuildOptions carries the tuning knobs consumed by
// buildDurableClusterInputs. Non-positive MinSize, MaxClusterSize, Fanout
// and CrossKindThreshold values are replaced with defaults there.
type clusterBuildOptions struct {
	// Threshold is the minimum cosine score a thread pair needs to become a
	// candidate edge.
	Threshold float64
	// MinSize is the smallest member count a built cluster must have to be
	// emitted.
	MinSize int
	// MaxClusterSize is forwarded to the clusterer as its MaxSize option.
	MaxClusterSize int
	// Fanout is the per-thread neighbor limit passed to keepTopEdges.
	Fanout int
	// CrossKindThreshold is the minimum score for an edge between two
	// threads whose Kind differs.
	CrossKindThreshold float64
}
// parseClusterShapeOptions parses the raw max-cluster-size, fanout and
// cross-kind-threshold flag values for command.
//
// Blank (or zero) values fall back to defaultClusterMaxSize,
// defaultClusterFanout and defaultCrossKindMinScore respectively. The
// cross-kind threshold must lie in [0, 1]; the returned values are, in
// order, max cluster size, fanout and cross-kind threshold.
func parseClusterShapeOptions(command, maxClusterSizeRaw, fanoutRaw, crossKindThresholdRaw string) (int, int, float64, error) {
	maxClusterSize, err := parseOptionalPositiveInt(maxClusterSizeRaw)
	if err != nil {
		return 0, 0, 0, err
	}
	fanout, err := parseOptionalPositiveInt(fanoutRaw)
	if err != nil {
		return 0, 0, 0, err
	}
	crossKindThreshold, err := parseOptionalFloat(crossKindThresholdRaw)
	if err != nil {
		return 0, 0, 0, err
	}
	if maxClusterSize == 0 {
		maxClusterSize = defaultClusterMaxSize
	}
	if fanout == 0 {
		fanout = defaultClusterFanout
	}
	if crossKindThreshold == 0 {
		crossKindThreshold = defaultCrossKindMinScore
	}
	// Written as a negated "inside the range" test on purpose: every
	// comparison with NaN is false, so "x < 0 || x > 1" would let NaN pass.
	if !(crossKindThreshold >= 0 && crossKindThreshold <= 1) {
		return 0, 0, 0, fmt.Errorf("%s requires --cross-kind-threshold between 0 and 1", command)
	}
	return maxClusterSize, fanout, crossKindThreshold, nil
}
// buildDurableClusterInputs turns stored thread vectors into durable cluster
// inputs for repoID.
//
// Steps:
//  1. Load the threads behind storedVectors; vectors without a thread row
//     are ignored.
//  2. Score every thread pair by cosine similarity and keep pairs that reach
//     options.Threshold, that either reach highConfidenceEdgeScore or share
//     enough title tokens, and that reach options.CrossKindThreshold when
//     the two threads differ in Kind.
//  3. Add edges for explicit cross-references found in titles and bodies.
//  4. Prune to the top options.Fanout neighbors per thread and cluster the
//     result with a size cap of options.MaxClusterSize.
//  5. Drop clusters smaller than options.MinSize and describe the rest,
//     members ordered by thread number.
//
// Non-positive MinSize, MaxClusterSize, Fanout and CrossKindThreshold are
// replaced with defaults. The int result is the number of edges kept after
// pruning. The function returns ctx's error if ctx is cancelled during
// pairwise scoring.
func buildDurableClusterInputs(ctx context.Context, st *store.Store, repoID int64, storedVectors []store.ThreadVector, options clusterBuildOptions) ([]store.DurableClusterInput, int, error) {
	if options.MinSize <= 0 {
		options.MinSize = 1
	}
	if options.MaxClusterSize <= 0 {
		options.MaxClusterSize = defaultClusterMaxSize
	}
	if options.Fanout <= 0 {
		options.Fanout = defaultClusterFanout
	}
	if options.CrossKindThreshold <= 0 {
		options.CrossKindThreshold = defaultCrossKindMinScore
	}

	threadIDs := make([]int64, 0, len(storedVectors))
	vectorByThreadID := make(map[int64][]float64, len(storedVectors))
	for _, stored := range storedVectors {
		threadIDs = append(threadIDs, stored.ThreadID)
		vectorByThreadID[stored.ThreadID] = stored.Vector
	}
	threads, err := st.ThreadsByIDs(ctx, repoID, threadIDs)
	if err != nil {
		return nil, 0, err
	}
	nodes := make([]clusterer.Node, 0, len(storedVectors))
	for _, stored := range storedVectors {
		thread, ok := threads[stored.ThreadID]
		if !ok {
			continue
		}
		nodes = append(nodes, clusterer.Node{ThreadID: stored.ThreadID, Number: thread.Number, Title: thread.Title})
	}

	candidateByPair := map[string]clusterer.Edge{}
	for left := 0; left < len(nodes); left++ {
		// The pairwise scan is quadratic in the thread count; honor
		// cancellation once per row.
		if err := ctx.Err(); err != nil {
			return nil, 0, err
		}
		leftID := nodes[left].ThreadID
		leftVector := vectorByThreadID[leftID]
		leftThread := threads[leftID]
		for right := left + 1; right < len(nodes); right++ {
			rightID := nodes[right].ThreadID
			score := vector.Cosine(leftVector, vectorByThreadID[rightID])
			if score < options.Threshold {
				continue
			}
			rightThread := threads[rightID]
			// Below the high-confidence score an edge must also be backed
			// by overlapping title tokens.
			if score < highConfidenceEdgeScore && titleTokenOverlap(leftThread.Title, rightThread.Title) < weakEdgeMinTitleOverlap {
				continue
			}
			if leftThread.Kind != rightThread.Kind && score < options.CrossKindThreshold {
				continue
			}
			upsertClusterEdge(candidateByPair, leftID, rightID, score)
		}
	}
	addDeterministicReferenceEdges(candidateByPair, nodes, threads)

	candidates := make([]clusterer.Edge, 0, len(candidateByPair))
	for _, edge := range candidateByPair {
		candidates = append(candidates, edge)
	}
	// Map iteration order is random. Sort so pruning and clustering always
	// receive the edges in the same order for the same input: strongest
	// first, then by thread IDs (each pair occurs once, so this is total).
	sort.Slice(candidates, func(i, j int) bool {
		a, b := candidates[i], candidates[j]
		if a.Score != b.Score {
			return a.Score > b.Score
		}
		if a.LeftThreadID != b.LeftThreadID {
			return a.LeftThreadID < b.LeftThreadID
		}
		return a.RightThreadID < b.RightThreadID
	})

	edges := keepTopEdges(candidates, options.Fanout)
	pairScores := make(map[string]float64, len(edges))
	for _, edge := range edges {
		pairScores[threadIDPairKey(edge.LeftThreadID, edge.RightThreadID)] = edge.Score
	}

	built := clusterer.BuildWithOptions(nodes, edges, clusterer.Options{MaxSize: options.MaxClusterSize})
	inputs := make([]store.DurableClusterInput, 0, len(built))
	for _, builtCluster := range built {
		if len(builtCluster.Members) < options.MinSize {
			continue
		}
		sort.Slice(builtCluster.Members, func(i, j int) bool {
			left := threads[builtCluster.Members[i]]
			right := threads[builtCluster.Members[j]]
			return left.Number < right.Number
		})
		// The cluster identity is derived from the repository and the
		// representative thread.
		identity := store.HumanKeyForValue(fmt.Sprintf("repo:%d:cluster-representative:%d", repoID, builtCluster.RepresentativeThreadID))
		clusterType := "duplicate_candidate"
		if len(builtCluster.Members) == 1 {
			clusterType = "singleton_orphan"
		}
		input := store.DurableClusterInput{
			StableKey:              identity.Hash,
			StableSlug:             store.HumanKeyStableSlug(identity),
			ClusterType:            clusterType,
			RepresentativeThreadID: builtCluster.RepresentativeThreadID,
			Title:                  "Cluster " + identity.Slug,
			Members:                make([]store.DurableClusterMemberInput, 0, len(builtCluster.Members)),
		}
		for _, threadID := range builtCluster.Members {
			role := "related"
			// The score stays nil when the member has no kept edge straight
			// to the representative.
			var scorePtr *float64
			if threadID == builtCluster.RepresentativeThreadID {
				role = "canonical"
				scoreCopy := 1.0
				scorePtr = &scoreCopy
			} else if score, ok := pairScores[threadIDPairKey(threadID, builtCluster.RepresentativeThreadID)]; ok {
				scoreCopy := score
				scorePtr = &scoreCopy
			}
			input.Members = append(input.Members, store.DurableClusterMemberInput{ThreadID: threadID, Role: role, ScoreToRepresentative: scorePtr})
		}
		inputs = append(inputs, input)
	}
	return inputs, len(edges), nil
}
// upsertClusterEdge records an undirected edge between two threads, keyed by
// the unordered pair. Self-edges are ignored, and an existing edge is only
// replaced by a strictly higher score. Stored edges always have the smaller
// thread ID on the left.
func upsertClusterEdge(edges map[string]clusterer.Edge, leftID, rightID int64, score float64) {
	if leftID == rightID {
		return
	}
	lo, hi := leftID, rightID
	if lo > hi {
		lo, hi = hi, lo
	}
	key := threadIDPairKey(lo, hi)
	existing, found := edges[key]
	if found && existing.Score >= score {
		return
	}
	edges[key] = clusterer.Edge{LeftThreadID: lo, RightThreadID: hi, Score: score}
}
// addDeterministicReferenceEdges adds an edge with deterministicRefScore for
// every thread that explicitly references another thread in the node set.
// A reference counts when it appears in the title, early in the body, or
// when the two titles share at least weakEdgeMinTitleOverlap of their
// tokens.
func addDeterministicReferenceEdges(edges map[string]clusterer.Edge, nodes []clusterer.Node, threads map[int64]store.Thread) {
	// Index the node set by thread number so references can be resolved.
	idByNumber := make(map[int]int64, len(nodes))
	for _, node := range nodes {
		idByNumber[threads[node.ThreadID].Number] = node.ThreadID
	}

	for _, node := range nodes {
		source := threads[node.ThreadID]
		for number, evidence := range referencedThreadNumbersByLocation(source) {
			targetID, known := idByNumber[number]
			if !known || targetID == node.ThreadID {
				continue
			}
			if !evidence.Title && !evidence.EarlyBody &&
				titleTokenOverlap(source.Title, threads[targetID].Title) < weakEdgeMinTitleOverlap {
				continue
			}
			upsertClusterEdge(edges, node.ThreadID, targetID, deterministicRefScore)
		}
	}
}
// referencedThreadNumbersByLocation scans a thread's body and then its title
// for references to other thread numbers and reports, per referenced number,
// where the reference was seen.
func referencedThreadNumbersByLocation(thread store.Thread) map[int]referenceEvidence {
	found := make(map[int]referenceEvidence)
	sources := [2]struct {
		text    string
		isTitle bool
	}{
		{text: thread.Body, isTitle: false},
		{text: thread.Title, isTitle: true},
	}
	for _, source := range sources {
		collectReferencedThreadNumbers(found, thread.Number, source.text, source.isTitle)
	}
	return found
}
// collectReferencedThreadNumbers finds thread references in value and merges
// them into refs. References to threadNumber itself and non-positive or
// unparsable numbers are skipped. A title match sets Title; a body match
// sets EarlyBody only when it starts within the first
// bodyRefEvidencePrefixChars bytes. Later body matches are still recorded,
// without evidence flags.
func collectReferencedThreadNumbers(refs map[int]referenceEvidence, threadNumber int, value string, titleRef bool) {
	for _, loc := range threadReferencePattern.FindAllStringSubmatchIndex(value, -1) {
		// Exactly one alternative of the pattern captures the digits; take
		// the first capture group that participated in the match.
		digits := ""
		for group := 1; 2*group+1 < len(loc); group++ {
			start, end := loc[2*group], loc[2*group+1]
			if start < 0 {
				continue
			}
			digits = value[start:end]
			break
		}
		number, err := strconv.Atoi(digits)
		if err != nil || number <= 0 || number == threadNumber {
			continue
		}
		evidence := refs[number]
		switch {
		case titleRef:
			evidence.Title = true
		case loc[0] <= bodyRefEvidencePrefixChars:
			evidence.EarlyBody = true
		}
		refs[number] = evidence
	}
}
// titleTokenOverlap returns the fraction of shared title tokens relative to
// the smaller of the two token sets, in [0, 1]. It returns 0 when either
// title has no tokens.
func titleTokenOverlap(left, right string) float64 {
	smaller, larger := titleTokenSet(left), titleTokenSet(right)
	if len(smaller) == 0 || len(larger) == 0 {
		return 0
	}
	if len(larger) < len(smaller) {
		smaller, larger = larger, smaller
	}
	shared := 0
	for token := range smaller {
		if larger[token] {
			shared++
		}
	}
	return float64(shared) / float64(len(smaller))
}
// titleTokenSet lower-cases value and returns the set of tokens matched by
// titleTokenPattern. The result is never nil.
func titleTokenSet(value string) map[string]bool {
	tokens := titleTokenPattern.FindAllString(strings.ToLower(value), -1)
	set := make(map[string]bool, len(tokens))
	for i := range tokens {
		set[tokens[i]] = true
	}
	return set
}
// keepTopEdges prunes edges so that each thread keeps at most fanout of its
// strongest neighbors (ties broken by the lower neighbor thread ID). An edge
// survives when either endpoint keeps the other. The input order of the
// surviving edges is preserved. A non-positive fanout or an empty input
// returns edges unchanged.
func keepTopEdges(edges []clusterer.Edge, fanout int) []clusterer.Edge {
	if fanout <= 0 || len(edges) == 0 {
		return edges
	}

	// Group every edge under both of its endpoints.
	incident := map[int64][]clusterer.Edge{}
	for _, edge := range edges {
		for _, endpoint := range [2]int64{edge.LeftThreadID, edge.RightThreadID} {
			incident[endpoint] = append(incident[endpoint], edge)
		}
	}

	// For each thread, remember which neighbors make its top-fanout cut.
	kept := make(map[int64]map[int64]bool, len(incident))
	for owner, ranked := range incident {
		sort.SliceStable(ranked, func(i, j int) bool {
			if ranked[i].Score != ranked[j].Score {
				return ranked[i].Score > ranked[j].Score
			}
			return edgeOtherThreadID(ranked[i], owner) < edgeOtherThreadID(ranked[j], owner)
		})
		limit := len(ranked)
		if limit > fanout {
			limit = fanout
		}
		partners := make(map[int64]bool, limit)
		for _, edge := range ranked[:limit] {
			partners[edgeOtherThreadID(edge, owner)] = true
		}
		kept[owner] = partners
	}

	selected := make([]clusterer.Edge, 0, len(edges))
	for _, edge := range edges {
		leftKeeps := kept[edge.LeftThreadID][edge.RightThreadID]
		if leftKeeps || kept[edge.RightThreadID][edge.LeftThreadID] {
			selected = append(selected, edge)
		}
	}
	return selected
}
// edgeOtherThreadID returns the endpoint of edge opposite to threadID. When
// threadID is not the left endpoint, the left endpoint is returned.
func edgeOtherThreadID(edge clusterer.Edge, threadID int64) int64 {
	if edge.LeftThreadID != threadID {
		return edge.LeftThreadID
	}
	return edge.RightThreadID
}
// clusterRepositoryResult summarizes one clusterRepository run.
type clusterRepositoryResult struct {
	// EdgeCount is the edge count reported by buildDurableClusterInputs.
	EdgeCount int
	// ClusterCount is the cluster count reported by the store when saving.
	ClusterCount int
	// MemberCount is the member count reported by the store when saving.
	MemberCount int
	// RunID is the run identifier reported by the store when saving.
	RunID int64
}
// clusterRepository builds durable cluster inputs from storedVectors, saves
// them for repoID, and reports the edge count together with the counts and
// run ID returned by the store. On any error the zero result is returned.
func clusterRepository(ctx context.Context, st *store.Store, repoID int64, storedVectors []store.ThreadVector, options clusterBuildOptions) (clusterRepositoryResult, error) {
	var result clusterRepositoryResult

	inputs, edgeCount, err := buildDurableClusterInputs(ctx, st, repoID, storedVectors, options)
	if err != nil {
		return result, err
	}
	saved, err := st.SaveDurableClusters(ctx, repoID, inputs)
	if err != nil {
		return result, err
	}

	result.EdgeCount = edgeCount
	result.ClusterCount = saved.ClusterCount
	result.MemberCount = saved.MemberCount
	result.RunID = saved.RunID
	return result, nil
}
// threadIDPairKey builds an order-independent map key "<low>:<high>" for a
// pair of thread IDs.
func threadIDPairKey(left, right int64) string {
	low, high := left, right
	if high < low {
		low, high = high, low
	}
	// Two int64 values in base 10 need at most 20 bytes each, plus the colon.
	key := make([]byte, 0, 41)
	key = strconv.AppendInt(key, low, 10)
	key = append(key, ':')
	key = strconv.AppendInt(key, high, 10)
	return string(key)
}
// parseOptionalFloat parses an optional floating-point flag value.
//
// A blank value (empty or whitespace only) means "not provided" and yields
// 0 with no error. Surrounding whitespace is ignored. Values that are not
// finite numbers, including the "NaN" and "Inf" spellings that
// strconv.ParseFloat accepts, are rejected, because NaN compares false
// against every bound and would slip through callers' range checks.
func parseOptionalFloat(value string) (float64, error) {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return 0, nil
	}
	parsed, err := strconv.ParseFloat(trimmed, 64)
	// x-x is 0 for every finite x and NaN for NaN and ±Inf, so this single
	// comparison rejects all non-finite results.
	if err != nil || parsed-parsed != 0 {
		return 0, fmt.Errorf("expected number, got %q", value)
	}
	return parsed, nil
}
// stateIncludesClosed reports whether a state filter ("all" or "closed",
// case-insensitive, surrounding whitespace ignored) covers closed threads.
func stateIncludesClosed(state string) bool {
	normalized := strings.ToLower(strings.TrimSpace(state))
	return normalized == "all" || normalized == "closed"
}
// parseOptionalPositiveIntList parses a comma-separated list of positive
// integers.
//
// A blank value yields nil. Whitespace around entries is ignored, and blank
// entries (for example from "1,,2" or a trailing comma) are skipped rather
// than being stored as 0, so every returned element is a positive integer.
// A list with only blank entries also yields nil. Any entry that is not a
// positive integer is an error.
func parseOptionalPositiveIntList(value string) ([]int, error) {
	if strings.TrimSpace(value) == "" {
		return nil, nil
	}
	parts := strings.Split(value, ",")
	out := make([]int, 0, len(parts))
	for _, part := range parts {
		parsed, err := parseOptionalPositiveInt(strings.TrimSpace(part))
		if err != nil {
			return nil, err
		}
		// The helper reports a blank entry as 0 with no error; that is not
		// a list element.
		if parsed == 0 {
			continue
		}
		out = append(out, parsed)
	}
	if len(out) == 0 {
		return nil, nil
	}
	return out, nil
}
// writeOutput renders payload to a.Stdout according to the app's format.
// JSON prints the indented payload alone. Log prints "title=payload" when
// allowLog is set and otherwise behaves like text. Text prints the bare
// version string for the "version" payload and, for anything else, the
// title followed by the indented JSON payload.
func (a *App) writeOutput(title string, payload any, allowLog bool) error {
	emit := func(format string, values ...any) error {
		_, err := fmt.Fprintf(a.Stdout, format, values...)
		return err
	}

	if a.format == FormatJSON {
		encoded, err := json.MarshalIndent(payload, "", " ")
		if err != nil {
			return err
		}
		return emit("%s\n", encoded)
	}
	if a.format == FormatLog && allowLog {
		return emit("%s=%v\n", title, payload)
	}

	// Text rendering, also used for log format when allowLog is false.
	if title == "version" {
		if versionPayload, ok := payload.(map[string]string); ok {
			_, err := fmt.Fprintln(a.Stdout, versionPayload["version"])
			return err
		}
	}
	encoded, err := json.MarshalIndent(payload, "", " ")
	if err != nil {
		return err
	}
	return emit("%s\n%s\n", title, encoded)
}
// writeInitOutput reports the outcome of init. JSON delegates to
// writeOutput, log prints a single key=value line, and text prints a
// labeled list of paths followed by a "Portable store" section when a
// portable store URL is set.
func (a *App) writeInitOutput(result initResult) error {
	if a.format == FormatJSON {
		return a.writeOutput("init", result, true)
	}
	if a.format == FormatLog {
		_, err := fmt.Fprintf(a.Stdout, "init config_path=%s db_path=%s portable_store=%s\n", result.ConfigPath, result.DBPath, result.PortableStore)
		return err
	}

	report := make([]string, 0, 10)
	report = append(report,
		"gitcrawl init",
		"config path: "+result.ConfigPath,
		"db path: "+result.DBPath,
		"cache dir: "+result.CacheDir,
		"vector dir: "+result.VectorDir,
	)
	if url := result.PortableStoreURL; url != "" {
		state := firstNonEmpty(result.PortableStore, "ready")
		report = append(report,
			"",
			"Portable store",
			" url: "+url,
			" checkout: "+result.PortableStoreDir,
			" state: "+state,
		)
	}
	_, err := fmt.Fprintln(a.Stdout, strings.Join(report, "\n"))
	return err
}
// printUsage writes the top-level help text to a.Stdout. Write errors are
// deliberately ignored: there is nowhere useful to report them.
func (a *App) printUsage() {
	_, _ = fmt.Fprint(a.Stdout, usageText)
}
// printCommandUsage writes the help text for a single command topic to
// a.Stdout. Only "portable" and "tui" have dedicated help; any other topic
// is a usage error.
func (a *App) printCommandUsage(command string) error {
	var text string
	switch command {
	case "portable":
		text = portableUsageText
	case "tui":
		text = tuiUsageText
	default:
		return usageErr(fmt.Errorf("unknown help topic %q", command))
	}
	fmt.Fprint(a.Stdout, text)
	return nil
}
// usageText is the top-level help text written by printUsage.
const usageText = `gitcrawl mirrors GitHub issues and pull requests into local SQLite for maintainer triage.
Usage:
gitcrawl [global flags] <command> [command flags]
gitcrawl help <command>
Global flags:
--config <path> config path
--format <mode> output format: text|json|log
--json write JSON output
--version print version
Core commands:
metadata print crawlkit control metadata
status print fast read-only archive status
init create config, optionally from a portable store
doctor check config, token, and database readiness
sync sync GitHub issue and pull request metadata
refresh run sync, enrichment, embedding, and clustering pipeline
threads list local issue and pull request rows
cluster build durable clusters from local thread vectors
close-thread locally hide one issue or pull request row
reopen-thread clear a local hide for one issue or pull request row
close-cluster locally hide one durable cluster
reopen-cluster clear a local hide for one durable cluster
exclude-cluster-member
locally remove one row from a durable cluster
include-cluster-member
restore one row to a durable cluster
set-cluster-canonical
set the canonical row for a durable cluster
clusters list latest run cluster summaries, with durable fallback
durable-clusters list durable cluster groups
cluster-detail dump one latest run cluster, with durable fallback
cluster-explain alias for cluster-detail
neighbors list vector-nearest local issue and pull request rows
search search local thread documents; also supports search issues|prs gh syntax
gh gh-compatible local cache shim with fallback to real gh
portable prune prune volatile payloads from a portable store
tui [owner/repo] browse clusters in the terminal UI; repo is inferred when omitted
No API server is provided. There is intentionally no serve command.
`
// tuiUsageText is the help text written by printCommandUsage for the "tui"
// topic.
const tuiUsageText = `gitcrawl tui opens the local terminal cluster browser.
Usage:
gitcrawl tui [owner/repo] [--limit N] [--min-size N] [--sort recent|oldest|size] [--hide-closed]
If owner/repo is omitted, gitcrawl uses the most recently updated repository in the local database.
The TUI starts with ghcrawl-style cluster display defaults: --min-size 5, --sort size, and closed historical clusters visible. Pass --min-size 1 for singleton clusters or --hide-closed to focus open-only.
Mouse is supported: click rows, wheel panes, right-click for actions, and use the menu for copy/sort/filter/jump/member triage controls.
Press a to open the same action menu from the keyboard.
Press # to jump directly to an issue or PR number.
Press p to switch between repositories already present in the local store.
Press n to load neighbors for the selected issue or PR.
Enter from the members pane also loads neighbors before opening detail.
The TUI quietly refreshes from the local store every 15 seconds and leaves the current status alone when nothing changed.
`
// portableUsageText is the help text written by printCommandUsage for the
// "portable" topic.
const portableUsageText = `gitcrawl portable manages local portable-store snapshots.
Usage:
gitcrawl portable prune [--body-chars N] [--no-vacuum] [--json]
Subcommands:
prune prune volatile payloads from the configured portable store
`