diff --git a/go.mod b/go.mod
index 19c57d0..dac9899 100644
--- a/go.mod
+++ b/go.mod
@@ -1,3 +1,5 @@
 module github.com/openclaw/gitcrawl
 
 go 1.26.2
+
+require github.com/pelletier/go-toml/v2 v2.3.0
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..3c569cf
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,2 @@
+github.com/pelletier/go-toml/v2 v2.3.0 h1:k59bC/lIZREW0/iVaQR8nDHxVq8OVlIzYCOJf421CaM=
+github.com/pelletier/go-toml/v2 v2.3.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
diff --git a/internal/config/config.go b/internal/config/config.go
new file mode 100644
index 0000000..43028be
--- /dev/null
+++ b/internal/config/config.go
@@ -0,0 +1,282 @@
+// Package config loads, normalizes, and persists gitcrawl's TOML
+// configuration and resolves API credentials from the environment.
+package config
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/pelletier/go-toml/v2"
+)
+
+// Names of the environment variables consulted by default.
+const (
+	DefaultConfigEnv = "GITCRAWL_CONFIG"
+	DefaultTokenEnv  = "GITHUB_TOKEN"
+	DefaultOpenAIEnv = "OPENAI_API_KEY"
+)
+
+// Config is the root of the gitcrawl configuration file.
+type Config struct {
+	Version        int          `toml:"version"`
+	DBPath         string       `toml:"db_path"`
+	CacheDir       string       `toml:"cache_dir"`
+	VectorDir      string       `toml:"vector_dir"`
+	LogDir         string       `toml:"log_dir"`
+	GitHub         GitHubConfig `toml:"github"`
+	OpenAI         OpenAIConfig `toml:"openai"`
+	EmbeddingBasis string       `toml:"embedding_basis"`
+	TUI            TUIConfig    `toml:"tui"`
+	Compat         CompatConfig `toml:"compat"`
+}
+
+// GitHubConfig names the environment variable that holds the GitHub token.
+type GitHubConfig struct {
+	TokenEnv string `toml:"token_env"`
+}
+
+// OpenAIConfig holds the OpenAI credential source, model names, and request
+// tuning.
+type OpenAIConfig struct {
+	APIKeyEnv    string `toml:"api_key_env"`
+	SummaryModel string `toml:"summary_model"`
+	EmbedModel   string `toml:"embed_model"`
+	BatchSize    int    `toml:"batch_size"`
+	Concurrency  int    `toml:"concurrency"`
+}
+
+// TUIConfig holds terminal UI preferences.
+type TUIConfig struct {
+	DefaultSort string `toml:"default_sort"`
+}
+
+// CompatConfig controls compatibility with legacy GHCRAWL_* variables.
+type CompatConfig struct {
+	// ReadGHCrawlEnv lets Normalize fall back to GHCRAWL_* variables.
+	ReadGHCrawlEnv bool `toml:"read_ghcrawl_env"`
+}
+
+// TokenResolution is a resolved secret and the name of the environment
+// variable it came from. The zero value means nothing was found.
+type TokenResolution struct {
+	Value  string
+	Source string
+}
+
+// Default returns the built-in configuration, with all paths rooted at
+// <home>/.config/gitcrawl.
+func Default() Config {
+	base := filepath.Join(homeDir(), ".config", "gitcrawl")
+	return Config{
+		Version:        1,
+		DBPath:         filepath.Join(base, "gitcrawl.db"),
+		CacheDir:       filepath.Join(base, "cache"),
+		VectorDir:      filepath.Join(base, "vectors"),
+		LogDir:         filepath.Join(base, "logs"),
+		EmbeddingBasis: "title_original",
+		GitHub: GitHubConfig{
+			TokenEnv: DefaultTokenEnv,
+		},
+		OpenAI: OpenAIConfig{
+			APIKeyEnv:    DefaultOpenAIEnv,
+			SummaryModel: "gpt-5.4",
+			EmbedModel:   "text-embedding-3-small",
+			BatchSize:    64,
+			Concurrency:  2,
+		},
+		TUI: TUIConfig{
+			DefaultSort: "recent",
+		},
+		Compat: CompatConfig{
+			ReadGHCrawlEnv: true,
+		},
+	}
+}
+
+// ResolvePath picks the config file location: flagPath if non-blank, else
+// $GITCRAWL_CONFIG if non-blank, else <home>/.config/gitcrawl/config.toml.
+// Surrounding whitespace is trimmed and a leading "~" is expanded.
+func ResolvePath(flagPath string) string {
+	// Trim before expanding so " ~/x " resolves like the trimmed env path.
+	if p := strings.TrimSpace(flagPath); p != "" {
+		return expandHome(p)
+	}
+	if p := strings.TrimSpace(os.Getenv(DefaultConfigEnv)); p != "" {
+		return expandHome(p)
+	}
+	return filepath.Join(homeDir(), ".config", "gitcrawl", "config.toml")
+}
+
+// Load reads the config file at ResolvePath(path), overlays it on Default,
+// and normalizes the result. The read error is returned unwrapped.
+func Load(path string) (Config, error) {
+	cfg := Default()
+	resolved := ResolvePath(path)
+	data, err := os.ReadFile(resolved)
+	if err != nil {
+		return Config{}, err
+	}
+	if err := toml.Unmarshal(data, &cfg); err != nil {
+		return Config{}, fmt.Errorf("parse config: %w", err)
+	}
+	if err := cfg.Normalize(); err != nil {
+		return Config{}, err
+	}
+	return cfg, nil
+}
+
+// Save normalizes cfg and writes it as TOML to ResolvePath(path), creating
+// the parent directory first. Because cfg is passed by value, the caller's
+// copy is not modified.
+func Save(path string, cfg Config) error {
+	if err := cfg.Normalize(); err != nil {
+		return err
+	}
+	resolved := ResolvePath(path)
+	if err := os.MkdirAll(filepath.Dir(resolved), 0o755); err != nil {
+		return fmt.Errorf("create config dir: %w", err)
+	}
+	data, err := toml.Marshal(cfg)
+	if err != nil {
+		return fmt.Errorf("marshal config: %w", err)
+	}
+	return os.WriteFile(resolved, data, 0o600)
+}
+
+// EnsureRuntimeDirs creates the cache, vector, and log directories and the
+// directory that contains the database file.
+func EnsureRuntimeDirs(cfg Config) error {
+	dirs := []string{cfg.CacheDir, cfg.VectorDir, cfg.LogDir, filepath.Dir(cfg.DBPath)}
+	for _, dir := range dirs {
+		if err := os.MkdirAll(expandHome(dir), 0o755); err != nil {
+			return fmt.Errorf("create runtime dir %s: %w", dir, err)
+		}
+	}
+	return nil
+}
+
+// Normalize fills unset fields from Default, applies environment overrides,
+// and expands a leading "~" in the path fields.
+//
+// GITCRAWL_DB_PATH, when set, always replaces DBPath. The model variables
+// are consulted only when the corresponding field is empty. GHCRAWL_*
+// fallbacks are read only when Compat.ReadGHCrawlEnv is true. The returned
+// error is currently always nil.
+func (c *Config) Normalize() error {
+	def := Default()
+	// legacy gates every GHCRAWL_* lookup behind the compat switch.
+	legacy := func(name string) string {
+		if !c.Compat.ReadGHCrawlEnv {
+			return ""
+		}
+		return legacyEnv(name)
+	}
+	if c.Version == 0 {
+		c.Version = def.Version
+	}
+	if c.DBPath == "" {
+		c.DBPath = def.DBPath
+	}
+	if c.CacheDir == "" {
+		c.CacheDir = def.CacheDir
+	}
+	if c.VectorDir == "" {
+		c.VectorDir = def.VectorDir
+	}
+	if c.LogDir == "" {
+		c.LogDir = def.LogDir
+	}
+	if c.GitHub.TokenEnv == "" {
+		c.GitHub.TokenEnv = def.GitHub.TokenEnv
+	}
+	if c.OpenAI.APIKeyEnv == "" {
+		c.OpenAI.APIKeyEnv = def.OpenAI.APIKeyEnv
+	}
+	// NOTE(review): Load starts from Default, so the model fields are only
+	// empty there when the file sets them to ""; confirm that the model
+	// variables are meant to be ignored otherwise.
+	if c.OpenAI.SummaryModel == "" {
+		c.OpenAI.SummaryModel = envOrDefault("GITCRAWL_SUMMARY_MODEL", legacy("GHCRAWL_SUMMARY_MODEL"), def.OpenAI.SummaryModel)
+	}
+	if c.OpenAI.EmbedModel == "" {
+		c.OpenAI.EmbedModel = envOrDefault("GITCRAWL_EMBED_MODEL", legacy("GHCRAWL_EMBED_MODEL"), def.OpenAI.EmbedModel)
+	}
+	if c.OpenAI.BatchSize <= 0 {
+		c.OpenAI.BatchSize = def.OpenAI.BatchSize
+	}
+	if c.OpenAI.Concurrency <= 0 {
+		c.OpenAI.Concurrency = def.OpenAI.Concurrency
+	}
+	if c.EmbeddingBasis == "" {
+		c.EmbeddingBasis = def.EmbeddingBasis
+	}
+	if c.TUI.DefaultSort == "" {
+		c.TUI.DefaultSort = def.TUI.DefaultSort
+	}
+	c.DBPath = expandHome(envOrDefault("GITCRAWL_DB_PATH", legacy("GHCRAWL_DB_PATH"), c.DBPath))
+	c.CacheDir = expandHome(c.CacheDir)
+	c.VectorDir = expandHome(c.VectorDir)
+	c.LogDir = expandHome(c.LogDir)
+	return nil
+}
+
+// ResolveGitHubToken reads the GitHub token from the environment variable
+// named by cfg.GitHub.TokenEnv.
+func ResolveGitHubToken(cfg Config) TokenResolution {
+	return resolveEnv(cfg.GitHub.TokenEnv)
+}
+
+// ResolveOpenAIKey reads the OpenAI API key from the environment variable
+// named by cfg.OpenAI.APIKeyEnv.
+func ResolveOpenAIKey(cfg Config) TokenResolution {
+	return resolveEnv(cfg.OpenAI.APIKeyEnv)
+}
+
+// resolveEnv returns the trimmed value of the named variable together with
+// its name, or the zero TokenResolution when the value is blank.
+func resolveEnv(name string) TokenResolution {
+	if value := strings.TrimSpace(os.Getenv(name)); value != "" {
+		return TokenResolution{Value: value, Source: name}
+	}
+	return TokenResolution{}
+}
+
+// envOrDefault returns the primary variable's value if non-blank, else the
+// already-read legacy value if non-blank, else fallback.
+func envOrDefault(primary, legacy, fallback string) string {
+	if value := strings.TrimSpace(os.Getenv(primary)); value != "" {
+		return value
+	}
+	if value := strings.TrimSpace(legacy); value != "" {
+		return value
+	}
+	return fallback
+}
+
+// legacyEnv reads a legacy environment variable by name.
+func legacyEnv(name string) string {
+	return os.Getenv(name)
+}
+
+// expandHome replaces a leading "~" or "~/" with the home directory and
+// returns every other path unchanged.
+func expandHome(path string) string {
+	if path == "~" {
+		return homeDir()
+	}
+	if strings.HasPrefix(path, "~/") {
+		return filepath.Join(homeDir(), strings.TrimPrefix(path, "~/"))
+	}
+	return path
+}
+
+// homeDir returns the user's home directory, or "." when it cannot be
+// determined.
+func homeDir() string {
+	if home, err := os.UserHomeDir(); err == nil && home != "" {
+		return home
+	}
+	return "."
+}
diff --git a/internal/config/config_test.go b/internal/config/config_test.go
new file mode 100644
index 0000000..bec40ba
--- /dev/null
+++ b/internal/config/config_test.go
@@ -0,0 +1,110 @@
+package config
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestSaveLoadRoundTrip(t *testing.T) {
+	// Normalize lets these variables replace DBPath; blank them so the
+	// round trip does not depend on the caller's environment.
+	t.Setenv("GITCRAWL_DB_PATH", "")
+	t.Setenv("GHCRAWL_DB_PATH", "")
+
+	dir := t.TempDir()
+	path := filepath.Join(dir, "config.toml")
+	cfg := Default()
+	cfg.DBPath = filepath.Join(dir, "gitcrawl.db")
+	cfg.OpenAI.SummaryModel = "gpt-5-mini"
+
+	if err := Save(path, cfg); err != nil {
+		t.Fatalf("save config: %v", err)
+	}
+	loaded, err := Load(path)
+	if err != nil {
+		t.Fatalf("load config: %v", err)
+	}
+	if loaded.DBPath != cfg.DBPath {
+		t.Fatalf("db path mismatch: got %q want %q", loaded.DBPath, cfg.DBPath)
+	}
+	if loaded.OpenAI.SummaryModel != "gpt-5-mini" {
+		t.Fatalf("summary model mismatch: %q", loaded.OpenAI.SummaryModel)
+	}
+}
+
+func TestResolvePathUsesEnv(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "custom.toml")
+	t.Setenv(DefaultConfigEnv, path)
+
+	if got := ResolvePath(""); got != path {
+		t.Fatalf("resolve path: got %q want %q", got, path)
+	}
+}
+
+func TestResolvePathTrimsFlag(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "custom.toml")
+
+	if got := ResolvePath("  " + path + "  "); got != path {
+		t.Fatalf("resolve path: got %q want %q", got, path)
+	}
+}
+
+func TestNormalizeUsesDBEnv(t *testing.T) {
+	dir := t.TempDir()
+	dbPath := filepath.Join(dir, "override.db")
+	t.Setenv("GITCRAWL_DB_PATH", dbPath)
+
+	cfg := Default()
+	cfg.DBPath = ""
+	if err := cfg.Normalize(); err != nil {
+		t.Fatalf("normalize: %v", err)
+	}
+	if cfg.DBPath != dbPath {
+		t.Fatalf("db path: got %q want %q", cfg.DBPath, dbPath)
+	}
+}
+
+func TestNormalizeHonorsCompatFlag(t *testing.T) {
+	dir := t.TempDir()
+	legacyPath := filepath.Join(dir, "legacy.db")
+	ownPath := filepath.Join(dir, "own.db")
+	t.Setenv("GITCRAWL_DB_PATH", "")
+	t.Setenv("GHCRAWL_DB_PATH", legacyPath)
+
+	cfg := Default()
+	cfg.DBPath = ownPath
+	cfg.Compat.ReadGHCrawlEnv = false
+	if err := cfg.Normalize(); err != nil {
+		t.Fatalf("normalize: %v", err)
+	}
+	if cfg.DBPath != ownPath {
+		t.Fatalf("compat off: got %q want %q", cfg.DBPath, ownPath)
+	}
+
+	cfg.Compat.ReadGHCrawlEnv = true
+	if err := cfg.Normalize(); err != nil {
+		t.Fatalf("normalize: %v", err)
+	}
+	if cfg.DBPath != legacyPath {
+		t.Fatalf("compat on: got %q want %q", cfg.DBPath, legacyPath)
+	}
+}
+
+func TestResolveTokens(t *testing.T) {
+	t.Setenv("GITHUB_TOKEN", "ghp_test")
+	t.Setenv("OPENAI_API_KEY", "sk_test")
+
+	cfg := Default()
+	if got := ResolveGitHubToken(cfg); got.Value != "ghp_test" || got.Source != "GITHUB_TOKEN" {
+		t.Fatalf("github token resolution mismatch: %#v", got)
+	}
+	if got := ResolveOpenAIKey(cfg); got.Value != "sk_test" || got.Source != "OPENAI_API_KEY" {
+		t.Fatalf("openai key resolution mismatch: %#v", got)
+	}
+}
+
+func TestMain(m *testing.M) {
+	os.Exit(m.Run())
+}