feat: add config loading

This commit is contained in:
Vincent Koc 2026-04-26 23:02:45 -07:00
parent 7b504d15ac
commit 903f219c19
No known key found for this signature in database
4 changed files with 302 additions and 0 deletions

2
go.mod
View File

@ -1,3 +1,5 @@
module github.com/openclaw/gitcrawl
go 1.26.2
require github.com/pelletier/go-toml/v2 v2.3.0

2
go.sum Normal file
View File

@ -0,0 +1,2 @@
github.com/pelletier/go-toml/v2 v2.3.0 h1:k59bC/lIZREW0/iVaQR8nDHxVq8OVlIzYCOJf421CaM=
github.com/pelletier/go-toml/v2 v2.3.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=

227
internal/config/config.go Normal file
View File

@ -0,0 +1,227 @@
package config
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/pelletier/go-toml/v2"
)
// Names of the environment variables this package consults by default.
const (
// DefaultConfigEnv is the environment variable ResolvePath reads for the
// config file location when no explicit path is supplied.
DefaultConfigEnv = "GITCRAWL_CONFIG"
// DefaultTokenEnv is the value Default and Normalize use for
// GitHubConfig.TokenEnv when none is configured.
DefaultTokenEnv = "GITHUB_TOKEN"
// DefaultOpenAIEnv is the value Default and Normalize use for
// OpenAIConfig.APIKeyEnv when none is configured.
DefaultOpenAIEnv = "OPENAI_API_KEY"
)
// Config is the top-level configuration, encoded to and decoded from TOML
// using the keys named in the struct tags. Unset fields are filled in by
// Normalize.
type Config struct {
// Version is the config version number; Normalize replaces 0 with the
// default (1).
Version int `toml:"version"`
// DBPath is the database path. Normalize lets GITCRAWL_DB_PATH (or the
// legacy GHCRAWL_DB_PATH) override it and expands a leading "~".
DBPath string `toml:"db_path"`
// CacheDir, VectorDir and LogDir are directories created by
// EnsureRuntimeDirs; a leading "~" is expanded by Normalize.
CacheDir string `toml:"cache_dir"`
VectorDir string `toml:"vector_dir"`
LogDir string `toml:"log_dir"`
// GitHub and OpenAI hold the per-service settings.
GitHub GitHubConfig `toml:"github"`
OpenAI OpenAIConfig `toml:"openai"`
// EmbeddingBasis defaults to "title_original".
// NOTE(review): the set of accepted values is not visible in this file.
EmbeddingBasis string `toml:"embedding_basis"`
// TUI holds terminal UI settings.
TUI TUIConfig `toml:"tui"`
// Compat holds compatibility switches.
Compat CompatConfig `toml:"compat"`
}
// GitHubConfig holds the GitHub-related settings.
type GitHubConfig struct {
// TokenEnv is the name of the environment variable ResolveGitHubToken
// reads the token from (not the token itself).
TokenEnv string `toml:"token_env"`
}
// OpenAIConfig holds the OpenAI-related settings.
type OpenAIConfig struct {
// APIKeyEnv is the name of the environment variable ResolveOpenAIKey
// reads the API key from (not the key itself).
APIKeyEnv string `toml:"api_key_env"`
// SummaryModel and EmbedModel are model names. When left empty, Normalize
// fills them from GITCRAWL_SUMMARY_MODEL / GITCRAWL_EMBED_MODEL, then the
// legacy GHCRAWL_* equivalents, then the built-in defaults.
SummaryModel string `toml:"summary_model"`
EmbedModel string `toml:"embed_model"`
// BatchSize and Concurrency must be positive; Normalize replaces values
// <= 0 with the defaults (64 and 2).
BatchSize int `toml:"batch_size"`
Concurrency int `toml:"concurrency"`
}
// TUIConfig holds the terminal UI settings.
type TUIConfig struct {
// DefaultSort defaults to "recent".
// NOTE(review): other accepted sort names are not visible in this file.
DefaultSort string `toml:"default_sort"`
}
// CompatConfig holds compatibility switches.
type CompatConfig struct {
// ReadGHCrawlEnv is true in Default.
// NOTE(review): presumably meant to gate reading of the legacy GHCRAWL_*
// environment variables, but nothing in this file consults it; Normalize
// reads those variables unconditionally. Confirm the intended behavior.
ReadGHCrawlEnv bool `toml:"read_ghcrawl_env"`
}
// TokenResolution is the result of looking up a credential in the
// environment. The zero value means no non-blank value was found.
type TokenResolution struct {
// Value is the credential with surrounding whitespace trimmed.
Value string
// Source is the name of the environment variable Value was read from.
Source string
}
// Default returns the built-in configuration. All file-system locations live
// under <home>/.config/gitcrawl, where <home> comes from homeDir (which falls
// back to "." when the home directory cannot be determined).
func Default() Config {
	root := filepath.Join(homeDir(), ".config", "gitcrawl")

	var cfg Config
	cfg.Version = 1

	// Storage locations.
	cfg.DBPath = filepath.Join(root, "gitcrawl.db")
	cfg.CacheDir = filepath.Join(root, "cache")
	cfg.VectorDir = filepath.Join(root, "vectors")
	cfg.LogDir = filepath.Join(root, "logs")

	// Service settings.
	cfg.GitHub.TokenEnv = DefaultTokenEnv
	cfg.OpenAI.APIKeyEnv = DefaultOpenAIEnv
	cfg.OpenAI.SummaryModel = "gpt-5.4"
	cfg.OpenAI.EmbedModel = "text-embedding-3-small"
	cfg.OpenAI.BatchSize = 64
	cfg.OpenAI.Concurrency = 2

	// Remaining knobs.
	cfg.EmbeddingBasis = "title_original"
	cfg.TUI.DefaultSort = "recent"
	cfg.Compat.ReadGHCrawlEnv = true

	return cfg
}
// ResolvePath returns the config file path to use, in order of precedence:
//
//  1. flagPath, when it is not blank;
//  2. the GITCRAWL_CONFIG environment variable (DefaultConfigEnv), when it is
//     not blank;
//  3. <home>/.config/gitcrawl/config.toml.
//
// For the first two sources surrounding whitespace is trimmed before a leading
// "~" is expanded, so both are treated the same way and a padded value such as
// " ~/cfg.toml" still expands to the home directory.
func ResolvePath(flagPath string) string {
	if trimmed := strings.TrimSpace(flagPath); trimmed != "" {
		return expandHome(trimmed)
	}
	if envPath := strings.TrimSpace(os.Getenv(DefaultConfigEnv)); envPath != "" {
		return expandHome(envPath)
	}
	return filepath.Join(homeDir(), ".config", "gitcrawl", "config.toml")
}
// Load reads the TOML config file located via ResolvePath(path), decodes it on
// top of Default(), and normalizes the result.
//
// On any failure the zero Config is returned together with the error. A read
// failure is returned exactly as produced by os.ReadFile; a decode failure is
// wrapped with the prefix "parse config".
func Load(path string) (Config, error) {
	raw, err := os.ReadFile(ResolvePath(path))
	if err != nil {
		return Config{}, err
	}

	// Start from the defaults and decode the file contents over them.
	loaded := Default()
	if err = toml.Unmarshal(raw, &loaded); err != nil {
		return Config{}, fmt.Errorf("parse config: %w", err)
	}
	if err = loaded.Normalize(); err != nil {
		return Config{}, err
	}
	return loaded, nil
}
// Save normalizes cfg and writes it as TOML to the file located via
// ResolvePath(path), creating the parent directory (mode 0o755) if needed.
// The file is written with mode 0o600.
//
// cfg is received by value, so normalization does not modify the caller's
// copy. The normalized values are what get persisted, including any DBPath
// override Normalize picked up from the environment.
func Save(path string, cfg Config) error {
	if err := cfg.Normalize(); err != nil {
		return err
	}

	target := ResolvePath(path)
	parent := filepath.Dir(target)
	if err := os.MkdirAll(parent, 0o755); err != nil {
		return fmt.Errorf("create config dir: %w", err)
	}

	encoded, err := toml.Marshal(cfg)
	if err != nil {
		return fmt.Errorf("marshal config: %w", err)
	}
	return os.WriteFile(target, encoded, 0o600)
}
// EnsureRuntimeDirs creates the cache, vector and log directories plus the
// directory containing the database file, each with mode 0o755 and with a
// leading "~" expanded. It stops at the first failure and returns an error
// naming the path as configured (before expansion).
func EnsureRuntimeDirs(cfg Config) error {
	dirs := [...]string{
		cfg.CacheDir,
		cfg.VectorDir,
		cfg.LogDir,
		filepath.Dir(cfg.DBPath),
	}
	for i := range dirs {
		dir := dirs[i]
		err := os.MkdirAll(expandHome(dir), 0o755)
		if err != nil {
			return fmt.Errorf("create runtime dir %s: %w", dir, err)
		}
	}
	return nil
}
// Normalize fills every unset field of c with its default and resolves paths.
//
// Rules applied:
//   - Version 0, empty strings and non-positive BatchSize/Concurrency are
//     replaced by the values from Default().
//   - An empty SummaryModel/EmbedModel is taken from GITCRAWL_SUMMARY_MODEL /
//     GITCRAWL_EMBED_MODEL, then the legacy GHCRAWL_* variable, then the
//     default.
//   - DBPath is always overridden by GITCRAWL_DB_PATH (or the legacy
//     GHCRAWL_DB_PATH) when one of them is set to a non-blank value.
//   - A leading "~" in DBPath, CacheDir, VectorDir and LogDir is expanded.
//
// The current implementation always returns nil.
//
// NOTE(review): Compat.ReadGHCrawlEnv is not consulted here; the legacy
// GHCRAWL_* variables are read regardless of its value.
func (c *Config) Normalize() error {
	defaults := Default()

	// Small helpers for the two "replace when unset" patterns used below.
	orString := func(field *string, fallback string) {
		if *field == "" {
			*field = fallback
		}
	}
	orPositive := func(field *int, fallback int) {
		if *field <= 0 {
			*field = fallback
		}
	}

	if c.Version == 0 {
		c.Version = defaults.Version
	}

	orString(&c.DBPath, defaults.DBPath)
	orString(&c.CacheDir, defaults.CacheDir)
	orString(&c.VectorDir, defaults.VectorDir)
	orString(&c.LogDir, defaults.LogDir)
	orString(&c.GitHub.TokenEnv, defaults.GitHub.TokenEnv)
	orString(&c.OpenAI.APIKeyEnv, defaults.OpenAI.APIKeyEnv)

	// Model names: an explicit config value wins, then the environment.
	orString(&c.OpenAI.SummaryModel,
		envOrDefault("GITCRAWL_SUMMARY_MODEL", legacyEnv("GHCRAWL_SUMMARY_MODEL"), defaults.OpenAI.SummaryModel))
	orString(&c.OpenAI.EmbedModel,
		envOrDefault("GITCRAWL_EMBED_MODEL", legacyEnv("GHCRAWL_EMBED_MODEL"), defaults.OpenAI.EmbedModel))

	orPositive(&c.OpenAI.BatchSize, defaults.OpenAI.BatchSize)
	orPositive(&c.OpenAI.Concurrency, defaults.OpenAI.Concurrency)

	orString(&c.EmbeddingBasis, defaults.EmbeddingBasis)
	orString(&c.TUI.DefaultSort, defaults.TUI.DefaultSort)

	// Unlike the model names, the database path from the environment takes
	// precedence over whatever the config holds.
	dbPath := envOrDefault("GITCRAWL_DB_PATH", legacyEnv("GHCRAWL_DB_PATH"), c.DBPath)
	c.DBPath = expandHome(dbPath)
	c.CacheDir = expandHome(c.CacheDir)
	c.VectorDir = expandHome(c.VectorDir)
	c.LogDir = expandHome(c.LogDir)
	return nil
}
// ResolveGitHubToken reads the GitHub token from the environment variable
// named by cfg.GitHub.TokenEnv. It returns the whitespace-trimmed value along
// with the variable name, or the zero TokenResolution when the variable is
// unset or blank.
func ResolveGitHubToken(cfg Config) TokenResolution {
	envName := cfg.GitHub.TokenEnv
	token := strings.TrimSpace(os.Getenv(envName))
	if token == "" {
		return TokenResolution{}
	}
	return TokenResolution{Value: token, Source: envName}
}
// ResolveOpenAIKey reads the OpenAI API key from the environment variable
// named by cfg.OpenAI.APIKeyEnv. It returns the whitespace-trimmed value along
// with the variable name, or the zero TokenResolution when the variable is
// unset or blank.
func ResolveOpenAIKey(cfg Config) TokenResolution {
	envName := cfg.OpenAI.APIKeyEnv
	key := strings.TrimSpace(os.Getenv(envName))
	if key == "" {
		return TokenResolution{}
	}
	return TokenResolution{Value: key, Source: envName}
}
// envOrDefault picks the first non-blank candidate, after trimming whitespace:
// the value of the environment variable named primary, then the already
// looked-up legacy value. If both are blank it returns fallback unchanged
// (fallback is not trimmed).
func envOrDefault(primary, legacy, fallback string) string {
	candidates := [2]string{os.Getenv(primary), legacy}
	for _, candidate := range candidates {
		if trimmed := strings.TrimSpace(candidate); trimmed != "" {
			return trimmed
		}
	}
	return fallback
}
// legacyEnv returns the raw (untrimmed) value of the environment variable
// name, or "" when it is unset. It is used for the GHCRAWL_* variable names.
func legacyEnv(name string) string {
	value := os.Getenv(name)
	return value
}
// expandHome replaces a leading "~" that stands for the current user's home
// directory: "~" alone becomes homeDir(), and "~/rest" becomes homeDir()
// joined with rest. Every other path, including the "~user/..." form, is
// returned unchanged.
func expandHome(path string) string {
	if path == "~" {
		return homeDir()
	}
	if rest, ok := strings.CutPrefix(path, "~/"); ok {
		return filepath.Join(homeDir(), rest)
	}
	return path
}
// homeDir returns the current user's home directory as reported by
// os.UserHomeDir, or "." when that call fails or yields an empty string.
func homeDir() string {
	home, err := os.UserHomeDir()
	if err != nil || home == "" {
		return "."
	}
	return home
}

View File

@ -0,0 +1,71 @@
package config
import (
"os"
"path/filepath"
"testing"
)
// TestSaveLoadRoundTrip checks that values written by Save come back
// unchanged from Load.
func TestSaveLoadRoundTrip(t *testing.T) {
	// Normalize, which both Save and Load run, lets these variables override
	// DBPath. Blank them so a value in the developer's or CI environment
	// cannot make the DBPath comparison below fail.
	t.Setenv("GITCRAWL_DB_PATH", "")
	t.Setenv("GHCRAWL_DB_PATH", "")

	dir := t.TempDir()
	path := filepath.Join(dir, "config.toml")
	cfg := Default()
	cfg.DBPath = filepath.Join(dir, "gitcrawl.db")
	cfg.OpenAI.SummaryModel = "gpt-5-mini"
	if err := Save(path, cfg); err != nil {
		t.Fatalf("save config: %v", err)
	}
	loaded, err := Load(path)
	if err != nil {
		t.Fatalf("load config: %v", err)
	}
	if loaded.DBPath != cfg.DBPath {
		t.Fatalf("db path mismatch: got %q want %q", loaded.DBPath, cfg.DBPath)
	}
	if loaded.OpenAI.SummaryModel != "gpt-5-mini" {
		t.Fatalf("summary model mismatch: %q", loaded.OpenAI.SummaryModel)
	}
}
// TestResolvePathUsesEnv checks that, with no explicit path, ResolvePath
// returns the location named by the GITCRAWL_CONFIG environment variable.
func TestResolvePathUsesEnv(t *testing.T) {
	want := filepath.Join(t.TempDir(), "custom.toml")
	t.Setenv(DefaultConfigEnv, want)

	got := ResolvePath("")
	if got != want {
		t.Fatalf("resolve path: got %q want %q", got, want)
	}
}
// TestNormalizeUsesDBEnv checks that Normalize takes the database path from
// GITCRAWL_DB_PATH when the config leaves DBPath empty.
func TestNormalizeUsesDBEnv(t *testing.T) {
	want := filepath.Join(t.TempDir(), "override.db")
	t.Setenv("GITCRAWL_DB_PATH", want)

	cfg := Default()
	cfg.DBPath = ""
	err := cfg.Normalize()
	if err != nil {
		t.Fatalf("normalize: %v", err)
	}

	if got := cfg.DBPath; got != want {
		t.Fatalf("db path: got %q want %q", got, want)
	}
}
// TestResolveTokens checks that, with the default config, both credentials
// are read from GITHUB_TOKEN and OPENAI_API_KEY and report that variable as
// their source.
func TestResolveTokens(t *testing.T) {
	t.Setenv("GITHUB_TOKEN", "ghp_test")
	t.Setenv("OPENAI_API_KEY", "sk_test")
	cfg := Default()

	gh := ResolveGitHubToken(cfg)
	if gh.Value != "ghp_test" || gh.Source != "GITHUB_TOKEN" {
		t.Fatalf("github token resolution mismatch: %#v", gh)
	}

	ai := ResolveOpenAIKey(cfg)
	if ai.Value != "sk_test" || ai.Source != "OPENAI_API_KEY" {
		t.Fatalf("openai key resolution mismatch: %#v", ai)
	}
}
// TestMain runs the package's tests and exits the process with the code
// reported by m.Run.
func TestMain(m *testing.M) {
	exitCode := m.Run()
	os.Exit(exitCode)
}