// readVersion reads the file at path and returns the value of the first
// `version = "…";` assignment it contains.
//
// It returns the underlying read error when the file cannot be read, and a
// "version not found" error naming path when no such assignment exists.
func readVersion(path string) (string, error) {
	contents, err := os.ReadFile(path)
	if err != nil {
		return "", err
	}

	versionRe := regexp.MustCompile(`version = "([^"]+)";`)
	// Group 1 holds the text between the quotes of the leftmost match.
	if groups := versionRe.FindSubmatch(contents); len(groups) >= 2 {
		return string(groups[1]), nil
	}
	return "", fmt.Errorf("version not found in %s", path)
}
// qmdNodeModulesHash extracts the node_modules hash that the upstream
// flake.nix text pins for system (for example "x86_64-linux").
//
// The key may be written bare or quoted (`x86_64-linux = "…";` or
// `"x86_64-linux" = "…";`). Only values in SRI form ("sha256-…") are
// considered: updateQMD rewrites entries that match
// `"<system>" = "sha256-…";`, so accepting any other string here would
// write a value the updater can never match again, and would also let an
// unrelated `<system> = "…";` assignment earlier in the flake shadow the
// real hash.
//
// It returns an error when no sha256 entry exists for system, or when the
// entry is a placeholder (a run of "A"s or a value containing "fake").
func qmdNodeModulesHash(upstreamFlake, system string) (string, error) {
	re := regexp.MustCompile(fmt.Sprintf(`"?%s"?\s*=\s*"(sha256-[^"]+)";`, regexp.QuoteMeta(system)))
	match := re.FindStringSubmatch(upstreamFlake)
	if len(match) < 2 {
		return "", fmt.Errorf("qmd nodeModules hash for %s not found upstream", system)
	}
	hash := match[1]
	// Upstream may commit a placeholder hash before the real one is known;
	// refuse to copy it into our package definition.
	if strings.Contains(hash, "AAAAAAAA") || strings.Contains(hash, "fake") {
		return "", fmt.Errorf("qmd nodeModules hash for %s is not populated upstream", system)
	}
	return hash, nil
}
internal.ReplaceOnceFunc(qmdFile, srcRe, func(s string) string { + return regexp.MustCompile(`hash = "sha256-[^"]+";`).ReplaceAllString(s, fmt.Sprintf(`hash = "%s";`, srcHash)) + }); err != nil { + return err + } + for system, hash := range nodeHashes { + re := regexp.MustCompile(fmt.Sprintf(`"%s" = "sha256-[^"]+";`, regexp.QuoteMeta(system))) + if err := internal.ReplaceOnce(qmdFile, re, fmt.Sprintf(`"%s" = "%s";`, system, hash)); err != nil { + return err + } + } + return nil +} + func updateSummarize(repoRoot string) error { log.Printf("[update-tools] summarize") summarizeFile := filepath.Join(repoRoot, "nix", "pkgs", "summarize.nix") @@ -202,7 +305,7 @@ func main() { Name: "sonoscli", Repo: "steipete/sonoscli", Assets: []AssetSpec{ - {System: "aarch64-darwin", Regex: regexp.MustCompile(`sonoscli-macos-arm64\.tar\.gz`)}, + {System: "aarch64-darwin", Regex: regexp.MustCompile(`sonoscli_[0-9.]+_darwin_arm64\.tar\.gz`)}, {System: "x86_64-linux", Regex: regexp.MustCompile(`sonoscli_[0-9.]+_linux_amd64\.tar\.gz`)}, {System: "aarch64-linux", Regex: regexp.MustCompile(`sonoscli_[0-9.]+_linux_arm64\.tar\.gz`)}, }, @@ -246,6 +349,9 @@ func main() { if err := updateSummarize(repoRoot); err != nil { log.Fatalf("update summarize failed: %v", err) } + if err := updateQMD(repoRoot); err != nil { + log.Fatalf("update qmd failed: %v", err) + } for _, tool := range tools { if err := updateTool(tool); err != nil { if tool.Optional { diff --git a/cmd/update-tools/main_test.go b/cmd/update-tools/main_test.go new file mode 100644 index 0000000..33cec62 --- /dev/null +++ b/cmd/update-tools/main_test.go @@ -0,0 +1,33 @@ +package main + +import "testing" + +func TestQMDNodeModulesHash(t *testing.T) { + upstream := ` +nodeModulesHashes = { + x86_64-linux = "sha256-linux"; + aarch64-darwin = "sha256-darwin"; +}; +` + + got, err := qmdNodeModulesHash(upstream, "aarch64-darwin") + if err != nil { + t.Fatal(err) + } + if got != "sha256-darwin" { + t.Fatalf("got %q", got) + } +} + +func 
TestQMDNodeModulesHashRejectsFake(t *testing.T) { + upstream := ` +nodeModulesHashes = { + aarch64-darwin = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="; +}; +` + + _, err := qmdNodeModulesHash(upstream, "aarch64-darwin") + if err == nil { + t.Fatal("expected fake hash to be rejected") + } +} diff --git a/flake.nix b/flake.nix index da926b6..a174e39 100644 --- a/flake.nix +++ b/flake.nix @@ -22,6 +22,7 @@ poltergeist = [ "aarch64-darwin" ]; sag = [ "aarch64-darwin" "x86_64-linux" ]; imsg = [ "aarch64-darwin" ]; + qmd = [ "aarch64-darwin" "x86_64-linux" ]; }; in { packages = forAllSystems (system: @@ -66,8 +67,22 @@ // (lib.optionalAttrs (supports "imsg") { imsg = pkgs.callPackage ./nix/pkgs/imsg.nix {}; }) + // (lib.optionalAttrs (supports "qmd") { + qmd = pkgs.callPackage ./nix/pkgs/qmd.nix {}; + }) ); - checks = forAllSystems (system: self.packages.${system}); + checks = forAllSystems (system: + let + pkgs = import nixpkgs { inherit system; }; + packages = self.packages.${system}; + in + packages + // (lib.optionalAttrs (packages ? 
# Smoke check for the packaged qmd CLI.
#
# Arguments:
#   runCommand - builder used to run the shell script below.
#   qmd        - the qmd package under test; placed in nativeBuildInputs so
#                the `qmd` binary is on PATH inside the script.
#
# The script points HOME and the XDG config/cache/data directories at fresh
# paths under $TMPDIR, writes a single markdown note, and then runs
# `qmd --help`, `collection list`, `collection add`, `update`,
# `search --json` (whose output must contain the word "packaging") and
# `status`. With `set -eu`, a failing command fails the check; `$out` is
# only created once every command has succeeded.
#
# NOTE(review): only `qmd search` is exercised here, not `qmd query` or
# `qmd embed`; presumably this keeps the check from downloading
# embedding/reranking models — confirm before adding commands.
{
  runCommand,
  qmd,
}:

runCommand "qmd-smoke" { nativeBuildInputs = [ qmd ]; } ''
  set -eu

  export HOME="$TMPDIR/home"
  export XDG_CONFIG_HOME="$TMPDIR/config"
  export XDG_CACHE_HOME="$TMPDIR/cache"
  export XDG_DATA_HOME="$TMPDIR/data"
  mkdir -p "$HOME" "$XDG_CONFIG_HOME" "$XDG_CACHE_HOME" "$XDG_DATA_HOME" "$TMPDIR/notes"

  printf '%s\n\n%s\n' '# Smoke' 'qmd packaging smoke' > "$TMPDIR/notes/smoke.md"

  qmd --help >/dev/null
  qmd collection list >/dev/null
  qmd collection add "$TMPDIR/notes" --name smoke
  qmd update
  qmd search packaging --json | grep -q packaging
  qmd status >/dev/null

  touch "$out"
''
# openclaw plugin flake for qmd.
#
# Re-exports the qmd package from the repository root flake and exposes
# `openclawPlugin`, a function from system to a plugin description (or null
# when the root flake has no qmd package for that system).
{
  description = "openclaw plugin: qmd";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs?rev=16c7794d0a28b5a37904d55bcca36003b9109aaa&narHash=sha256-fFUnEYMla8b7UKjijLnMe%2BoVFOz6HjijGGNS1l7dYaQ%3D";
    root.url = "../..";
  };

  outputs =
    {
      self,
      nixpkgs,
      root,
    }:
    let
      inherit (nixpkgs) lib;

      # qmd package the root flake provides for `system`, or null when the
      # system (or the qmd attribute) is absent.
      qmdFor = system: (root.packages.${system} or { }).qmd or null;

      # Plugin description handed to openclaw for a given qmd package.
      mkPlugin = qmd: {
        name = "qmd";
        skills = [ ./skills/qmd ];
        packages = [ qmd ];
        needs = {
          stateDirs = [ ".local/share/qmd" ];
          requiredEnv = [ ];
        };
      };
    in
    {
      # One attribute set per system known to the root flake; empty when
      # that system has no qmd package.
      packages = lib.genAttrs (builtins.attrNames root.packages) (
        system:
        let
          qmd = qmdFor system;
        in
        if qmd != null then { inherit qmd; } else { }
      );

      openclawPlugin =
        system:
        let
          qmd = qmdFor system;
        in
        if qmd != null then mkPlugin qmd else null;
    };
}
+ +## Status + +!`qmd status 2>/dev/null || echo "qmd CLI not found on this runtime PATH"` + +## MCP: `query` + +```json +{ + "searches": [ + { "type": "lex", "query": "CAP theorem consistency" }, + { "type": "vec", "query": "tradeoff between consistency and availability" } + ], + "collections": ["docs"], + "limit": 10 +} +``` + +### Query Types + +| Type | Method | Input | +|------|--------|-------| +| `lex` | BM25 | Keywords — exact terms, names, code | +| `vec` | Vector | Question — natural language | +| `hyde` | Vector | Answer — hypothetical result (50-100 words) | + +### Writing Good Queries + +**lex (keyword)** +- 2-5 terms, no filler words +- Exact phrase: `"connection pool"` (quoted) +- Exclude terms: `performance -sports` (minus prefix) +- Code identifiers work: `handleError async` + +**vec (semantic)** +- Full natural language question +- Be specific: `"how does the rate limiter handle burst traffic"` +- Include context: `"in the payment service, how are refunds processed"` + +**hyde (hypothetical document)** +- Write 50-100 words of what the *answer* looks like +- Use the vocabulary you expect in the result + +**expand (auto-expand)** +- Use a single-line query (implicit) or `expand: question` on its own line +- Lets the local LLM generate lex/vec/hyde variations +- Do not mix `expand:` with other typed lines — it's either a standalone expand query or a full query document + +### Intent (Disambiguation) + +When a query term is ambiguous, add `intent` to steer results: + +```json +{ + "searches": [ + { "type": "lex", "query": "performance" } + ], + "intent": "web page load times and Core Web Vitals" +} +``` + +Intent affects expansion, reranking, chunk selection, and snippet extraction. It does not search on its own — it's a steering signal that disambiguates queries like "performance" (web-perf vs team health vs fitness). 
+ +### Combining Types + +| Goal | Approach | +|------|----------| +| Know exact terms | `lex` only | +| Don't know vocabulary | Use a single-line query (implicit `expand:`) or `vec` | +| Best recall | `lex` + `vec` | +| Complex topic | `lex` + `vec` + `hyde` | +| Ambiguous query | Add `intent` to any combination above | + +First query gets 2x weight in fusion — put your best guess first. + +### Lex Query Syntax + +| Syntax | Meaning | Example | +|--------|---------|---------| +| `term` | Prefix match | `perf` matches "performance" | +| `"phrase"` | Exact phrase | `"rate limiter"` | +| `-term` | Exclude | `performance -sports` | + +Note: `-term` only works in lex queries, not vec/hyde. + +### Collection Filtering + +```json +{ "collections": ["docs"] } // Single +{ "collections": ["docs", "notes"] } // Multiple (OR) +``` + +Omit to search all collections. + +## Other MCP Tools + +| Tool | Use | +|------|-----| +| `get` | Retrieve doc by path or `#docid` | +| `multi_get` | Retrieve multiple by glob/list | +| `status` | Collections and health | + +## CLI + +```bash +qmd query "question" # Auto-expand + rerank +qmd query $'lex: X\nvec: Y' # Structured +qmd query $'expand: question' # Explicit expand +qmd query --json --explain "q" # Show score traces (RRF + rerank blend) +qmd search "keywords" # BM25 only (no LLM) +qmd get "#abc123" # By docid +qmd multi-get "journals/2026-*.md" -l 40 # Batch pull snippets by glob +qmd multi-get notes/foo.md,notes/bar.md # Comma-separated list, preserves order +``` + +## HTTP API + +```bash +curl -X POST http://localhost:8181/query \ + -H "Content-Type: application/json" \ + -d '{"searches": [{"type": "lex", "query": "test"}]}' +``` + +## Setup + +```bash +qmd collection add ~/notes --name notes +qmd embed +```