commit cebf07ccaebf0d3b088982d642d156065b46f250 Author: Vincent Koc Date: Fri May 1 08:34:01 2026 -0700 chore: bootstrap crawlkit module diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..dd71d24 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,17 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +indent_style = tab +indent_size = 4 + +[*.md] +indent_style = space +indent_size = 2 + +[*.yml] +indent_style = space +indent_size = 2 + diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..f3c1298 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +*.go text eol=lf +*.md text eol=lf +*.yml text eol=lf +*.yaml text eol=lf diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5f26416 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +bin/ +dist/ +coverage.out +*.db +*.db-shm +*.db-wal +.DS_Store + diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..7271018 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,6 @@ +# Changelog + +## Unreleased + +- Initial `crawlkit` module scaffold. + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..f84ea28 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,19 @@ +# Contributing + +## Development + +```bash +go test ./... +go vet ./... +go mod tidy +git diff --exit-code -- go.mod go.sum +``` + +Keep public APIs small. `crawlkit` should own reusable local archive mechanics, +not provider-specific Slack, Discord, Notion, or GitHub behavior. + +## Compatibility + +This module starts at `v0`, so APIs may still change. Prefer additive changes +and keep downstream crawler rewires narrow. + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..01668fd --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2026 Vincent Koc + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..ecf98ed --- /dev/null +++ b/README.md @@ -0,0 +1,33 @@ +# crawlkit + +Shared Go infrastructure for local-first crawler archives. + +`crawlkit` is not a universal Slack, Discord, Notion, or GitHub crawler. It is +the reusable foundation beneath those tools: SQLite hygiene, TOML config +defaults, portable JSONL/Gzip packing, git-backed snapshot sharing, sync state, +CLI output helpers, and safe desktop-cache snapshot utilities. + +## Install + +```bash +go get github.com/vincentkoc/crawlkit@v0.1.0 +``` + +Go packages are published by tagging this repository. There is no separate +package registry step. + +## Packages + +- `configkit`: standard TOML config paths, runtime dirs, and token diagnostics. +- `sqlitekit`: SQLite open/read-only/transaction/query helpers. +- `pack`: `manifest.json` plus JSONL/Gzip table snapshot export and import. +- `gitshare`: clone/init/pull/commit/push helpers for private snapshot repos. +- `syncstate`: generic crawler cursor and freshness records. +- `cliout`: text/json/log output helpers. +- `desktopcache`: safe read-only local cache snapshot helpers. + +## Safety + +Library tests use temporary directories. They do not touch app runtime stores +such as `~/.config/gitcrawl`, `~/.slacrawl`, `~/.discrawl`, or `~/.notcrawl`. + diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..49f4d40 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/vincentkoc/crawlkit + +go 1.26.2