chore: prepare 0.1.0 release

This commit is contained in:
Peter Steinberger 2026-05-08 17:13:41 +01:00
parent af7de2621d
commit 33ddba59a7
No known key found for this signature in database
7 changed files with 507 additions and 13 deletions

157
.github/workflows/ci.yml vendored Normal file
View File

@ -0,0 +1,157 @@
# Continuous integration: runs for every pull request and for pushes to main.
name: ci

on:
  pull_request:
  push:
    branches: [main]

# The workflow token only needs to read repository contents.
permissions:
  contents: read

# One active run per workflow and ref; a newer run cancels the one in progress.
concurrency:
  group: ci-${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
# Static analysis: golangci-lint plus separately pinned vet, staticcheck,
# gofumpt, and gosec runs.
lint:
  runs-on: ubuntu-latest
  timeout-minutes: 15
  steps:
    - name: Checkout
      uses: actions/checkout@v6.0.2

    - name: Setup Go
      uses: actions/setup-go@v6.4.0
      with:
        go-version-file: go.mod
        cache: true

    - name: Lint
      uses: golangci/golangci-lint-action@v9.2.0
      with:
        version: v2.11.1

    # Analyzer versions are pinned; the binaries land in "$(go env GOPATH)/bin".
    - name: Install analyzers
      run: |
        go install honnef.co/go/tools/cmd/staticcheck@v0.7.0
        go install mvdan.cc/gofumpt@v0.9.2
        go install github.com/securego/gosec/v2/cmd/gosec@v2.25.0

    - name: Vet
      run: go vet ./...

    - name: Staticcheck
      run: '"$(go env GOPATH)/bin/staticcheck" ./...'

    # `gofumpt -l` only lists files it would rewrite, so any output is a failure.
    - name: Gofumpt
      run: |
        changed="$("$(go env GOPATH)/bin/gofumpt" -l .)"
        if [ -n "$changed" ]; then
          printf 'gofumpt wants changes in:\n%s\n' "$changed"
          exit 1
        fi

    - name: Gosec
      run: |
        "$(go env GOPATH)/bin/gosec" -exclude=G101,G115,G202,G301,G304 ./...
# Unit tests: race-detector run, coverage floor, and a build of the CLI.
test:
  runs-on: ubuntu-latest
  timeout-minutes: 20
  steps:
    - name: Checkout
      uses: actions/checkout@v6.0.2

    - name: Setup Go
      uses: actions/setup-go@v6.4.0
      with:
        go-version-file: go.mod
        cache: true

    # -count=1 bypasses the test cache so the suite always executes.
    - name: Test with race detector
      run: go test -count=1 -race ./...

    # scripts/coverage.sh runs `go test ./... -coverprofile=coverage.out`
    # itself, so no separate coverage step is needed before it: a profile
    # written earlier would simply be overwritten.
    - name: Enforce coverage floor
      run: ./scripts/coverage.sh 35.0

    - name: Build
      run: go build ./cmd/telecrawl
# Dependency hygiene: module checksums, go.mod/go.sum tidiness, and known
# vulnerabilities.
deps:
  runs-on: ubuntu-latest
  timeout-minutes: 15
  steps:
    - name: Checkout
      uses: actions/checkout@v6.0.2

    - name: Setup Go
      uses: actions/setup-go@v6.4.0
      with:
        go-version-file: go.mod
        cache: true

    - name: Verify module cache
      run: go mod verify

    # Fails when `go mod tidy` would modify go.mod or go.sum.
    - name: Check go.mod tidy
      run: |
        go mod tidy
        git diff --exit-code -- go.mod go.sum

    - name: Install govulncheck
      run: go install golang.org/x/vuln/cmd/govulncheck@v1.3.0

    - name: Run govulncheck
      run: '"$(go env GOPATH)/bin/govulncheck" ./...'
# Dry-run of the release pipeline: builds snapshot artifacts without publishing.
release-check:
  runs-on: ubuntu-latest
  timeout-minutes: 15
  steps:
    - name: Checkout
      uses: actions/checkout@v6.0.2
      with:
        fetch-depth: 0

    - name: Setup Go
      uses: actions/setup-go@v6.4.0
      with:
        go-version-file: go.mod
        cache: true

    # --snapshot and --skip=publish keep this run from creating a release.
    - name: Snapshot release build
      uses: goreleaser/goreleaser-action@v7.1.0
      with:
        distribution: goreleaser
        version: "~> v2"
        args: release --snapshot --clean --skip=publish
# Secret scanning with gitleaks over the git history and the working tree.
secrets:
  runs-on: ubuntu-latest
  timeout-minutes: 15
  steps:
    # fetch-depth: 0 fetches the full history for the history scan below.
    - name: Checkout
      uses: actions/checkout@v6.0.2
      with:
        fetch-depth: 0

    - name: Setup Go
      uses: actions/setup-go@v6.4.0
      with:
        go-version-file: go.mod
        cache: true

    - name: Install gitleaks
      run: go install github.com/zricethezav/gitleaks/v8@v8.30.1

    # --redact keeps any matched secret values out of the job log.
    - name: Scan git history
      run: |
        "$(go env GOPATH)/bin/gitleaks" git --no-banner --redact

    - name: Scan working tree
      run: |
        "$(go env GOPATH)/bin/gitleaks" dir . --no-banner --redact

106
.github/workflows/release.yml vendored Normal file
View File

@ -0,0 +1,106 @@
# Release pipeline: triggered by pushing a v* tag, or manually for an
# existing tag.
name: release

on:
  push:
    tags: ['v*']
  workflow_dispatch:
    inputs:
      tag:
        description: 'Tag to (re)release (e.g. v0.1.0)'
        required: true
        type: string

# Write access to repository contents is granted to every job below.
permissions:
  contents: write

jobs:
# Builds and publishes the release artifacts with GoReleaser.
goreleaser:
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@v6.0.2
      with:
        fetch-depth: 0

    - name: Setup Go
      uses: actions/setup-go@v6.4.0
      with:
        go-version-file: go.mod
        cache: true

    # The config is copied out of the tree before the tag checkout below and
    # passed to GoReleaser via --config, so the release uses the config from
    # the ref the workflow started on (presumably so older tags can be
    # re-released with the current config - confirm).
    - name: Stash GoReleaser config
      run: cp .goreleaser.yaml /tmp/.goreleaser.yaml

    # The user-supplied tag is passed through an environment variable rather
    # than interpolated into the script, so it cannot inject shell commands.
    # Resolving it under refs/tags/ also keeps a value starting with "-" from
    # being parsed as a git option and restricts the input to actual tags.
    - name: Checkout release tag
      if: ${{ github.event_name == 'workflow_dispatch' }}
      env:
        RELEASE_TAG: ${{ inputs.tag }}
      run: git checkout --detach "refs/tags/${RELEASE_TAG}"

    - name: GoReleaser
      uses: goreleaser/goreleaser-action@v7.1.0
      with:
        distribution: goreleaser
        version: "~> v2"
        args: release --clean --config /tmp/.goreleaser.yaml
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Asks the Homebrew tap repository to update the telecrawl formula and waits
# for that workflow run to finish.
update-homebrew-tap:
  runs-on: ubuntu-latest
  needs: goreleaser
  steps:
    # Resolve the tag without interpolating ${{ ... }} expressions into the
    # script: the event name and ref name come from the runner's default
    # environment variables, and the manual input goes through env, so a
    # crafted tag value cannot inject shell commands.
    - name: Resolve release tag
      env:
        DISPATCH_TAG: ${{ inputs.tag }}
      run: |
        if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then
          release_tag="$DISPATCH_TAG"
        else
          release_tag="$GITHUB_REF_NAME"
        fi
        if [ -z "$release_tag" ]; then
          echo "::error::Could not resolve the release tag"
          exit 1
        fi
        printf 'RELEASE_TAG=%s\n' "$release_tag" >> "$GITHUB_ENV"

    - name: Dispatch tap formula update
      env:
        GH_TOKEN: ${{ secrets.HOMEBREW_TAP_TOKEN }}
      run: |
        if [ -z "$GH_TOKEN" ]; then
          echo "::error::Set HOMEBREW_TAP_TOKEN with workflow access to steipete/homebrew-tap"
          exit 1
        fi
        # The request id is unique per run attempt; it is used below to find
        # the tap workflow run by its display title.
        request_id="telecrawl-${RELEASE_TAG}-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
        expected_title="Update telecrawl for ${RELEASE_TAG} (${request_id})"
        gh workflow run update-formula.yml \
          --repo steipete/homebrew-tap \
          --ref main \
          -f formula=telecrawl \
          -f tag="$RELEASE_TAG" \
          -f repository=openclaw/telecrawl \
          -f description="Telegram Desktop archive CLI with encrypted Git backups" \
          -f artifact_template="{formula}_{version}_{target}.tar.gz" \
          -f request_id="$request_id"
        # Poll up to 30 times, 5 seconds apart, for the dispatched run to
        # appear in the tap repository.
        run_id=""
        for _ in {1..30}; do
          run_id=$(gh run list \
            --repo steipete/homebrew-tap \
            --workflow update-formula.yml \
            --branch main \
            --event workflow_dispatch \
            --limit 20 \
            --json databaseId,displayTitle \
            --jq ".[] | select(.displayTitle == \"$expected_title\") | .databaseId" | head -n1)
          if [ -n "$run_id" ]; then
            break
          fi
          sleep 5
        done
        if [ -z "$run_id" ]; then
          echo "::error::Could not find tap workflow run with title: $expected_title"
          exit 1
        fi
        # --exit-status makes this step fail when the tap run fails.
        gh run watch "$run_id" \
          --repo steipete/homebrew-tap \
          --exit-status \
          --interval 10

36
.goreleaser.yaml Normal file
View File

@ -0,0 +1,36 @@
# GoReleaser configuration for the telecrawl CLI.
version: 2
project_name: telecrawl

changelog:
  disable: true

builds:
  - id: telecrawl
    main: ./cmd/telecrawl
    binary: telecrawl
    env:
      - CGO_ENABLED=0
    # -X sets internal/cli.version to the release version at link time.
    ldflags:
      - -s -w -X github.com/openclaw/telecrawl/internal/cli.version={{ .Version }}
    targets:
      - darwin_amd64
      - darwin_arm64
      - linux_amd64
      - linux_arm64
      - windows_amd64
      - windows_arm64

archives:
  - ids: [telecrawl]
    formats: [tar.gz]
    name_template: "{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
    # Windows archives are zip instead of tar.gz.
    format_overrides:
      - goos: windows
        formats: [zip]

checksum:
  name_template: checksums.txt

25
CHANGELOG.md Normal file
View File

@ -0,0 +1,25 @@
# Changelog
All notable changes to this project are documented here.
The format follows Keep a Changelog, and this project uses Semantic Versioning.
## [Unreleased]
## [0.1.0] - 2026-05-08
### Added
- Initial Telegram Desktop archive CLI with `doctor`, `import`, `status`,
`chats`, `messages`, and FTS-backed `search` commands.
- Import bridge for Telegram Desktop `tdata` using `opentele2` and Telethon,
with `telecrawl deps install` to create the local Python environment.
- Local SQLite archive at `~/.telecrawl/telecrawl.db`, including chat/message
counts, unread counts, media metadata, and sync state.
- Encrypted Git backups with `backup init`, `backup push`, `backup pull`, and
`backup status`, using reusable `crawlkit` age-encrypted JSONL/Gzip shard
helpers.
- Multi-machine backup support via age recipients, manifest verification,
shard hash checks, and restore into a fresh archive database.
- CI and release automation for linting, tests, secret scanning, GoReleaser
artifacts, and Homebrew tap updates.

178
README.md
View File

@ -2,49 +2,203 @@
Telegram Desktop archive CLI.
`telecrawl` reads your local Telegram Desktop `tdata` through `opentele2` /
Telethon, stores a searchable SQLite archive in `~/.telecrawl/telecrawl.db`,
and can back it up to GitHub as encrypted age shards.
`telecrawl` reads your local Telegram Desktop `tdata` through `opentele2` and
Telethon, stores a searchable SQLite archive in `~/.telecrawl/telecrawl.db`, and
can back it up to GitHub as encrypted age shards.
It is local-first:
- Normal archive/search commands do not upload data.
- `backup push` uploads only age-encrypted shards when you run it explicitly.
- Telegram message text, chat names, sender names, and media metadata stay inside
encrypted backup payloads.
## Install
```bash
brew tap steipete/tap
brew install telecrawl
```
Or install with Go:
```bash
go install github.com/openclaw/telecrawl/cmd/telecrawl@latest
```
## Setup
Install the Python bridge used for Telegram Desktop `tdata` imports:
```bash
telecrawl deps install
```
This creates `~/.telecrawl/venv` and installs `opentele2` plus Telethon.
## Import
```bash
telecrawl doctor
telecrawl import
telecrawl status
telecrawl chats --limit 20
telecrawl messages --limit 20
telecrawl search "query"
```
Import limits default to the latest 200 dialogs and 500 messages per dialog.
Import defaults to:
- latest `200` dialogs
- latest `500` messages per dialog
Use `0` for no limit:
```bash
telecrawl import --dialogs-limit 0 --messages-limit 0
```
Useful reads:
```bash
telecrawl chats --limit 20
telecrawl chats --unread
telecrawl messages --limit 20
telecrawl messages --chat CHAT_ID --after 2026-01-01
telecrawl search "query"
telecrawl search "query" --chat CHAT_ID
```
Add `--json` before the command for machine-readable output:
```bash
telecrawl --json status
telecrawl --json search "invoice"
```
## Data Paths
Defaults:
- Telegram Desktop source: `~/Library/Application Support/Telegram Desktop/tdata`
- archive DB: `~/.telecrawl/telecrawl.db`
- Python bridge venv: `~/.telecrawl/venv`
- Telethon sessions: `~/.telecrawl/sessions/`
- backup config: `~/.telecrawl/backup.json`
- age identity: `~/.telecrawl/age.key`
- backup checkout: `~/Projects/backup-telecrawl`
Override the archive DB:
```bash
telecrawl --db /tmp/telecrawl.db status
```
Override the Telegram Desktop source:
```bash
telecrawl --source "/path/to/tdata" doctor
telecrawl --source "/path/to/tdata" import
```
## Backup
Create `https://github.com/steipete/backup-telecrawl` first, then:
Create `https://github.com/steipete/backup-telecrawl` first, then initialize:
```bash
telecrawl backup init
telecrawl backup push
```
Backup payloads are encrypted before Git sees them. Cleartext Git metadata is
limited to manifest counts, shard paths, export time, public age recipients,
encrypted sizes, and hashes.
The default backup config points at:
Restore:
```json
{
"repo": "~/Projects/backup-telecrawl",
"remote": "https://github.com/steipete/backup-telecrawl.git",
"identity": "~/.telecrawl/age.key"
}
```
Use a different repository or config path:
```bash
telecrawl backup init \
--config ~/.telecrawl/backup.json \
--repo ~/Projects/backup-telecrawl \
--remote https://github.com/steipete/backup-telecrawl.git
```
Inspect backup metadata:
```bash
telecrawl backup status
```
Restore into the current archive DB:
```bash
telecrawl backup pull
telecrawl status
```
Restore into a throwaway DB for validation:
```bash
telecrawl --db /tmp/telecrawl-restore-test.db backup pull
telecrawl --db /tmp/telecrawl-restore-test.db status
```
## Backup Security Model
Backup shards are JSONL, gzip-compressed with deterministic gzip metadata, and
encrypted with age before Git sees them.
Git can still see cleartext metadata:
- export time
- public age recipients
- table names
- row counts
- shard paths
- encrypted byte sizes
- plaintext shard hashes
- backup cadence and which encrypted shards changed
Git cannot read message text, chat names, sender names, or media metadata without
an age identity.
Keep `~/.telecrawl/age.key` private. If you lose it and no other recipient can
decrypt the backup, the encrypted backup cannot be restored.
## Multi-Machine Backups
On another machine:
```bash
telecrawl backup init --no-push
cat ~/.telecrawl/backup.json
```
Copy that machine's public `recipient` into the first machine's
`~/.telecrawl/backup.json`, then, on the first machine, re-encrypt current shards:
```bash
telecrawl backup push
```
The private `AGE-SECRET-KEY-...` identity must not be committed or shared.
## Reset
Remove local state:
```bash
rm -rf ~/.telecrawl
```
Remove only the archive:
```bash
rm -f ~/.telecrawl/telecrawl.db ~/.telecrawl/telecrawl.db-*
```
Do not delete `~/.telecrawl/age.key` unless you have another working backup
recipient or you no longer need to restore existing encrypted backups.

View File

@ -1,3 +1,3 @@
package cli
// version is the CLI version string. It is presumably the symbol targeted by
// the `-X github.com/openclaw/telecrawl/internal/cli.version={{ .Version }}`
// ldflag in .goreleaser.yaml, which would override this default in release
// builds - confirm this file lives in internal/cli.
// NOTE(review): two declarations are visible because this view shows both the
// removed ("dev") and the added ("0.1.0") line of the change.
var version = "dev"
var version = "0.1.0"

16
scripts/coverage.sh Executable file
View File

@ -0,0 +1,16 @@
#!/usr/bin/env sh
# Run the test suite with coverage and fail when total coverage is below a floor.
#
# Usage: scripts/coverage.sh [threshold]
#   threshold         minimum total coverage in percent (default: 35.0)
#   COVERAGE_PROFILE  coverage profile path to write (default: coverage.out)
#
# Exits non-zero when the tests fail, when the total cannot be determined,
# or when the total is below the threshold.
set -eu

threshold="${1:-35.0}"
profile="${COVERAGE_PROFILE:-coverage.out}"

# awk would silently coerce a non-numeric threshold to 0 and always pass,
# so reject anything that is not made of digits and dots.
case "$threshold" in
  '' | *[!0-9.]*)
    echo "invalid coverage threshold: $threshold" >&2
    exit 2
    ;;
esac

go test ./... -coverprofile="$profile" -covermode=atomic

# Capture the report on its own: plain sh has no pipefail, so inside a pipeline
# a `go tool cover` failure would be hidden behind awk's exit status.
report="$(go tool cover -func="$profile")"
total="$(printf '%s\n' "$report" | awk '/^total:/ { sub(/%/, "", $3); print $3 }')"

if [ -z "$total" ]; then
  echo "could not determine total coverage from $profile" >&2
  exit 1
fi

awk -v total="$total" -v threshold="$threshold" 'BEGIN {
  if (total + 0 < threshold + 0) {
    printf "coverage %.1f%% below threshold %.1f%%\n", total, threshold
    exit 1
  }
  printf "coverage %.1f%% >= %.1f%%\n", total, threshold
}'