infra: add fleet control api + multi-instance
- add control API Lambda + fleet deploy workflow
- introduce instances registry + common host config
- add fleet control skill + scripts
- update bootstrap bundles + secrets docs
- wire OpenTofu for multi-instance + user-data
This commit is contained in:
parent
c373a14bb4
commit
05d43b1926
59
.github/workflows/fleet-deploy.yml
vendored
Normal file
59
.github/workflows/fleet-deploy.yml
vendored
Normal file
@ -0,0 +1,59 @@
|
||||
# Fleet Deploy: manually dispatched workflow that resolves the AMI to use and
# then runs scripts/fleet-deploy.sh for the requested target.
name: Fleet Deploy

on:
  workflow_dispatch:
    inputs:
      target:
        description: "all or instance id"
        required: true
      ami_override:
        description: "Optional AMI override"
        required: false
        default: ""

# The job only reads the repository; cloud access comes from the AWS secrets below.
permissions:
  contents: read

jobs:
  fleet:
    runs-on: ubuntu-latest
    # Serialize deploys: a queued run waits instead of cancelling an in-flight one.
    concurrency:
      group: fleet-deploy
      cancel-in-progress: false
    env:
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      AWS_REGION: ${{ secrets.AWS_REGION }}
      S3_BUCKET: ${{ secrets.S3_BUCKET }}
      SSH_PUBLIC_KEY: ${{ secrets.CLAWDINATOR_SSH_PUBLIC_KEY }}
      TF_BACKEND_BUCKET: ${{ secrets.S3_BUCKET }}
      TF_BACKEND_KEY: state/clawdinators.tfstate
      TF_BACKEND_REGION: ${{ secrets.AWS_REGION }}
      TF_BACKEND_DYNAMO_TABLE: clawdinator-terraform-locks
      TF_VAR_control_api_enabled: true
      TF_VAR_control_api_token: ${{ secrets.CONTROL_API_TOKEN }}
      TF_VAR_github_token: ${{ secrets.GITHUB_WORKFLOW_TOKEN }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup OpenTofu
        uses: opentofu/setup-opentofu@v1

      - name: Install tooling
        run: |
          sudo apt-get update
          sudo apt-get install -y jq

      - name: Resolve AMI
        env:
          # Hand the user-supplied input to the script through the environment.
          # Expanding a workflow expression inside the script text would let a
          # crafted value inject shell commands.
          AMI_OVERRIDE: ${{ inputs.ami_override }}
        run: |
          set -euo pipefail
          if [ -n "${AMI_OVERRIDE}" ]; then
            # Only accept something shaped like an AMI id before exporting it.
            if ! printf '%s' "${AMI_OVERRIDE}" | grep -Eq '^ami-[0-9a-f]{8,17}$'; then
              echo "invalid ami_override: expected an AMI id (ami-...)" >&2
              exit 1
            fi
            ami_id="${AMI_OVERRIDE}"
          else
            # AWS_REGION is already exported by the job-level env.
            ami_id="$(bash scripts/resolve-latest-ami.sh)"
          fi
          # Never continue with an empty AMI id.
          if [ -z "${ami_id}" ]; then
            echo "could not resolve an AMI id" >&2
            exit 1
          fi
          echo "AMI_ID=${ami_id}" >> "$GITHUB_ENV"

      - name: Deploy fleet
        env:
          TARGET: ${{ inputs.target }}
          AMI_ID: ${{ env.AMI_ID }}
        run: |
          bash scripts/fleet-deploy.sh
|
||||
14
.github/workflows/image-build.yml
vendored
14
.github/workflows/image-build.yml
vendored
@ -64,14 +64,7 @@ jobs:
|
||||
run: |
|
||||
mkdir -p nix/age-secrets
|
||||
aws s3 sync "s3://${S3_BUCKET}/age-secrets" nix/age-secrets
|
||||
for file in \
|
||||
nix/age-secrets/clawdinator-github-app.pem.age \
|
||||
nix/age-secrets/clawdinator-discord-token.age \
|
||||
nix/age-secrets/clawdinator-anthropic-api-key.age \
|
||||
nix/age-secrets/clawdinator-openai-api-key-peter-2.age
|
||||
do
|
||||
test -f "$file"
|
||||
done
|
||||
bash scripts/validate-age-secrets.sh
|
||||
|
||||
- name: Mint GitHub App token
|
||||
env:
|
||||
@ -91,15 +84,14 @@ jobs:
|
||||
run: |
|
||||
scripts/prepare-repo-seeds.sh repo-seeds
|
||||
|
||||
- name: Upload bootstrap bundle
|
||||
- name: Upload bootstrap bundles
|
||||
env:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.AWS_REGION }}
|
||||
S3_BUCKET: ${{ secrets.S3_BUCKET }}
|
||||
BOOTSTRAP_PREFIX: bootstrap/clawdinator-1
|
||||
run: |
|
||||
bash scripts/upload-bootstrap.sh
|
||||
bash scripts/upload-bootstrap-all.sh
|
||||
|
||||
- name: Build image
|
||||
run: scripts/build-image.sh
|
||||
|
||||
@ -70,7 +70,7 @@ Deploy flow (automation-first):
|
||||
- Bootstrap AWS instances from the AMI with `infra/opentofu/aws` (set `TF_VAR_ami_id`).
|
||||
- Import the image into AWS as an AMI (snapshot import + register image).
|
||||
- Ensure secrets are encrypted to the baked agenix key (see `../nix/nix-secrets/secrets.nix`).
|
||||
- Ensure required secrets exist: `clawdinator-github-app.pem`, `clawdinator-discord-token`, `clawdinator-anthropic-api-key`.
|
||||
- Ensure required secrets exist: `clawdinator-github-app.pem`, `clawdinator-discord-token-<n>`, `clawdinator-control-token`, `clawdinator-anthropic-api-key`.
|
||||
- Update `nix/hosts/<host>.nix` (Discord allowlist, GitHub App installationId, identity name).
|
||||
- Discord must use `messages.queue.byChannel.discord = "interrupt"`; `queue` delays replies to heartbeat and makes the bot appear dead.
|
||||
- Ensure `/var/lib/clawd/repos/clawdinators` contains this repo (self-update requires it).
|
||||
@ -109,7 +109,7 @@ End-to-end SDLC (local → AMI → host) **(verified)**:
|
||||
4) Redeploy from the new AMI (instance replacement):
|
||||
- `devenv shell -- bash -lc "cd infra/opentofu/aws && TF_VAR_ami_id=<AMI_ID> TF_VAR_ssh_public_key=\"$(cat ~/.ssh/id_ed25519.pub)\" TF_VAR_aws_region=eu-central-1 tofu apply -auto-approve"`
|
||||
5) New IP:
|
||||
- `jq -r '.outputs.instance_public_ip.value' infra/opentofu/aws/terraform.tfstate`
|
||||
- `tofu output -json instance_public_ips | jq -r '."clawdinator-1"'`
|
||||
- `ssh -o StrictHostKeyChecking=accept-new root@<ip>`
|
||||
6) Post-deploy sanity:
|
||||
- `systemctl is-active clawdinator`
|
||||
|
||||
28
clawdinator/workspace/skills/fleet/SKILL.md
Normal file
28
clawdinator/workspace/skills/fleet/SKILL.md
Normal file
@ -0,0 +1,28 @@
|
||||
---
|
||||
name: fleet
|
||||
description: Control CLAWDINATOR fleet lifecycle via the control API. Use for /fleet deploy or /fleet status.
|
||||
user-invocable: true
|
||||
---
|
||||
|
||||
# Fleet Control
|
||||
|
||||
Use this skill to manage CLAWDINATOR instances (deploy/replace) and fetch fleet status.
|
||||
|
||||
## Safety + Scope
|
||||
- **Always require an explicit target** for deploy: `all` or `clawdinator-<n>`.
|
||||
- **Never self-deploy**: if target == your instance, refuse.
|
||||
- **No AWS creds**: all actions go through the control API.
|
||||
|
||||
## Commands
|
||||
- `/fleet status`
|
||||
- `/fleet deploy <all|clawdinator-2>`
|
||||
- Optional rollback: `/fleet deploy <target> <ami_id>`
|
||||
|
||||
## Execution
|
||||
Call the control script and return its output:
|
||||
|
||||
```
|
||||
/var/lib/clawd/repos/clawdinators/scripts/fleet-control.sh <action> [target]
|
||||
```
|
||||
|
||||
If the user asks for deploy without a target, ask for the target.
|
||||
148
control/api/handler.js
Normal file
148
control/api/handler.js
Normal file
@ -0,0 +1,148 @@
|
||||
'use strict';
|
||||
|
||||
const AWS = require('aws-sdk');
|
||||
|
||||
const {
|
||||
CONTROL_API_TOKEN,
|
||||
GITHUB_TOKEN,
|
||||
GITHUB_REPO,
|
||||
GITHUB_WORKFLOW,
|
||||
GITHUB_REF,
|
||||
} = process.env;
|
||||
|
||||
/**
 * Build a response object whose body is the JSON-serialized payload.
 *
 * @param {number} statusCode HTTP status code for the response.
 * @param {*} payload Value serialized into the body with JSON.stringify.
 * @returns {{statusCode: number, headers: Object, body: string}} Response object.
 */
function json(statusCode, payload) {
  const headers = { 'Content-Type': 'application/json' };
  const body = JSON.stringify(payload);
  return { statusCode, headers, body };
}
|
||||
|
||||
/**
 * Response returned when the bearer token is missing or does not match.
 *
 * @returns {Object} HTTP 401 JSON response with `ok: false`.
 */
function unauthorized() {
  const payload = { ok: false, error: 'unauthorized' };
  return json(401, payload);
}
|
||||
|
||||
/**
 * Response returned when the request is malformed or not allowed.
 *
 * @param {string} message Human-readable reason placed in the `error` field.
 * @returns {Object} HTTP 400 JSON response with `ok: false`.
 */
function badRequest(message) {
  const payload = { ok: false, error: message };
  return json(400, payload);
}
|
||||
|
||||
/**
 * Extract the bearer token from a request's headers.
 *
 * The `authorization` header is looked up in its lowercase and capitalized
 * spellings first, then under any other casing. The value must be exactly
 * `Bearer <token>` (scheme case-insensitive, surrounding whitespace ignored);
 * anything else, such as another scheme that merely contains the word
 * "Bearer", yields null.
 *
 * @param {Object} headers Request headers keyed by header name.
 * @returns {string|null} The token, or null when absent or malformed.
 */
function getAuthToken(headers) {
  if (!headers || typeof headers !== 'object') return null;

  let auth = headers.authorization || headers.Authorization;
  if (!auth) {
    // Fall back to a case-insensitive search for unusual header casing.
    const name = Object.keys(headers).find(
      (key) => key.toLowerCase() === 'authorization'
    );
    auth = name === undefined ? undefined : headers[name];
  }
  if (typeof auth !== 'string') return null;

  // Anchored so the whole header value has to be "Bearer <token>".
  const match = /^\s*Bearer\s+(\S+)\s*$/i.exec(auth);
  return match ? match[1] : null;
}
|
||||
|
||||
/**
 * Trigger a GitHub Actions `workflow_dispatch` run.
 *
 * Repository, workflow file and ref come from GITHUB_REPO, GITHUB_WORKFLOW and
 * GITHUB_REF, falling back to `openclaw/clawdinators`, `fleet-deploy.yml` and
 * `main`. The request is authenticated with GITHUB_TOKEN.
 *
 * @param {Object} inputs Workflow inputs sent as the `inputs` field of the request.
 * @returns {Promise<void>} Resolves once GitHub has accepted the dispatch.
 * @throws {Error} When the request cannot be completed (including timeout) or
 *   GitHub answers with a non-2xx status; the message carries status and body.
 */
async function dispatchWorkflow(inputs) {
  // Abort well before the function's own time limit so the caller can still
  // turn the failure into a JSON error response.
  const DISPATCH_TIMEOUT_MS = 8000;

  const repo = GITHUB_REPO || 'openclaw/clawdinators';
  const workflow = GITHUB_WORKFLOW || 'fleet-deploy.yml';
  const ref = GITHUB_REF || 'main';
  const url = `https://api.github.com/repos/${repo}/actions/workflows/${workflow}/dispatches`;

  let res;
  try {
    res = await fetch(url, {
      method: 'POST',
      headers: {
        Accept: 'application/vnd.github+json',
        Authorization: `Bearer ${GITHUB_TOKEN}`,
        'Content-Type': 'application/json',
        'User-Agent': 'clawdinator-control',
      },
      body: JSON.stringify({ ref, inputs }),
      signal: AbortSignal.timeout(DISPATCH_TIMEOUT_MS),
    });
  } catch (err) {
    // Network failure or timeout: keep the same error prefix as HTTP failures.
    throw new Error(`workflow dispatch failed: ${err.message}`);
  }

  if (!res.ok) {
    const body = await res.text();
    throw new Error(`workflow dispatch failed: ${res.status} ${body}`);
  }
}
|
||||
|
||||
/**
 * List EC2 instances tagged `app=clawdinator`.
 *
 * Follows `NextToken` so every page of DescribeInstances results is included,
 * not only the first one.
 *
 * NOTE(review): this relies on the `aws-sdk` (v2) module required at the top of
 * the file; confirm it is available in the deployed Lambda package/runtime.
 *
 * @returns {Promise<Array<{name: string, id: string, state: string, ami: string, ip: string}>>}
 *   One entry per instance; `name` comes from the `Name` tag (`'unknown'` when
 *   missing), `state` is `'unknown'` when absent, and `ip` is `'n/a'` when the
 *   instance has no public address.
 * @throws {Error} Whatever the EC2 client rejects with.
 */
async function listInstances() {
  const ec2 = new AWS.EC2();
  const instances = [];
  let nextToken;

  do {
    const params = {
      Filters: [{ Name: 'tag:app', Values: ['clawdinator'] }],
    };
    // Only send NextToken when continuing a previous page.
    if (nextToken) params.NextToken = nextToken;

    const resp = await ec2.describeInstances(params).promise();

    for (const reservation of resp.Reservations || []) {
      for (const instance of reservation.Instances || []) {
        const tags = instance.Tags || [];
        const nameTag = tags.find((tag) => tag.Key === 'Name');
        instances.push({
          name: nameTag ? nameTag.Value : 'unknown',
          id: instance.InstanceId,
          state: instance.State ? instance.State.Name : 'unknown',
          ami: instance.ImageId,
          ip: instance.PublicIpAddress || 'n/a',
        });
      }
    }

    nextToken = resp.NextToken;
  } while (nextToken);

  return instances;
}
|
||||
|
||||
exports.handler = async (event) => {
|
||||
if (!CONTROL_API_TOKEN) {
|
||||
return json(500, { ok: false, error: 'missing CONTROL_API_TOKEN' });
|
||||
}
|
||||
|
||||
const headers = event.headers || {};
|
||||
const token = getAuthToken(headers);
|
||||
if (!token || token !== CONTROL_API_TOKEN) {
|
||||
return unauthorized();
|
||||
}
|
||||
|
||||
if (!event.body) {
|
||||
return badRequest('missing body');
|
||||
}
|
||||
|
||||
const body = event.isBase64Encoded
|
||||
? Buffer.from(event.body, 'base64').toString('utf-8')
|
||||
: event.body;
|
||||
|
||||
let payload;
|
||||
try {
|
||||
payload = JSON.parse(body);
|
||||
} catch (err) {
|
||||
return badRequest('invalid json');
|
||||
}
|
||||
|
||||
const action = (payload.action || '').toLowerCase();
|
||||
const target = payload.target;
|
||||
const caller = payload.caller;
|
||||
const amiOverride = payload.ami_override || '';
|
||||
|
||||
if (action === 'status') {
|
||||
try {
|
||||
const instances = await listInstances();
|
||||
return json(200, { ok: true, instances });
|
||||
} catch (err) {
|
||||
return json(500, { ok: false, error: err.message });
|
||||
}
|
||||
}
|
||||
|
||||
if (action !== 'deploy') {
|
||||
return badRequest('unsupported action');
|
||||
}
|
||||
|
||||
if (!target) {
|
||||
return badRequest('target required');
|
||||
}
|
||||
|
||||
if (caller && target === caller) {
|
||||
return badRequest('refusing self-deploy');
|
||||
}
|
||||
|
||||
if (!GITHUB_TOKEN) {
|
||||
return json(500, { ok: false, error: 'missing GITHUB_TOKEN' });
|
||||
}
|
||||
|
||||
try {
|
||||
await dispatchWorkflow({
|
||||
target,
|
||||
ami_override: amiOverride,
|
||||
});
|
||||
return json(200, { ok: true, message: `deploy queued for ${target}` });
|
||||
} catch (err) {
|
||||
return json(500, { ok: false, error: err.message });
|
||||
}
|
||||
};
|
||||
177
docs/CONTROL_PLANE.md
Normal file
177
docs/CONTROL_PLANE.md
Normal file
@ -0,0 +1,177 @@
|
||||
# Control Plane
|
||||
|
||||
Goal: manage CLAWDINATOR host lifecycle (create/recreate/replace) from **CLAWDINATOR chat** (Telegram/Discord) using an out‑of‑band control API. CLAWDINATOR agents can edit IaC, but **deploys run OOB** with no AWS creds inside agents.
|
||||
|
||||
## Goals
|
||||
- **Plane‑safe control** from CLAWDINATOR chat (chat‑only).
|
||||
- OOB execution (no CLAWDINATOR agent has infra creds).
|
||||
- Repo is the source of truth for fleet state.
|
||||
- Static fleet (Discord token pool constraint).
|
||||
- Simple, auditable deploy flow.
|
||||
|
||||
## Non‑Goals
|
||||
- Task routing, agent scheduling, or tool execution.
|
||||
- Elastic scaling (no arbitrary cattle instances).
|
||||
- Runtime config changes (agents handle their own work).
|
||||
|
||||
## Constraints
|
||||
- Each CLAWDINATOR instance requires a unique Discord bot token.
|
||||
- Fleet size == token pool size (static list).
|
||||
- Persistent changes must land in repo + AMI.
|
||||
- Infra state must be out‑of‑band and locked.
|
||||
|
||||
## Control Plane Components (KISS)
|
||||
- **Control API (AWS Lambda Function URL)**
|
||||
- Authenticated by a shared bearer token.
|
||||
- Dispatches GitHub Actions workflows.
|
||||
- Handles `/fleet status` via AWS DescribeInstances.
|
||||
- **Fleet Control Skill** (runs inside CLAWDINATOR)
|
||||
- Calls the Control API via `scripts/fleet-control.sh`.
|
||||
- Enforces policy (no self‑deploy) before calling.
|
||||
- **GitHub Actions** (execution)
|
||||
- Runs OpenTofu apply.
|
||||
- **OpenTofu** (infra state)
|
||||
- Remote state in S3 + Dynamo lock table.
|
||||
- **Instance Registry** (desired state)
|
||||
- `nix/instances.json` (authoritative map).
|
||||
- **Bootstrap + Secrets**
|
||||
- S3 bootstrap prefix per instance.
|
||||
- Agenix secrets per instance token.
|
||||
|
||||
## Control API Auth
|
||||
- Shared bearer token stored as `clawdinator-control-token.age`.
|
||||
- Token is injected into instances via bootstrap and read from `/run/agenix/clawdinator-control-token`.
|
||||
|
||||
## Control API Env (Lambda)
|
||||
- `CONTROL_API_TOKEN`
|
||||
- `GITHUB_TOKEN`
|
||||
- `GITHUB_REPO` (default `openclaw/clawdinators`)
|
||||
- `GITHUB_WORKFLOW` (default `fleet-deploy.yml`)
|
||||
- `GITHUB_REF` (default `main`)
|
||||
|
||||
## Desired State (Fleet Registry)
|
||||
`nix/instances.json` is the fleet map (single source of truth for infra + host configs).
|
||||
|
||||
Example:
|
||||
```json
|
||||
{
|
||||
"clawdinator-1": {
|
||||
"host": "clawdinator-1",
|
||||
"instanceType": "t3.large",
|
||||
"bootstrapPrefix": "bootstrap/clawdinator-1",
|
||||
"discordTokenSecret": "clawdinator-discord-token-1"
|
||||
},
|
||||
"clawdinator-2": {
|
||||
"host": "clawdinator-2",
|
||||
"instanceType": "t3.large",
|
||||
"bootstrapPrefix": "bootstrap/clawdinator-2",
|
||||
"discordTokenSecret": "clawdinator-discord-token-2"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Command Semantics (Minimal)
|
||||
### `/fleet deploy <target>`
|
||||
- **Target required** (no implicit default): `all` or `<id>`.
|
||||
- Always runs `tofu apply`.
|
||||
- `all`: replace all instances using **latest successful AMI**.
|
||||
- `<id>`: replace only that instance using latest successful AMI.
|
||||
- Also creates new instances if present in desired state.
|
||||
|
||||
### `/fleet status`
|
||||
- Returns live fleet status (EC2 describe by tag).
|
||||
|
||||
## Access Control (Policy)
|
||||
- Shared control token authorizes calls to the Control API.
|
||||
- Policy enforced by the fleet-control skill:
|
||||
- Humans: deploy any target (including `all`).
|
||||
- Bots: deploy **only the other instance** (no self‑deploy).
|
||||
- Control API also rejects `target == caller` when `caller` is provided.
|
||||
|
||||
## Lifecycle Flows
|
||||
### Add a new instance (static token pool)
|
||||
1) Create Discord bot token → `clawdinator-discord-token-2.age`.
|
||||
2) Add entry to `nix/instances.json`.
|
||||
3) Add host file `nix/hosts/clawdinator-2.nix`.
|
||||
4) Run `/fleet deploy all` or `/fleet deploy clawdinator-2`.
|
||||
5) Host boots, pulls its bootstrap prefix, starts CLAWDINATOR.
|
||||
|
||||
### Recreate a single instance
|
||||
- `/fleet deploy clawdinator-2` (forces replace for that host).
|
||||
|
||||
### Roll the fleet
|
||||
- `/fleet deploy all` replaces every host with latest AMI.
|
||||
|
||||
## Self‑Recycle (Out‑of‑Band)
|
||||
- Agents call the Control API (no AWS creds) via the fleet-control skill.
|
||||
- Control API dispatches GitHub Actions; AWS creds live in CI only.
|
||||
|
||||
## State + Audit
|
||||
- **Desired state**: Git repo (`nix/instances.json`).
|
||||
- **Actual state**: OpenTofu S3 backend.
|
||||
- **Audit trail**: Git + Actions logs.
|
||||
|
||||
## AMI Selection (KISS)
|
||||
- Use latest AMI tagged `clawdinator=true`.
|
||||
- Optional override via workflow input `ami_override` for rollback.
|
||||
|
||||
## Deploy Execution (Workflow)
|
||||
- Single workflow `fleet-deploy.yml`.
|
||||
- Inputs: `target`, `ami_override` (optional).
|
||||
- Concurrency group `fleet-deploy` (no overlaps).
|
||||
- `target=all` runs `tofu apply` normally.
|
||||
- `target=<id>` runs `tofu apply -replace='aws_instance.clawdinator["<id>"]'` (implementation detail; quote the address so the shell does not interpret the brackets and quotes).
|
||||
|
||||
## Bootstrap (Per‑Instance)
|
||||
- Upload per instance:
|
||||
- `bootstrap/clawdinator-1`
|
||||
- `bootstrap/clawdinator-2`
|
||||
- Each bundle contains **only that instance’s** Discord token.
|
||||
|
||||
## EC2 User-Data (Instance Boot)
|
||||
- OpenTofu renders a per-instance user‑data script.
|
||||
- Script writes `/etc/clawdinator/bootstrap-prefix`.
|
||||
- Script writes `/etc/clawdinator/control-api-url`.
|
||||
- Script starts `clawdinator-bootstrap.service` + `clawdinator-repo-seed.service`.
|
||||
- Script runs `nixos-rebuild switch --flake /var/lib/clawd/repos/clawdinators#<host>`.
|
||||
|
||||
## Plane Ops Runbook (Chat‑only)
|
||||
### Preflight (before flight)
|
||||
1) Control API Lambda exists; URL is written to `/etc/clawdinator/control-api-url`.
|
||||
2) `clawdinator-control-token.age` exists in `nix-secrets` and is in bootstrap bundles.
|
||||
3) GitHub Action `fleet-deploy.yml` exists and can be dispatched.
|
||||
4) `nix/instances.json` includes all desired instances.
|
||||
5) Discord tokens are encrypted in `nix-secrets` and synced to S3 `age-secrets/`.
|
||||
6) Latest AMI build succeeded (tagged `clawdinator=true`).
|
||||
7) `/fleet status` returns the current fleet.
|
||||
|
||||
### On the plane
|
||||
- `/fleet status` → verify fleet + AMI.
|
||||
- `/fleet deploy clawdinator-2` → bring up new host.
|
||||
- `/fleet deploy all` → roll the fleet to latest AMI.
|
||||
- If rollback needed: rerun deploy with `ami_override` (exact AMI id).
|
||||
|
||||
## Implementation Checklist (From Design → Works)
|
||||
1) Add `nix/instances.json` (clawdinator‑1 + clawdinator‑2).
|
||||
2) Add `nix/hosts/clawdinator-2.nix` and wire host configs to read registry values.
|
||||
3) Update OpenTofu:
|
||||
- multi‑instance `for_each` using `nix/instances.json`.
|
||||
- S3 backend + Dynamo lock table.
|
||||
- Control API Lambda (Function URL).
|
||||
4) Add `clawdinator-control-token.age` to `nix-secrets` and include in bootstrap bundles.
|
||||
5) Add workflow `fleet-deploy.yml`:
|
||||
- inputs: `target`, `ami_override` (optional).
|
||||
- resolves latest AMI by tag when override not set.
|
||||
- runs `tofu apply` (replace when target != all).
|
||||
6) Add fleet-control skill + script (`scripts/fleet-control.sh`).
|
||||
7) Validate:
|
||||
- `/fleet status`
|
||||
- `/fleet deploy clawdinator-2`
|
||||
- verify new host in AWS + CLAWDINATOR service active.
|
||||
|
||||
## Decisions
|
||||
- Control endpoint: AWS Lambda (Function URL).
|
||||
- OpenTofu state: S3 backend + Dynamo lock table.
|
||||
- Control auth: shared bearer token (`clawdinator-control-token.age`).
|
||||
- Plane ops: CLAWDINATOR chat → fleet-control skill → Control API.
|
||||
- Deploy command requires explicit target.
|
||||
@ -24,7 +24,7 @@ Image pipeline:
|
||||
- Runtime: explicit token files via agenix (standard).
|
||||
- GitHub token is required. Prefer GitHub App (`services.clawdinator.githubApp.*`) to mint short-lived tokens.
|
||||
- Store PEM and tokens in the local secrets repo (see docs/SECRETS.md) and decrypt to `/run/agenix/*`.
|
||||
- Discord token is required: set `services.clawdinator.discordTokenFile` to `/run/agenix/clawdinator-discord-token`.
|
||||
- Discord token is required: set `services.clawdinator.discordTokenFile` to `/run/agenix/clawdinator-discord-token-<n>`.
|
||||
|
||||
Deliverables:
|
||||
- Infra code in infra/opentofu/aws.
|
||||
|
||||
@ -10,12 +10,20 @@ Image pipeline (CI):
|
||||
- `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` / `AWS_REGION` / `S3_BUCKET` (required).
|
||||
- `CLAWDINATOR_AGE_KEY` (required; used to build the bootstrap bundle uploaded to S3).
|
||||
|
||||
Control plane (OOB):
|
||||
- `control_api_token` (Lambda env or OpenTofu variable; stored as `clawdinator-control-token.age`).
|
||||
- `github_token` (workflow dispatch PAT).
|
||||
|
||||
Runtime control (CLAWDINATOR):
|
||||
- `clawdinator-control-token.age` is injected to `/run/agenix/clawdinator-control-token` and used by `/fleet`.
|
||||
- Token is shared across instances (KISS); policy enforcement happens in the skill.
|
||||
|
||||
Local storage:
|
||||
- Keep AWS keys encrypted in `../nix/nix-secrets` for local runs if needed.
|
||||
- CI pulls credentials from GitHub Actions secrets (never from host files).
|
||||
|
||||
Runtime (CLAWDINATOR):
|
||||
- Discord bot token (required, per instance).
|
||||
- Discord bot token (required, per instance; `clawdinator-discord-token-<n>.age`).
|
||||
- Telegram bot token (required if Telegram channel is enabled).
|
||||
- GitHub token (required): GitHub App installation token (preferred) or a read-only PAT.
|
||||
- Anthropic API key (required for Claude models).
|
||||
@ -44,10 +52,10 @@ Agenix (local secrets repo):
|
||||
- Sync encrypted secrets to the host at `/var/lib/clawd/nix-secrets`.
|
||||
- Decrypt on host with agenix; point NixOS options at `/run/agenix/*`.
|
||||
- Image builds do **not** bake the agenix identity; the age key is injected at runtime via the bootstrap bundle.
|
||||
- Required files (minimum): `clawdinator-github-app.pem.age`, `clawdinator-discord-token.age`, `clawdinator-anthropic-api-key.age`.
|
||||
- Required files (minimum): `clawdinator-github-app.pem.age`, `clawdinator-anthropic-api-key.age`, `clawdinator-openai-api-key-peter-2.age`, `clawdinator-control-token.age`.
|
||||
- Required per instance: `clawdinator-discord-token-1.age`, `clawdinator-discord-token-2.age` (one per instance).
|
||||
- Required for Telegram: `clawdinator-telegram-bot-token.age` (when Telegram is enabled).
|
||||
- Telegram allowlist (if using allowFrom secrets): `clawdinator-telegram-allow-from.age`.
|
||||
- Also required for OpenAI: `clawdinator-openai-api-key-peter-2.age`.
|
||||
- CI image pipeline (stored locally, not on hosts): `clawdinator-image-uploader-access-key-id.age`, `clawdinator-image-uploader-secret-access-key.age`, `clawdinator-image-bucket-name.age`, `clawdinator-image-bucket-region.age`.
|
||||
|
||||
Bootstrap bundle (runtime injection):
|
||||
@ -69,8 +77,10 @@ Example NixOS wiring (agenix):
|
||||
"/var/lib/clawd/nix-secrets/clawdinator-anthropic-api-key.age";
|
||||
age.secrets."clawdinator-openai-api-key-peter-2".file =
|
||||
"/var/lib/clawd/nix-secrets/clawdinator-openai-api-key-peter-2.age";
|
||||
age.secrets."clawdinator-discord-token".file =
|
||||
"/var/lib/clawd/nix-secrets/clawdinator-discord-token.age";
|
||||
age.secrets."clawdinator-discord-token-1".file =
|
||||
"/var/lib/clawd/nix-secrets/clawdinator-discord-token-1.age";
|
||||
age.secrets."clawdinator-control-token".file =
|
||||
"/var/lib/clawd/nix-secrets/clawdinator-control-token.age";
|
||||
age.secrets."clawdinator-telegram-bot-token".file =
|
||||
"/var/lib/clawd/nix-secrets/clawdinator-telegram-bot-token.age";
|
||||
age.secrets."clawdinator-telegram-allow-from".file =
|
||||
@ -83,7 +93,7 @@ Example NixOS wiring (agenix):
|
||||
services.clawdinator.openaiApiKeyFile =
|
||||
"/run/agenix/clawdinator-openai-api-key-peter-2";
|
||||
services.clawdinator.discordTokenFile =
|
||||
"/run/agenix/clawdinator-discord-token";
|
||||
"/run/agenix/clawdinator-discord-token-1";
|
||||
services.clawdinator.telegramAllowFromFile =
|
||||
"/run/agenix/clawdinator-telegram-allow-from";
|
||||
|
||||
|
||||
@ -50,6 +50,15 @@
|
||||
];
|
||||
};
|
||||
|
||||
# NixOS system for the clawdinator-2 host: this flake's default overlay, the
# agenix module, and the host's own configuration file.
nixosConfigurations.clawdinator-2 = nixpkgs.lib.nixosSystem {
  system = "x86_64-linux";
  modules = [
    # Inline module that applies this flake's default overlay to nixpkgs.
    ({ ... }: {
      nixpkgs.overlays = [ self.overlays.default ];
    })
    agenix.nixosModules.default
    ./nix/hosts/clawdinator-2.nix
  ];
};
|
||||
|
||||
nixosConfigurations.clawdinator-1-image = nixpkgs.lib.nixosSystem {
|
||||
system = "x86_64-linux";
|
||||
modules = [
|
||||
|
||||
@ -1,39 +1,69 @@
|
||||
# OpenTofu (AWS S3 Image Bucket)
|
||||
# OpenTofu (AWS Infra)
|
||||
|
||||
Goal: use the CLAWDINATOR S3 bucket for images + bootstrap artifacts, create the VM Import role, and attach import permissions to the CI IAM user.
|
||||
Also provisions EFS for shared memory.
|
||||
Goal: manage the CLAWDINATOR fleet infrastructure (S3 image bucket, VM import role, EFS, EC2 instances, and control-plane Lambda).
|
||||
|
||||
Prereqs:
|
||||
## Prereqs
|
||||
- AWS credentials with permissions to manage IAM (use your homelab-admin key locally).
|
||||
- Fleet registry: `nix/instances.json` (authoritative instance list).
|
||||
|
||||
Usage:
|
||||
- export AWS_ACCESS_KEY_ID=...
|
||||
- export AWS_SECRET_ACCESS_KEY=...
|
||||
- export AWS_REGION=eu-central-1
|
||||
- export TF_VAR_aws_region=eu-central-1
|
||||
- export TF_VAR_ami_id=ami-... # leave empty to skip instance creation
|
||||
- export TF_VAR_ssh_public_key="$(cat ~/.ssh/id_ed25519.pub)" # required when ami_id is set
|
||||
- tofu init
|
||||
- tofu apply
|
||||
## Usage
|
||||
|
||||
Outputs:
|
||||
```sh
|
||||
export AWS_ACCESS_KEY_ID=...
|
||||
export AWS_SECRET_ACCESS_KEY=...
|
||||
export AWS_REGION=eu-central-1
|
||||
export TF_VAR_aws_region=eu-central-1
|
||||
export TF_VAR_ami_id=ami-... # leave empty to skip instance creation
|
||||
export TF_VAR_ssh_public_key="$(cat ~/.ssh/id_ed25519.pub)" # required when ami_id is set
|
||||
```
|
||||
|
||||
### Remote state (S3 + Dynamo)
|
||||
|
||||
```sh
|
||||
tofu init \
|
||||
-backend-config="bucket=clawdinator-images-eu1-20260107165216" \
|
||||
-backend-config="key=state/clawdinators.tfstate" \
|
||||
-backend-config="region=eu-central-1" \
|
||||
-backend-config="dynamodb_table=clawdinator-terraform-locks"
|
||||
```
|
||||
|
||||
### Apply
|
||||
|
||||
```sh
|
||||
tofu apply
|
||||
```
|
||||
|
||||
## Control-plane API (optional)
|
||||
Enable only when tokens are available:
|
||||
|
||||
```sh
|
||||
export TF_VAR_control_api_enabled=true
|
||||
export TF_VAR_control_api_token=...
|
||||
export TF_VAR_github_token=...
|
||||
```
|
||||
|
||||
## Outputs
|
||||
- `bucket_name`
|
||||
- `aws_region`
|
||||
- `ci_user_name`
|
||||
- `access_key_id`
|
||||
- `secret_access_key`
|
||||
- `instance_id`
|
||||
- `instance_public_ip`
|
||||
- `instance_ids`
|
||||
- `instance_public_ips`
|
||||
- `instance_public_dns`
|
||||
- `efs_file_system_id`
|
||||
- `efs_security_group_id`
|
||||
- `control_api_url`
|
||||
|
||||
CI wiring:
|
||||
## CI wiring
|
||||
- Set GitHub Actions secrets:
|
||||
- `AWS_ACCESS_KEY_ID`
|
||||
- `AWS_SECRET_ACCESS_KEY`
|
||||
- `AWS_REGION`
|
||||
- `S3_BUCKET`
|
||||
- `CLAWDINATOR_SSH_PUBLIC_KEY`
|
||||
- `CONTROL_API_TOKEN`
|
||||
- `GITHUB_WORKFLOW_TOKEN`
|
||||
|
||||
Runtime bootstrap:
|
||||
## Runtime bootstrap
|
||||
- Instances get an IAM role with read access to `s3://${S3_BUCKET}/bootstrap/*` for secrets + repo seeds.
|
||||
|
||||
@ -1,10 +1,26 @@
|
||||
terraform {
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 5.0"
|
||||
}
|
||||
archive = {
|
||||
source = "hashicorp/archive"
|
||||
version = "~> 2.0"
|
||||
}
|
||||
}
|
||||
|
||||
backend "s3" {}
|
||||
}
|
||||
|
||||
provider "aws" {
|
||||
region = var.aws_region
|
||||
}
|
||||
|
||||
locals {
|
||||
tags = merge(var.tags, { "app" = "clawdinator" })
|
||||
instance_enabled = var.ami_id != ""
|
||||
instances = jsondecode(file("${path.module}/../../nix/instances.json"))
|
||||
instance_enabled = var.ami_id != "" && length(local.instances) > 0
|
||||
}
|
||||
|
||||
resource "aws_s3_bucket" "image_bucket" {
|
||||
@ -41,6 +57,18 @@ resource "aws_s3_bucket_versioning" "image_bucket" {
|
||||
}
|
||||
}
|
||||
|
||||
# DynamoDB table keyed by "LockID" (string), billed on demand.
# Its name comes from var.terraform_lock_table_name.
resource "aws_dynamodb_table" "terraform_lock" {
  name     = var.terraform_lock_table_name
  hash_key = "LockID"

  attribute {
    name = "LockID"
    type = "S"
  }

  billing_mode = "PAY_PER_REQUEST"
  tags         = local.tags
}
|
||||
|
||||
data "aws_iam_policy_document" "vmimport_assume" {
|
||||
statement {
|
||||
actions = ["sts:AssumeRole"]
|
||||
@ -291,14 +319,21 @@ resource "aws_efs_mount_target" "memory" {
|
||||
}
|
||||
|
||||
resource "aws_instance" "clawdinator" {
|
||||
count = local.instance_enabled ? 1 : 0
|
||||
for_each = local.instance_enabled ? local.instances : {}
|
||||
ami = var.ami_id
|
||||
instance_type = var.instance_type
|
||||
instance_type = each.value.instanceType
|
||||
subnet_id = element(data.aws_subnets.default.ids, 0)
|
||||
vpc_security_group_ids = [aws_security_group.clawdinator[0].id]
|
||||
key_name = aws_key_pair.operator[0].key_name
|
||||
associate_public_ip_address = true
|
||||
iam_instance_profile = aws_iam_instance_profile.instance.name
|
||||
user_data_replace_on_change = true
|
||||
user_data = templatefile("${path.module}/user-data.sh.tmpl", {
|
||||
instance_name = each.value.host
|
||||
bootstrap_prefix = each.value.bootstrapPrefix
|
||||
flake_host = each.value.host
|
||||
control_api_url = var.control_api_enabled ? aws_lambda_function_url.control[0].function_url : ""
|
||||
})
|
||||
|
||||
root_block_device {
|
||||
volume_size = var.root_volume_size_gb
|
||||
@ -306,6 +341,90 @@ resource "aws_instance" "clawdinator" {
|
||||
}
|
||||
|
||||
tags = merge(local.tags, {
|
||||
Name = var.instance_name
|
||||
Name = each.value.host
|
||||
})
|
||||
}
|
||||
|
||||
# Zip the control API sources (control/api) for upload as the Lambda package.
# Only evaluated when the control API is enabled.
data "archive_file" "control_lambda" {
  count = var.control_api_enabled ? 1 : 0

  type        = "zip"
  source_dir  = "${path.module}/../../control/api"
  output_path = "${path.module}/.terraform/control-api.zip"
}
|
||||
|
||||
# Trust policy: lets the Lambda service assume the control API role.
data "aws_iam_policy_document" "control_lambda_assume" {
  statement {
    principals {
      type        = "Service"
      identifiers = ["lambda.amazonaws.com"]
    }

    actions = ["sts:AssumeRole"]
  }
}
|
||||
|
||||
# Execution role for the control API Lambda; created only when the API is enabled.
resource "aws_iam_role" "control_lambda" {
  count = var.control_api_enabled ? 1 : 0

  name               = var.control_api_name
  assume_role_policy = data.aws_iam_policy_document.control_lambda_assume.json

  tags = local.tags
}
|
||||
|
||||
# Attach the AWS-managed AWSLambdaBasicExecutionRole policy to the control API role.
resource "aws_iam_role_policy_attachment" "control_lambda_basic" {
  count = var.control_api_enabled ? 1 : 0

  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
  role       = aws_iam_role.control_lambda[0].name
}
|
||||
|
||||
# Inline policy granting the control API role ec2:DescribeInstances, the call
# the handler makes to report fleet status.
resource "aws_iam_role_policy" "control_lambda_ec2" {
  count = var.control_api_enabled ? 1 : 0

  name = "clawdinator-control-ec2"
  role = aws_iam_role.control_lambda[0].id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Action   = ["ec2:DescribeInstances"]
        Resource = "*"
      },
    ]
  })
}
|
||||
|
||||
# Control API Lambda. The package is the zipped control/api directory and the
# entry point "handler.handler" is the `handler` export of handler.js.
resource "aws_lambda_function" "control" {
  count = var.control_api_enabled ? 1 : 0

  function_name = var.control_api_name
  role          = aws_iam_role.control_lambda[0].arn

  # Code package; the hash makes the function update when the zip contents change.
  filename         = data.archive_file.control_lambda[0].output_path
  source_code_hash = data.archive_file.control_lambda[0].output_base64sha256
  handler          = "handler.handler"
  runtime          = "nodejs20.x"

  memory_size = 256
  timeout     = 10

  # Configuration read from process.env by the handler.
  environment {
    variables = {
      CONTROL_API_TOKEN = var.control_api_token
      GITHUB_TOKEN      = var.github_token
      GITHUB_REPO       = var.github_repo
      GITHUB_WORKFLOW   = var.github_workflow
      GITHUB_REF        = var.github_ref
    }
  }

  tags = local.tags
}
|
||||
|
||||
# Public HTTPS endpoint for the control API. No IAM auth is applied at the URL
# (authorization_type = "NONE"); the handler checks a bearer token itself.
resource "aws_lambda_function_url" "control" {
  count = var.control_api_enabled ? 1 : 0

  authorization_type = "NONE"
  function_name      = aws_lambda_function.control[0].function_name
}
|
||||
|
||||
# Resource-based permission allowing any principal to invoke the function
# through its function URL (matching the URL's "NONE" auth type).
resource "aws_lambda_permission" "control_url" {
  count = var.control_api_enabled ? 1 : 0

  statement_id  = "AllowFunctionUrl"
  function_name = aws_lambda_function.control[0].function_name

  action                 = "lambda:InvokeFunctionUrl"
  principal              = "*"
  function_url_auth_type = "NONE"
}
|
||||
|
||||
@ -23,19 +23,19 @@ output "secret_access_key" {
|
||||
description = "Use in CI as AWS_SECRET_ACCESS_KEY."
|
||||
}
|
||||
|
||||
output "instance_id" {
|
||||
value = local.instance_enabled ? aws_instance.clawdinator[0].id : null
|
||||
description = "CLAWDINATOR instance ID."
|
||||
output "instance_ids" {
|
||||
value = { for name, inst in aws_instance.clawdinator : name => inst.id }
|
||||
description = "CLAWDINATOR instance IDs by name."
|
||||
}
|
||||
|
||||
output "instance_public_ip" {
|
||||
value = local.instance_enabled ? aws_instance.clawdinator[0].public_ip : null
|
||||
description = "CLAWDINATOR public IP."
|
||||
output "instance_public_ips" {
|
||||
value = { for name, inst in aws_instance.clawdinator : name => inst.public_ip }
|
||||
description = "CLAWDINATOR public IPs by name."
|
||||
}
|
||||
|
||||
output "instance_public_dns" {
|
||||
value = local.instance_enabled ? aws_instance.clawdinator[0].public_dns : null
|
||||
description = "CLAWDINATOR public DNS."
|
||||
value = { for name, inst in aws_instance.clawdinator : name => inst.public_dns }
|
||||
description = "CLAWDINATOR public DNS by name."
|
||||
}
|
||||
|
||||
output "efs_file_system_id" {
|
||||
@ -47,3 +47,8 @@ output "efs_security_group_id" {
|
||||
value = aws_security_group.efs.id
|
||||
description = "Security group ID for EFS."
|
||||
}
|
||||
|
||||
# URL of the control-plane Lambda, or null when the control API is disabled.
# The function URL resource has count 0 or 1, so one() over the splat yields
# either null or the single URL.
output "control_api_url" {
  description = "Control-plane API Lambda URL."
  value       = one(aws_lambda_function_url.control[*].function_url)
}
|
||||
|
||||
27
infra/opentofu/aws/user-data.sh.tmpl
Normal file
27
infra/opentofu/aws/user-data.sh.tmpl
Normal file
@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env bash
# First-boot user-data for a CLAWDINATOR instance.
#
# Steps:
#   1. Record the instance name, bootstrap prefix and (if set) control API URL
#      under /etc/clawdinator.
#   2. Stop the main service, then run the bootstrap and repo-seed units.
#   3. Fail if the clawdinators repo was not seeded.
#   4. Switch the system to this host's flake configuration.
#
# NOTE(review): the .tmpl suffix suggests this file is rendered with OpenTofu's
# templatefile(); if so, every dollar-brace reference below is substituted at
# render time rather than expanded by bash at run time -- confirm against the
# caller before adding new shell variables in that syntax.
set -euo pipefail

instance_name="${instance_name}"
bootstrap_prefix="${bootstrap_prefix}"
flake_host="${flake_host}"
control_api_url="${control_api_url}"

install -d -m 0755 /etc/clawdinator
printf '%s' "${instance_name}" > /etc/clawdinator/instance-name
printf '%s' "${bootstrap_prefix}" > /etc/clawdinator/bootstrap-prefix
# The control API URL file is only written when a URL was provided.
if [ -n "${control_api_url}" ]; then
  printf '%s' "${control_api_url}" > /etc/clawdinator/control-api-url
fi

# The main service may not be running yet; a failed stop is not an error.
systemctl stop clawdinator.service || true
systemctl daemon-reload

# NOTE(review): the clawdinator-bootstrap unit is ordered after
# amazon-init.service in the NixOS module. If this script is itself executed
# by amazon-init.service, this blocking start could wait on its own parent --
# confirm the ordering on a fresh instance.
systemctl start clawdinator-bootstrap.service
systemctl start clawdinator-repo-seed.service

if [ ! -d /var/lib/clawd/repos/clawdinators ]; then
  echo "clawdinator repo missing after bootstrap" >&2
  exit 1
fi

# Quoted so the flake reference is always passed as a single argument.
nixos-rebuild switch --flake "/var/lib/clawd/repos/clawdinators#${flake_host}"
|
||||
@ -27,18 +27,6 @@ variable "ami_id" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "instance_name" {
|
||||
description = "Name tag for the CLAWDINATOR instance."
|
||||
type = string
|
||||
default = "clawdinator-1"
|
||||
}
|
||||
|
||||
variable "instance_type" {
|
||||
description = "EC2 instance type."
|
||||
type = string
|
||||
default = "t3.large"
|
||||
}
|
||||
|
||||
variable "root_volume_size_gb" {
|
||||
description = "Root EBS volume size in GiB."
|
||||
type = number
|
||||
@ -60,3 +48,61 @@ variable "allowed_cidrs" {
|
||||
type = list(string)
|
||||
default = ["0.0.0.0/0"]
|
||||
}
|
||||
|
||||
# Name of the DynamoDB table used for state locking.
variable "terraform_lock_table_name" {
  type        = string
  default     = "clawdinator-terraform-locks"
  description = "DynamoDB table name for OpenTofu state locking."
}
|
||||
|
||||
# Feature flag for the control-plane API resources; off by default.
variable "control_api_enabled" {
  type        = bool
  default     = false
  description = "Enable the control-plane API Lambda."
}
|
||||
|
||||
# Function name given to the control-plane API Lambda.
variable "control_api_name" {
  type        = string
  default     = "clawdinator-control-api"
  description = "Name for the control-plane API Lambda."
}
|
||||
|
||||
# Bearer token for the control-plane API. Marked sensitive; may be left empty
# only while the control API is disabled.
variable "control_api_token" {
  type        = string
  default     = ""
  sensitive   = true
  description = "Bearer token required by the control-plane API."

  validation {
    error_message = "control_api_token is required when control_api_enabled is true."
    condition     = var.control_api_enabled ? length(var.control_api_token) > 0 : true
  }
}
|
||||
|
||||
# GitHub token handed to the control Lambda. Marked sensitive; may be left
# empty only while the control API is disabled.
variable "github_token" {
  type        = string
  default     = ""
  sensitive   = true
  description = "GitHub token with workflow dispatch permissions."

  validation {
    error_message = "github_token is required when control_api_enabled is true."
    condition     = var.control_api_enabled ? length(var.github_token) > 0 : true
  }
}
|
||||
|
||||
# Repository (owner/name) whose workflow the control Lambda dispatches.
variable "github_repo" {
  type        = string
  default     = "openclaw/clawdinators"
  description = "GitHub repo for workflow dispatch (owner/name)."
}
|
||||
|
||||
# File name of the fleet deploy workflow.
variable "github_workflow" {
  type        = string
  default     = "fleet-deploy.yml"
  description = "Workflow file name for fleet deploy."
}
|
||||
|
||||
# Git ref used for fleet deploys.
variable "github_ref" {
  type        = string
  default     = "main"
  description = "Git ref to deploy from."
}
|
||||
|
||||
@ -6,8 +6,10 @@
|
||||
"/var/lib/clawd/nix-secrets/clawdinator-anthropic-api-key.age";
|
||||
age.secrets."clawdinator-openai-api-key-peter-2".file =
|
||||
"/var/lib/clawd/nix-secrets/clawdinator-openai-api-key-peter-2.age";
|
||||
age.secrets."clawdinator-discord-token".file =
|
||||
"/var/lib/clawd/nix-secrets/clawdinator-discord-token.age";
|
||||
age.secrets."clawdinator-discord-token-1".file =
|
||||
"/var/lib/clawd/nix-secrets/clawdinator-discord-token-1.age";
|
||||
age.secrets."clawdinator-control-token".file =
|
||||
"/var/lib/clawd/nix-secrets/clawdinator-control-token.age";
|
||||
age.secrets."clawdinator-telegram-bot-token".file =
|
||||
"/var/lib/clawd/nix-secrets/clawdinator-telegram-bot-token.age";
|
||||
age.secrets."clawdinator-telegram-allow-from".file =
|
||||
@ -76,7 +78,7 @@
|
||||
|
||||
anthropicApiKeyFile = "/run/agenix/clawdinator-anthropic-api-key";
|
||||
openaiApiKeyFile = "/run/agenix/clawdinator-openai-api-key-peter-2";
|
||||
discordTokenFile = "/run/agenix/clawdinator-discord-token";
|
||||
discordTokenFile = "/run/agenix/clawdinator-discord-token-1";
|
||||
telegramAllowFromFile = "/run/agenix/clawdinator-telegram-allow-from";
|
||||
|
||||
githubApp = {
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
(modulesPath + "/virtualisation/ec2-data.nix")
|
||||
(modulesPath + "/virtualisation/amazon-init.nix")
|
||||
../modules/clawdinator.nix
|
||||
./clawdinator-1-common.nix
|
||||
./clawdinator-common.nix
|
||||
];
|
||||
|
||||
networking.hostName = "clawdinator-1";
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
imports = [
|
||||
(modulesPath + "/virtualisation/amazon-image.nix")
|
||||
../modules/clawdinator.nix
|
||||
./clawdinator-1-common.nix
|
||||
./clawdinator-common.nix
|
||||
];
|
||||
|
||||
networking.hostName = "clawdinator-1";
|
||||
|
||||
23
nix/hosts/clawdinator-2.nix
Normal file
23
nix/hosts/clawdinator-2.nix
Normal file
@ -0,0 +1,23 @@
|
||||
# NixOS host definition for the clawdinator-2 EC2 instance: Amazon image base,
# the clawdinator module, and the shared clawdinator-common configuration.
{ lib, modulesPath, pkgs, ... }:
{
  imports = [
    (modulesPath + "/virtualisation/amazon-image.nix")
    ../modules/clawdinator.nix
    ./clawdinator-common.nix
  ];

  system.stateVersion = "26.05";
  time.timeZone = "UTC";

  networking = {
    hostName = "clawdinator-2";
    # Only SSH is opened by this host file.
    firewall.allowedTCPPorts = [ 22 ];
  };

  nix = {
    package = pkgs.nixVersions.stable;
    settings.experimental-features = [ "nix-command" "flakes" ];
  };

  # mkForce overrides any GRUB device set by the imported modules.
  boot.loader.grub.device = lib.mkForce "/dev/nvme0n1";

  users.users.root.openssh.authorizedKeys.keys = [
    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOLItFT3SVm5r7gELrfRRJxh6V2sf/BIx7HKXt6oVWpB"
  ];
}
|
||||
@ -1,6 +1,15 @@
|
||||
{ lib, config, ... }:
|
||||
let
|
||||
cfg = config.services.clawdinator;
|
||||
secretsPath = config.clawdinator.secretsPath;
|
||||
instancesFile = ../instances.json;
|
||||
instances = builtins.fromJSON (builtins.readFile instancesFile);
|
||||
hostName = config.networking.hostName;
|
||||
instance =
|
||||
if builtins.hasAttr hostName instances
|
||||
then instances.${hostName}
|
||||
else throw "clawdinator: missing instance ${hostName} in ${instancesFile}";
|
||||
discordTokenSecret = instance.discordTokenSecret;
|
||||
repoSeedsFile = ../../clawdinator/repos.tsv;
|
||||
repoSeedLines =
|
||||
lib.filter
|
||||
@ -46,8 +55,13 @@ in
|
||||
owner = "clawdinator";
|
||||
group = "clawdinator";
|
||||
};
|
||||
age.secrets."clawdinator-discord-token" = {
|
||||
file = "${secretsPath}/clawdinator-discord-token.age";
|
||||
age.secrets."${discordTokenSecret}" = {
|
||||
file = "${secretsPath}/${discordTokenSecret}.age";
|
||||
owner = "clawdinator";
|
||||
group = "clawdinator";
|
||||
};
|
||||
age.secrets."clawdinator-control-token" = {
|
||||
file = "${secretsPath}/clawdinator-control-token.age";
|
||||
owner = "clawdinator";
|
||||
group = "clawdinator";
|
||||
};
|
||||
@ -64,13 +78,13 @@ in
|
||||
|
||||
services.clawdinator = {
|
||||
enable = true;
|
||||
instanceName = "CLAWDINATOR-1";
|
||||
instanceName = lib.toUpper hostName;
|
||||
memoryDir = "/memory";
|
||||
repoSeedSnapshotDir = "/var/lib/clawd/repo-seeds";
|
||||
bootstrap = {
|
||||
enable = true;
|
||||
s3Bucket = "clawdinator-images-eu1-20260107165216";
|
||||
s3Prefix = "bootstrap/clawdinator-1";
|
||||
s3Prefix = instance.bootstrapPrefix;
|
||||
region = "eu-central-1";
|
||||
secretsDir = "/var/lib/clawd/nix-secrets";
|
||||
repoSeedsDir = "/var/lib/clawd/repo-seeds";
|
||||
@ -109,7 +123,7 @@ in
|
||||
{
|
||||
id = "main";
|
||||
default = true;
|
||||
identity.name = "CLAWDINATOR-1";
|
||||
identity.name = cfg.instanceName;
|
||||
}
|
||||
];
|
||||
logging = {
|
||||
@ -188,7 +202,7 @@ in
|
||||
|
||||
anthropicApiKeyFile = "/run/agenix/clawdinator-anthropic-api-key";
|
||||
openaiApiKeyFile = "/run/agenix/clawdinator-openai-api-key-peter-2";
|
||||
discordTokenFile = "/run/agenix/clawdinator-discord-token";
|
||||
discordTokenFile = "/run/agenix/${discordTokenSecret}";
|
||||
telegramAllowFromFile = "/run/agenix/clawdinator-telegram-allow-from";
|
||||
|
||||
githubApp = {
|
||||
@ -201,7 +215,7 @@ in
|
||||
|
||||
selfUpdate.enable = true;
|
||||
selfUpdate.flakePath = "/var/lib/clawd/repos/clawdinators";
|
||||
selfUpdate.flakeHost = "clawdinator-1";
|
||||
selfUpdate.flakeHost = hostName;
|
||||
|
||||
githubSync.enable = true;
|
||||
githubSync.org = "openclaw";
|
||||
14
nix/instances.json
Normal file
14
nix/instances.json
Normal file
@ -0,0 +1,14 @@
|
||||
{
|
||||
"clawdinator-1": {
|
||||
"host": "clawdinator-1",
|
||||
"instanceType": "t3.large",
|
||||
"bootstrapPrefix": "bootstrap/clawdinator-1",
|
||||
"discordTokenSecret": "clawdinator-discord-token-1"
|
||||
},
|
||||
"clawdinator-2": {
|
||||
"host": "clawdinator-2",
|
||||
"instanceType": "t3.large",
|
||||
"bootstrapPrefix": "bootstrap/clawdinator-2",
|
||||
"discordTokenSecret": "clawdinator-discord-token-2"
|
||||
}
|
||||
}
|
||||
@ -696,8 +696,8 @@ in
|
||||
systemd.services.clawdinator-bootstrap = lib.mkIf cfg.bootstrap.enable {
|
||||
description = "CLAWDINATOR bootstrap (S3 secrets + repo seeds)";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network-online.target" ];
|
||||
wants = [ "network-online.target" ];
|
||||
after = [ "network-online.target" "amazon-init.service" ];
|
||||
wants = [ "network-online.target" "amazon-init.service" ];
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
RemainAfterExit = true;
|
||||
|
||||
@ -4,6 +4,14 @@ set -euo pipefail
|
||||
bucket="${1:?S3 bucket required}"
|
||||
prefix="${2:?S3 prefix required}"
|
||||
secrets_dir="${3:?Secrets dir required}"
|
||||
|
||||
override_file="${BOOTSTRAP_PREFIX_FILE:-/etc/clawdinator/bootstrap-prefix}"
|
||||
if [ -f "${override_file}" ]; then
|
||||
override_prefix="$(cat "${override_file}")"
|
||||
if [ -n "${override_prefix}" ]; then
|
||||
prefix="${override_prefix}"
|
||||
fi
|
||||
fi
|
||||
repo_seeds_dir="${4:?Repo seeds dir required}"
|
||||
age_key_path="${5:?Age key path required}"
|
||||
secrets_archive="${6:-secrets.tar.zst}"
|
||||
|
||||
70
scripts/fleet-control.sh
Executable file
70
scripts/fleet-control.sh
Executable file
@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env bash
# fleet-control.sh -- send a request to the fleet control-plane API.
#
# Usage: fleet-control.sh <deploy|status> [target] [ami_override]
#
# Inputs read from disk:
#   /etc/clawdinator/control-api-url        API endpoint URL
#   /run/agenix/clawdinator-control-token   bearer token
#   /etc/clawdinator/instance-name          this host's name (sent as "caller")
#
# Behavior:
#   deploy  requires a target and refuses to target the calling instance.
#   status  prints a table built from the response's .instances array, or the
#           raw response on stderr (exit 1) when .ok is not true.
#   For any other action the raw response body is printed to stdout.
set -euo pipefail

action="${1:-}"
target="${2:-}"
ami_override="${3:-}"

if [ -z "${action}" ]; then
  echo "Usage: fleet-control.sh <deploy|status> [target] [ami_override]" >&2
  exit 1
fi

api_url_file="/etc/clawdinator/control-api-url"
token_file="/run/agenix/clawdinator-control-token"
caller_file="/etc/clawdinator/instance-name"

if [ ! -f "${api_url_file}" ]; then
  echo "Missing control API URL: ${api_url_file}" >&2
  exit 1
fi
if [ ! -f "${token_file}" ]; then
  echo "Missing control API token: ${token_file}" >&2
  exit 1
fi
if [ ! -f "${caller_file}" ]; then
  echo "Missing instance name: ${caller_file}" >&2
  exit 1
fi

api_url="$(cat "${api_url_file}")"
control_token="$(cat "${token_file}")"
caller="$(cat "${caller_file}")"

if [ "${action}" = "deploy" ]; then
  if [ -z "${target}" ]; then
    echo "Target required. Usage: fleet-control.sh deploy <all|clawdinator-2>" >&2
    exit 1
  fi

  if [ "${target}" = "${caller}" ]; then
    echo "Refusing self-deploy for ${caller}." >&2
    exit 1
  fi
fi

# jq builds the JSON body so every value is escaped correctly.
payload="$(jq -n \
  --arg action "${action}" \
  --arg target "${target}" \
  --arg caller "${caller}" \
  --arg ami_override "${ami_override}" \
  '{action: $action, target: $target, caller: $caller, ami_override: $ami_override}')"

# The Authorization header is handed to curl through a file descriptor
# (-H @file) and written by the printf builtin, so the bearer token never
# appears in any process's argument list.
response="$(curl -sS -X POST \
  -H @<(printf 'Authorization: Bearer %s\n' "${control_token}") \
  -H "Content-Type: application/json" \
  -d "${payload}" \
  "${api_url}")"

if [ "${action}" = "status" ]; then
  # A body that is not a JSON object makes jq fail; treat that like
  # ok != true so the raw body is still shown instead of a bare jq error.
  ok="$(printf '%s' "${response}" | jq -r '.ok' 2>/dev/null || true)"
  if [ "${ok}" != "true" ]; then
    printf '%s\n' "${response}" >&2
    exit 1
  fi
  echo "Name | InstanceId | State | AMI | Public IP"
  printf '%s' "${response}" | jq -r '.instances[] | "\(.name) | \(.id) | \(.state) | \(.ami) | \(.ip)"'
  exit 0
fi

printf '%s\n' "${response}"
|
||||
30
scripts/fleet-deploy.sh
Executable file
30
scripts/fleet-deploy.sh
Executable file
@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env bash
# fleet-deploy.sh -- apply the OpenTofu configuration in infra/opentofu/aws.
#
# Required environment: TARGET, AMI_ID, AWS_REGION, SSH_PUBLIC_KEY,
#   TF_BACKEND_BUCKET, TF_BACKEND_KEY, TF_BACKEND_DYNAMO_TABLE.
# Optional environment: TF_BACKEND_REGION (defaults to AWS_REGION).
#
# TARGET=all runs a plain apply; any other value additionally forces
# replacement of aws_instance.clawdinator["<TARGET>"].
set -euo pipefail

target="${TARGET:?TARGET required}"
ami_id="${AMI_ID:?AMI_ID required}"
aws_region="${AWS_REGION:?AWS_REGION required}"
ssh_public_key="${SSH_PUBLIC_KEY:?SSH_PUBLIC_KEY required}"

backend_bucket="${TF_BACKEND_BUCKET:?TF_BACKEND_BUCKET required}"
backend_key="${TF_BACKEND_KEY:?TF_BACKEND_KEY required}"
backend_region="${TF_BACKEND_REGION:-${aws_region}}"
backend_table="${TF_BACKEND_DYNAMO_TABLE:?TF_BACKEND_DYNAMO_TABLE required}"

# Backend settings are supplied at init time.
init_args=(
  -backend-config="bucket=${backend_bucket}"
  -backend-config="key=${backend_key}"
  -backend-config="region=${backend_region}"
  -backend-config="dynamodb_table=${backend_table}"
)

cd infra/opentofu/aws

tofu init "${init_args[@]}"

export TF_VAR_aws_region="${aws_region}"
export TF_VAR_ami_id="${ami_id}"
export TF_VAR_ssh_public_key="${ssh_public_key}"

apply_args=(-auto-approve)
if [ "${target}" != "all" ]; then
  # Single-instance deploy: force replacement of just that instance.
  apply_args+=(-replace "aws_instance.clawdinator[\"${target}\"]")
fi

tofu apply "${apply_args[@]}"
|
||||
26
scripts/fleet-status.sh
Executable file
26
scripts/fleet-status.sh
Executable file
@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env bash
# fleet-status.sh -- print a table of EC2 instances tagged app=clawdinator.
#
# Required environment: AWS_REGION.
# Output columns: Name | InstanceId | State | AMI | Public IP
set -euo pipefail

region="${AWS_REGION:?AWS_REGION required}"

describe_args=(
  --region "${region}"
  --filters "Name=tag:app,Values=clawdinator"
  --query 'Reservations[].Instances[]'
  --output json
)
instances_json="$(aws ec2 describe-instances "${describe_args[@]}")"

case "${instances_json}" in
  "[]")
    echo "No CLAWDINATOR instances found."
    exit 0
    ;;
esac

echo "CLAWDINATOR Fleet"
echo "Name | InstanceId | State | AMI | Public IP"

# Name falls back to "unknown" when no Name tag exists; the public IP falls
# back to "n/a" when the instance has none.
jq -r '.[] | {
  name: (.Tags[]? | select(.Key=="Name").Value) // "unknown",
  id: .InstanceId,
  state: .State.Name,
  ami: .ImageId,
  ip: (.PublicIpAddress // "n/a")
} | "\(.name) | \(.id) | \(.state) | \(.ami) | \(.ip)"' <<<"${instances_json}"
|
||||
18
scripts/resolve-latest-ami.sh
Executable file
18
scripts/resolve-latest-ami.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
# resolve-latest-ami.sh -- print the ID of the newest self-owned AMI tagged
# clawdinator=true (newest by CreationDate).
#
# Required environment: AWS_REGION.
# Exits 1 with a message on stderr when no such AMI exists.
set -euo pipefail

region="${AWS_REGION:?AWS_REGION required}"

query_args=(
  --region "${region}"
  --owners self
  --filters "Name=tag:clawdinator,Values=true"
  --query 'Images | sort_by(@,&CreationDate)[-1].ImageId'
  --output text
)
ami_id="$(aws ec2 describe-images "${query_args[@]}")"

# With --output text an absent result is printed as the literal "None".
case "${ami_id}" in
  "" | "None")
    echo "No AMI found with tag clawdinator=true" >&2
    exit 1
    ;;
esac

echo "${ami_id}"
|
||||
50
scripts/upload-bootstrap-all.sh
Executable file
50
scripts/upload-bootstrap-all.sh
Executable file
@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env bash
# upload-bootstrap-all.sh -- build and upload one bootstrap bundle per instance.
#
# For every instance key in the instances registry, stage a private copy of
# the secrets directory that holds the shared secrets plus only that
# instance's Discord token, then run scripts/upload-bootstrap.sh with the
# instance's bootstrap prefix.
#
# Environment (all optional):
#   INSTANCES_FILE  registry JSON        (default: nix/instances.json)
#   SECRETS_DIR     age secrets source   (default: nix/age-secrets)
#   AGE_KEY_FILE    age key file         (default: nix/keys/clawdinator.agekey)
#   REPO_SEEDS_DIR  repo seeds directory (default: repo-seeds)
set -euo pipefail

instances_file="${INSTANCES_FILE:-nix/instances.json}"
secrets_dir="${SECRETS_DIR:-nix/age-secrets}"
age_key_file="${AGE_KEY_FILE:-nix/keys/clawdinator.agekey}"
repo_seeds_dir="${REPO_SEEDS_DIR:-repo-seeds}"

if [ ! -f "${instances_file}" ]; then
  echo "Missing instances file: ${instances_file}" >&2
  exit 1
fi

# The loop below is fed by process substitution, whose exit status bash
# ignores. Validate the registry up front so a malformed file aborts here
# instead of silently uploading nothing and exiting 0.
if ! jq -e 'type == "object" and all(.[]; type == "object")' "${instances_file}" >/dev/null; then
  echo "Invalid instances file: ${instances_file}" >&2
  exit 1
fi

workdir="$(mktemp -d)"
cleanup() {
  rm -rf "${workdir}"
}
trap cleanup EXIT

while IFS= read -r instance_name; do
  bootstrap_prefix="$(jq -r --arg name "${instance_name}" '.[$name].bootstrapPrefix' "${instances_file}")"
  token_secret="$(jq -r --arg name "${instance_name}" '.[$name].discordTokenSecret' "${instances_file}")"

  if [ -z "${bootstrap_prefix}" ] || [ "${bootstrap_prefix}" = "null" ]; then
    echo "Missing bootstrapPrefix for ${instance_name}" >&2
    exit 1
  fi
  if [ -z "${token_secret}" ] || [ "${token_secret}" = "null" ]; then
    echo "Missing discordTokenSecret for ${instance_name}" >&2
    exit 1
  fi

  instance_secrets="${workdir}/${instance_name}/secrets"
  mkdir -p "${instance_secrets}"

  # Copy everything except per-instance Discord tokens, then add back only
  # the token that belongs to this instance.
  rsync -a --exclude 'clawdinator-discord-token-*.age' "${secrets_dir}/" "${instance_secrets}/"

  if [ ! -f "${secrets_dir}/${token_secret}.age" ]; then
    echo "Missing instance token ${secrets_dir}/${token_secret}.age" >&2
    exit 1
  fi
  cp "${secrets_dir}/${token_secret}.age" "${instance_secrets}/${token_secret}.age"

  # stdin is detached so the child cannot consume the instance names that
  # this loop is still reading.
  BOOTSTRAP_PREFIX="${bootstrap_prefix}" \
    SECRETS_DIR="${instance_secrets}" \
    AGE_KEY_FILE="${age_key_file}" \
    REPO_SEEDS_DIR="${repo_seeds_dir}" \
    bash scripts/upload-bootstrap.sh </dev/null

done < <(jq -r 'keys[]' "${instances_file}")
|
||||
37
scripts/validate-age-secrets.sh
Executable file
37
scripts/validate-age-secrets.sh
Executable file
@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env bash
# validate-age-secrets.sh -- check that every required age secret file exists.
#
# Verifies the shared secrets listed in required_common, then the Discord
# token secret named by each instance in the instances registry. Exits 1 with
# a message on stderr at the first problem found.
#
# Environment (all optional):
#   INSTANCES_FILE  registry JSON      (default: nix/instances.json)
#   SECRETS_DIR     age secrets folder (default: nix/age-secrets)
set -euo pipefail

instances_file="${INSTANCES_FILE:-nix/instances.json}"
secrets_dir="${SECRETS_DIR:-nix/age-secrets}"

required_common=(
  "clawdinator-github-app.pem.age"
  "clawdinator-anthropic-api-key.age"
  "clawdinator-openai-api-key-peter-2.age"
  "clawdinator-control-token.age"
  "clawdinator-telegram-bot-token.age"
  "clawdinator-telegram-allow-from.age"
)

for secret_file in "${required_common[@]}"; do
  if [ ! -f "${secrets_dir}/${secret_file}" ]; then
    echo "Missing required secret: ${secrets_dir}/${secret_file}" >&2
    exit 1
  fi
done

if [ ! -f "${instances_file}" ]; then
  echo "Missing instances file: ${instances_file}" >&2
  exit 1
fi

# The loop below is fed by process substitution, whose exit status bash
# ignores. Without this check a malformed registry would produce no lines and
# the validation would pass vacuously.
if ! jq -e 'type == "object" and all(.[]; type == "object")' "${instances_file}" >/dev/null; then
  echo "Invalid instances file: ${instances_file}" >&2
  exit 1
fi

while IFS= read -r token_secret; do
  # jq -r prints the literal "null" when an instance has no discordTokenSecret.
  if [ -z "${token_secret}" ] || [ "${token_secret}" = "null" ]; then
    echo "Missing discordTokenSecret in ${instances_file}" >&2
    exit 1
  fi
  if [ ! -f "${secrets_dir}/${token_secret}.age" ]; then
    echo "Missing instance discord token: ${secrets_dir}/${token_secret}.age" >&2
    exit 1
  fi
done < <(jq -r 'to_entries[].value.discordTokenSecret' "${instances_file}")
|
||||
Loading…
Reference in New Issue
Block a user