diff --git a/memory/ops.md b/memory/ops.md
index e534376..f0d0e8a 100644
--- a/memory/ops.md
+++ b/memory/ops.md
@@ -26,3 +26,8 @@ Update with incidents, fixes, and operational lessons.
 - Instances:
   - clawdinator-1: i-0b6060699bb413d82 (IP 18.198.25.107, DNS ec2-18-198-25-107.eu-central-1.compute.amazonaws.com).
   - clawdinator-2: i-07bcba2bb924dfc93 (IP 3.66.165.141, DNS ec2-3-66-165-141.eu-central-1.compute.amazonaws.com).
+
+## 2026-02-04
+- clawdinator-2 booted without /etc/ec2-metadata/user-data, so amazon-init skipped user-data and clawdinator stayed inactive.
+- Manual recovery: fetch IMDS user-data, rerun user-data script, set git safe.directory, set transient hostname.
+- Fix: add fetch-ec2-metadata systemd unit to AMI config + git safe.directory in programs.git.
diff --git a/nix/hosts/clawdinator-1-image.nix b/nix/hosts/clawdinator-1-image.nix
index 43de157..64e587a 100644
--- a/nix/hosts/clawdinator-1-image.nix
+++ b/nix/hosts/clawdinator-1-image.nix
@@ -1,4 +1,4 @@
-{ modulesPath, config, ... }: {
+{ modulesPath, config, pkgs, ... }: {
   imports = [
     (modulesPath + "/virtualisation/ec2-data.nix")
     (modulesPath + "/virtualisation/amazon-init.nix")
@@ -29,4 +29,21 @@
     device = "/dev/disk/by-label/nixos";
     fsType = "ext4";
   };
+
+  systemd.services.fetch-ec2-metadata = {
+    description = "Fetch EC2 metadata";
+    wantedBy = [ "multi-user.target" ];
+    wants = [ "network-online.target" ];
+    after = [ "network-online.target" ];
+    path = [ pkgs.curl ];
+    serviceConfig = {
+      Type = "oneshot";
+      StandardOutput = "journal+console";
+      ExecStart = "${pkgs.bash}/bin/bash ${../../scripts/fetch-ec2-metadata.sh}";
+    };
+  };
+
+  # Order amazon-init after the fetch so /etc/ec2-metadata/user-data exists when it runs.
+  systemd.services.amazon-init.after = [ "fetch-ec2-metadata.service" ];
+  systemd.services.amazon-init.wants = [ "fetch-ec2-metadata.service" ];
 }
diff --git a/nix/modules/clawdinator.nix b/nix/modules/clawdinator.nix
index a284b9e..402e3bd 100644
--- a/nix/modules/clawdinator.nix
+++ b/nix/modules/clawdinator.nix
@@ -486,6 +486,9 @@ in
           name = "CLAWDINATOR Bot";
           email = "clawdinator[bot]@users.noreply.github.com";
         };
+        safe = {
+          directory = [ "/var/lib/clawd/repos/clawdinators" ];
+        };
       };
     };

diff --git a/scripts/fetch-ec2-metadata.sh b/scripts/fetch-ec2-metadata.sh
new file mode 100644
index 0000000..3321cf1
--- /dev/null
+++ b/scripts/fetch-ec2-metadata.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+#
+# Populate /etc/ec2-metadata from the EC2 instance metadata service (IMDS).
+# Every step is best-effort: a failed request is reported, but the script
+# carries on, so individual metadata files may be missing afterwards.
+
+metaDir=/etc/ec2-metadata
+mkdir -p "$metaDir"
+chmod 0755 "$metaDir"
+# Keep the glob outside the quotes; quoted, rm would look for a file that is
+# literally named "*" and files left from an earlier run would never be removed.
+rm -f -- "${metaDir:?}"/*
+
+# Request an IMDSv2 session token (TTL 600 seconds); curl prints it on stdout.
+get_imds_token() {
+  # retry-delay of 1 selected to give the system a second to get going,
+  # but not add a lot to the bootup time
+  curl \
+    --silent \
+    --show-error \
+    --retry 3 \
+    --retry-delay 1 \
+    --fail \
+    -X PUT \
+    --connect-timeout 1 \
+    -H "X-aws-ec2-metadata-token-ttl-seconds: 600" \
+    http://169.254.169.254/latest/api/token
+}
+
+# Check that $IMDS_TOKEN is accepted by requesting the instance id and
+# discarding the response body.
+preflight_imds_token() {
+  # retry-delay of 1 selected to give the system a second to get going,
+  # but not add a lot to the bootup time
+  curl \
+    --silent \
+    --show-error \
+    --retry 3 \
+    --retry-delay 1 \
+    --fail \
+    --connect-timeout 1 \
+    -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" \
+    -o /dev/null \
+    http://169.254.169.254/1.0/meta-data/instance-id
+}
+
+try=1
+while [ "$try" -le 3 ]; do
+  echo "(attempt $try/3) getting an EC2 instance metadata service v2 token..."
+  IMDS_TOKEN=$(get_imds_token) && break
+  try=$((try + 1))
+  sleep 1
+done
+
+if [ -z "$IMDS_TOKEN" ]; then
+  # Not fatal: the script carries on and the remaining requests are still attempted.
+  echo "failed to fetch an IMDSv2 token." >&2
+fi
+
+try=1
+while [ "$try" -le 10 ]; do
+  echo "(attempt $try/10) validating the EC2 instance metadata service v2 token..."
+  preflight_imds_token && break
+  try=$((try + 1))
+  sleep 1
+done
+
+echo "getting EC2 instance metadata..."
+
+# Fetch one IMDS resource; all arguments are passed straight through to curl.
+get_imds() {
+  # --fail to avoid populating missing files with 404 HTML response body
+  # || true to allow the script to continue even when encountering a 404
+  curl --silent --show-error --fail --header "X-aws-ec2-metadata-token: $IMDS_TOKEN" "$@" || true
+}
+
+get_imds -o "$metaDir/ami-manifest-path" http://169.254.169.254/1.0/meta-data/ami-manifest-path
+# umask 077 in a subshell: user-data is created with owner-only permissions.
+(umask 077 && get_imds -o "$metaDir/user-data" http://169.254.169.254/1.0/user-data)
+get_imds -o "$metaDir/hostname" http://169.254.169.254/1.0/meta-data/hostname
+get_imds -o "$metaDir/public-keys-0-openssh-key" http://169.254.169.254/1.0/meta-data/public-keys/0/openssh-key