mineracks-ckbunker-hsm-sign/ckbunker_hsm_sign/scraper.py
mineracks 9d380f5013 Initial import: CKBunker HSM validation harness
WebSocket client + CLI harness + pytest suite that exercises each axis of
a CKBunker + Coldcard Mk4 policy and asserts the expected outcomes, including
the critical negative test that a large PSBT without TOTP is rejected with
a specific 'rule #1: need user(s) confirmation' reason.

Configuration via .env / YAML / CLI flags, two pre-crafted test PSBTs as
fixtures (generation guide in fixtures/README.md), dashboard counter
scraper as sanity check, design rationale in docs/.
2026-04-14 10:50:04 +10:00

90 lines
2.6 KiB
Python

"""Dashboard scraper.
The harness wants to verify that *the server-visible counters moved* after
each test — a sanity check against "signer returned a PSBT but the server
didn't actually account for it". CKBunker renders these counters into the
top of every page, so we just do a regex pass over the HTML.
This is intentionally tolerant: CKBunker versions vary slightly in the
markup. If we can't find a value we return None, and the counter assertions
in the harness treat that as a soft skip rather than a hard fail.
"""
from __future__ import annotations
import re
from dataclasses import dataclass
import requests
@dataclass
class DashboardCounters:
approvals: int | None
refusals: int | None
amount_spent_btc: float | None
period_ends: str | None
def fetch_counters(
    base_url: str,
    *,
    cf_client_id: str | None = None,
    cf_client_secret: str | None = None,
    timeout: float = 15.0,
) -> DashboardCounters:
    """Download the dashboard root page and scrape its counters.

    Args:
        base_url: Server base URL; any trailing slashes are stripped and a
            single ``/`` is appended before the request is made.
        cf_client_id: If truthy, sent as the ``CF-Access-Client-Id`` header.
        cf_client_secret: If truthy, sent as the ``CF-Access-Client-Secret``
            header.
        timeout: Passed straight through to ``requests.get``.

    Returns:
        A ``DashboardCounters`` whose fields are ``None`` wherever the
        corresponding label/value pair could not be found in the HTML.

    Raises:
        Whatever ``requests.get`` or ``Response.raise_for_status`` raise,
        e.g. ``requests.HTTPError`` for an error status code.
    """
    # Each header is independent: only the ones with a truthy value are sent.
    optional_headers = {
        "CF-Access-Client-Id": cf_client_id,
        "CF-Access-Client-Secret": cf_client_secret,
    }
    headers: dict[str, str] = {
        name: value for name, value in optional_headers.items() if value
    }

    dashboard_url = base_url.rstrip("/") + "/"
    response = requests.get(dashboard_url, headers=headers, timeout=timeout)
    response.raise_for_status()
    page = response.text

    return DashboardCounters(
        approvals=_pluck_int(page, ["Approvals"]),
        refusals=_pluck_int(page, ["Refusals"]),
        amount_spent_btc=_pluck_btc(page, ["Amount Spent"]),
        period_ends=_pluck_text(page, ["Period Ends"]),
    )
def _pluck_int(html: str, labels: list[str]) -> int | None:
# Matches either:
# <th>Approvals</th> ... <td>2</td>
# <div>Approvals</div><div class="...">2</div>
# keeps a small search window after each label.
for label in labels:
m = re.search(rf"{re.escape(label)}.{{0,500}}?>\s*(\d+)\s*<", html, re.S)
if m:
try:
return int(m.group(1))
except ValueError:
continue
return None
def _pluck_btc(html: str, labels: list[str]) -> float | None:
for label in labels:
m = re.search(
rf"{re.escape(label)}.{{0,500}}?>\s*([0-9]+\.[0-9]+)\s*BTC",
html,
re.S,
)
if m:
try:
return float(m.group(1))
except ValueError:
continue
return None
def _pluck_text(html: str, labels: list[str]) -> str | None:
for label in labels:
m = re.search(rf"{re.escape(label)}.{{0,500}}?>\s*([^<\s][^<]{{0,40}}?)\s*<", html, re.S)
if m:
return m.group(1).strip()
return None