* fix(precompute): correct cache validation and path separator - Fix cache file size validation rejecting valid files: use 1 byte/point minimum bound instead of hardcoded 65 bytes, since infinity points are serialized as 1 byte (flag only, no x/y coordinates). Addresses #116. - Fix path separator: use '/' unconditionally instead of Windows backslash, since forward slashes work on all platforms. Addresses #117. * fix(ci): fuzz_point k=n-1 edge case, benchmark noise filter - fuzz_point: handle k=n-1 where (k+1)*G = 0*G = infinity (pre-existing bug) - parse_benchmark.py: add MIN_REGRESSION_NS=25.0 filter to skip sub-25ns micro-ops that produce false positive regressions on shared CI runners * fix(ci): use fixed -march=x86-64-v3 with ccache, widen benchmark threshold - CMakeLists.txt: add SECP256K1_MARCH option to override -march=native Prevents SIGILL when ccache restores objects from a different CI runner CPU - All x86-64 workflows with ccache: pass -DSECP256K1_MARCH=x86-64-v3 - bench-regression.yml: increase alert-threshold from 150% to 200% Shared CI runners show up to ~60% variance on micro-ops * fix(ci): raise MIN_REGRESSION_NS to 50ns field_mul (15.8ns baseline) inflated to 32.5ns on CI runner (2.06x), exceeding even 200% threshold. Sub-50ns micro-ops are too granular for shared runner regression detection. Still shown in Dashboard. * add ABI layout guards and CRYPTO_INVARIANTS.md - ufsecp.h: static_assert guards on struct sizes and constant lengths to catch ABI-breaking changes at compile time (C++ and C11) - docs/CRYPTO_INVARIANTS.md: comprehensive crypto invariant reference covering private key validation, nonce safety (RFC 6979), pubkey validation, signature encoding, hash domain separation, point arithmetic, key tweaking, serialization byte order, thread safety, and constant-time guarantees --------- Co-authored-by: shrec <shrec@users.noreply.github.com>
186 lines
6.1 KiB
Python
186 lines
6.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Parse benchmark output from bench_unified (or legacy bench_comprehensive)
|
|
into github-action-benchmark compatible JSON (customSmallerIsBetter format).
|
|
|
|
Input: Raw text output from bench_unified or bench_comprehensive
|
|
Output: JSON file with benchmark entries
|
|
|
|
Supported input formats:
|
|
|
|
bench_unified table (ns/op, always nanoseconds):
|
|
| field_mul | 8.2 |
|
|
| Generator * k (FAST) | 5841.0 |
|
|
|
|
Legacy bench_comprehensive (name: value unit):
|
|
scalar_mul (K*G): 18.45 us (54,201 ops/sec)
|
|
field_mul: 8.23 ns (121,506,682 ops/sec)
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
def parse_benchmark_output(text: str) -> list[dict]:
|
|
"""Parse benchmark text output into benchmark entries."""
|
|
entries = []
|
|
seen = set()
|
|
|
|
# Minimum duration (ns) for regression tracking. Operations faster than
|
|
# this are too short to measure reliably on shared CI runners (GitHub
|
|
# Actions ubuntu-latest): scheduling jitter and timer granularity alone
|
|
# can cause 50-100% variance on sub-50ns timings. These entries are
|
|
# still shown in the Benchmark Dashboard; they are only excluded from
|
|
# the Perf Regression Gate to avoid false alerts.
|
|
MIN_REGRESSION_NS = 50.0
|
|
|
|
# Sections whose entries should be excluded from regression comparison.
|
|
# MICRO-DIAGNOSTICS are sub-operation benchmarks that may legitimately
|
|
# diverge from end-to-end performance (e.g. per-op speed traded for
|
|
# better pipeline ILP via inlining).
|
|
excluded_sections = {'MICRO-DIAGNOSTICS'}
|
|
|
|
# Pattern 1 (bench_unified): table rows "| name | value |"
|
|
# Matches numeric ns/op values only; skips headers (ns/op) and ratios (1.23x).
|
|
table_pattern = re.compile(
|
|
r'^\|\s+(.+?)\s+\|\s+([\d,]+(?:\.[\d]+)?)\s+\|$',
|
|
re.MULTILINE
|
|
)
|
|
# Section header: "| SECTION NAME (extra) | ns/op |"
|
|
section_pattern = re.compile(
|
|
r'^\|\s+(.+?)\s+\|\s+ns/op\s+\|$',
|
|
re.MULTILINE
|
|
)
|
|
|
|
# Build a map of position -> section name for section-aware filtering
|
|
section_starts = []
|
|
for m in section_pattern.finditer(text):
|
|
section_starts.append((m.start(), m.group(1).strip()))
|
|
|
|
def get_section(pos: int) -> str:
|
|
"""Return the section name for a given text position."""
|
|
current = ''
|
|
for start, name in section_starts:
|
|
if start > pos:
|
|
break
|
|
current = name
|
|
return current
|
|
|
|
for match in table_pattern.finditer(text):
|
|
name = match.group(1).strip()
|
|
value_str = match.group(2).replace(',', '')
|
|
# Skip header rows
|
|
if name == '' or value_str == '' or 'ns/op' in name.lower():
|
|
continue
|
|
try:
|
|
value_ns = float(value_str)
|
|
except ValueError:
|
|
continue
|
|
# Skip entries from excluded sections
|
|
section = get_section(match.start())
|
|
if any(excl in section for excl in excluded_sections):
|
|
continue
|
|
# Skip sub-threshold micro-ops (too noisy on shared runners)
|
|
if value_ns < MIN_REGRESSION_NS:
|
|
continue
|
|
if name not in seen:
|
|
seen.add(name)
|
|
entries.append({
|
|
'name': name,
|
|
'unit': 'ns',
|
|
'value': round(value_ns, 2),
|
|
})
|
|
|
|
# Pattern 2 (legacy): "name: value unit (ops/sec)"
|
|
# Examples:
|
|
# scalar_mul: 18.45 us
|
|
# field_mul: 8.23 ns
|
|
# Name class includes = for entries like "Batch Inverse (n=100)".
|
|
legacy_pattern = re.compile(
|
|
r'^\s*([a-zA-Z0-9_\s\(\)/\*\+\-=]+?):\s+'
|
|
r'([\d,\.]+)\s*(ns|us|ms|s)\b',
|
|
re.MULTILINE
|
|
)
|
|
|
|
for match in legacy_pattern.finditer(text):
|
|
name = match.group(1).strip()
|
|
value_str = match.group(2).replace(',', '')
|
|
unit = match.group(3)
|
|
|
|
if name in seen:
|
|
continue
|
|
|
|
# In bench_unified output, all standalone benchmarks use table-format
|
|
# rows (Pattern 1 above). Legacy-format printf lines that appear
|
|
# within bench_unified sections are diagnostic/derived metrics (cost
|
|
# decompositions, UNEXPLAINED gap, RFC6979 overhead, etc.), not
|
|
# independent benchmarks. Skip them to avoid false regression alerts
|
|
# on noisy computed values.
|
|
section = get_section(match.start())
|
|
if section:
|
|
continue
|
|
|
|
try:
|
|
value = float(value_str)
|
|
except ValueError:
|
|
continue
|
|
|
|
# Normalize to nanoseconds for consistent comparison
|
|
multiplier = {
|
|
'ns': 1.0,
|
|
'us': 1000.0,
|
|
'ms': 1_000_000.0,
|
|
's': 1_000_000_000.0,
|
|
}.get(unit, 1.0)
|
|
|
|
value_ns = value * multiplier
|
|
# Skip sub-threshold micro-ops (too noisy on shared runners)
|
|
if value_ns < MIN_REGRESSION_NS:
|
|
continue
|
|
seen.add(name)
|
|
|
|
entries.append({
|
|
'name': name,
|
|
'unit': 'ns',
|
|
'value': round(value_ns, 2),
|
|
})
|
|
|
|
return entries
|
|
|
|
|
|
def main():
|
|
if len(sys.argv) < 3:
|
|
print(f"Usage: {sys.argv[0]} <input.txt> <output.json>")
|
|
sys.exit(1)
|
|
|
|
input_path = Path(sys.argv[1])
|
|
output_path = Path(sys.argv[2])
|
|
|
|
if not input_path.exists():
|
|
print(f"Error: {input_path} not found")
|
|
sys.exit(1)
|
|
|
|
text = input_path.read_text(encoding='utf-8', errors='replace')
|
|
entries = parse_benchmark_output(text)
|
|
|
|
if not entries:
|
|
# No benchmarks parsed -- this is a HARD FAILURE.
|
|
# A dummy entry would hide real regressions by making future comparisons
|
|
# succeed against a meaningless baseline.
|
|
print("Error: No benchmark entries parsed from output -- failing")
|
|
print(" Input file:", input_path)
|
|
print(" Input size:", input_path.stat().st_size, "bytes")
|
|
print(" First 500 chars of input:")
|
|
print(text[:500])
|
|
sys.exit(1)
|
|
|
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
output_path.write_text(json.dumps(entries, indent=2))
|
|
print(f"Wrote {len(entries)} benchmark entries to {output_path}")
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|