Tighten audits and optimize batch/MSM hot paths

This commit is contained in:
shrec 2026-03-17 04:39:11 +00:00
parent 19085b5a79
commit 06a6699750
No known key found for this signature in database
12 changed files with 2030 additions and 15 deletions

67
AGENTS.md Normal file
View File

@ -0,0 +1,67 @@
# AGENTS.md -- UltrafastSecp256k1
Use the SQLite project graph before broad file search.
## Canonical Graph
- DB: `.project_graph.db`
- Rebuild:
```bash
python3 scripts/build_project_graph.py --rebuild
```
## Preferred Workflow
1. Query graph first.
2. Read only the files or line ranges the graph points to.
3. After structural changes, rebuild the graph.
4. Before finishing, rerun `preflight.py` if the change is substantial.
## Most Useful Commands
```bash
python3 scripts/query_graph.py context cpu/src/ct_sign.cpp
python3 scripts/query_graph.py impact cpu/src/ecdh.cpp
python3 scripts/query_graph.py routing ecdsa_sign
python3 scripts/query_graph.py tags
python3 scripts/query_graph.py tag constant_time
python3 scripts/query_graph.py symbol ecdsa_sign
python3 scripts/query_graph.py optimize 15
python3 scripts/query_graph.py risk 15
python3 scripts/query_graph.py gpuwork 15
python3 scripts/query_graph.py fragile 15
python3 scripts/query_graph.py hotspots 20
python3 scripts/query_graph.py coverage ecdsa_sign
```
## Reasoning Layers
The graph includes more than structure. It also includes:
- semantic classification
- secret/CT metadata
- parser-boundary sensitivity
- performance/gpu-candidate scoring
- audit coverage
- change history
- risk/gain/optimization priority
Important tables/views:
- `semantic_tags`
- `entity_tags`
- `symbol_semantics`
- `symbol_security`
- `symbol_performance`
- `symbol_audit_coverage`
- `symbol_history`
- `symbol_scores`
- `v_symbol_reasoning`
## Rules
- Do not claim CT guarantees without checking the graph and the relevant tests.
- Do not claim audit coverage without checking `function_test_map` or `symbol_audit_coverage`.
- Do not change ABI-visible or secret-bearing code blindly; query `routing`, `bindings`, and `fragile` first.
- If you add new graph-worthy entities, update the graph builder.

View File

@ -22,6 +22,7 @@
#include "secp256k1/ct/ops.hpp"
#include "secp256k1/ct_utils.hpp"
#include "secp256k1/sanitizer_scale.hpp"
#include "secp256k1/coins/wallet.hpp"
using namespace secp256k1::fast;
@ -98,6 +99,53 @@ static void test_zero_key_handling() {
printf(" %d checks\n\n", g_pass);
}
// [1b] Wallet private key strictness.
// Feeds from_private_key() one canonical key (1) and three out-of-range
// encodings (n, n+1, 2^256-1). The canonical key must be accepted and stay
// nonzero; every out-of-range encoding must be rejected and leave the
// returned key's private scalar at zero.
static void test_wallet_private_key_strictness() {
    using secp256k1::coins::wallet::from_private_key;

    g_section = "wallet_strict";
    printf("[1b] Wallet private key strictness\n");

    // Smallest canonical key: big-endian encoding of 1.
    std::array<uint8_t, 32> valid{};
    valid[31] = 0x01;

    // Big-endian encoding of the group order n.
    static constexpr std::array<uint8_t, 32> ORDER_N = {
        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,
        0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,
        0xBF,0xD2,0x5E,0x8C,0xD0,0x36,0x41,0x41
    };

    // Big-endian encoding of n + 1.
    static constexpr std::array<uint8_t, 32> ORDER_N_PLUS_1 = {
        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,
        0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,
        0xBF,0xD2,0x5E,0x8C,0xD0,0x36,0x41,0x42
    };

    // Largest 32-byte value (all bits set).
    std::array<uint8_t, 32> all_ff{};
    all_ff.fill(0xFF);

    // Accepted case.
    {
        auto [key, ok] = from_private_key(valid.data());
        CHECK(ok, "wallet accepts canonical key 1");
        CHECK(!key.priv.is_zero(), "wallet canonical key remains nonzero");
    }

    // Rejected case: key == n.
    {
        auto [key, ok] = from_private_key(ORDER_N.data());
        CHECK(!ok, "wallet rejects key == n");
        CHECK(key.priv.is_zero(), "wallet rejected n leaves zero key");
    }

    // Rejected case: key == n + 1.
    {
        auto [key, ok] = from_private_key(ORDER_N_PLUS_1.data());
        CHECK(!ok, "wallet rejects key == n+1");
        CHECK(key.priv.is_zero(), "wallet rejected n+1 leaves zero key");
    }

    // Rejected case: all-0xFF key.
    {
        auto [key, ok] = from_private_key(all_ff.data());
        CHECK(!ok, "wallet rejects all-ff key");
        CHECK(key.priv.is_zero(), "wallet rejected all-ff leaves zero key");
    }

    printf(" %d checks\n\n", g_pass);
}
// ============================================================================
// 2. Secret zeroization via ct_memzero
// ============================================================================
@ -462,6 +510,7 @@ int audit_security_run() {
g_pass = 0; g_fail = 0;
test_zero_key_handling();
test_wallet_private_key_strictness();
test_zeroization();
test_bitflip_resilience();
test_message_bitflip();

View File

@ -9,7 +9,7 @@
// Computes: R = s_1*P_1 + s_2*P_2 + ... + s_n*P_n
//
// Algorithm (bucket method, a.k.a. Pippenger):
// 1. Choose window width c = floor(log2(n)) (adaptive).
// 1. Choose window width c from an empirically tuned CPU heuristic.
// 2. Represent each scalar s in base-2^c digits.
// 3. For each digit position j (from MSB to LSB):
// a. Scatter: place P_i into bucket[digit_j(s_i)] for all i.
@ -18,11 +18,11 @@
// c. Combine: R = R*2^c + Sum
//
// Complexity: O(n/c + 2^c + 256*dbl) vs Strauss O(256 + n*2^(w-1))
// Crossover: Pippenger wins for n > ~128 (verified empirically).
// Current CPU crossover: Pippenger wins around n ~= 48.
//
// This implementation:
// - Pre-allocates all buckets in a single flat array (no heap per iteration)
// - Supports signed digits (2NAF-style) for halved bucket count
// - Uses predecoded digits and bucket reuse on the optimized CPU path
// - Falls back to Strauss for small n
//
// Reference: Bernstein, Doumen, Lange, Oosterwijk (2012),
@ -46,7 +46,7 @@ namespace secp256k1 {
// points - array of n points
// n - number of scalar-point pairs
//
// Performance: O(n/c + 2^c) per window, c ~= log2(n).
// Performance: O(n/c + 2^c) per window, with c chosen from measured bands.
// n=256: ~4x faster than Strauss
// n=1024: ~8x faster than Strauss
// n=4096: ~12x faster than Strauss
@ -61,11 +61,11 @@ fast::Point pippenger_msm(const std::vector<fast::Scalar>& scalars,
// -- Optimal Window Width -----------------------------------------------------
// Returns the optimal bucket window width c for n points.
// Minimizes: floor(256/c) * (n + 2^c) total point operations.
// Uses measured CPU bands, not just the textbook floor(log2(n)) heuristic.
unsigned pippenger_optimal_window(std::size_t n);
// -- Unified MSM (auto-selects best algorithm) --------------------------------
// Automatically picks Strauss (n <= 128) or Pippenger (n > 128).
// Automatically picks Strauss for very small MSMs and Pippenger from n >= 48.
fast::Point msm(const fast::Scalar* scalars,
const fast::Point* points,
std::size_t n);

View File

@ -52,8 +52,10 @@ std::pair<WalletKey, bool> from_private_key(const std::uint8_t* priv32) {
WalletKey key{};
// Parse and validate private key
auto scalar = fast::Scalar::from_bytes(priv32);
if (scalar.is_zero()) return {key, false};
fast::Scalar scalar;
if (!fast::Scalar::parse_bytes_strict_nonzero(priv32, scalar)) {
return {key, false};
}
key.priv = scalar;
key.pub = derive_public_key(scalar);

View File

@ -333,7 +333,28 @@ static void test_empty() {
check(true, "empty batch: no crash");
}
// -- Test 9: Negate table correctness -----------------------------------------
// -- Test 9: Small precompute edge cases -------------------------------------
// Exercises the precompute helpers at their smallest sizes:
//   - precompute_g_multiples(0) must return an empty table,
//   - precompute_g_multiples(1) must return exactly G,
//   - precompute_point_multiples(Q, 1) must return exactly Q.
// Element comparisons are performed only when the table is non-empty, so a
// failed size check is reported as a test failure instead of being followed
// by an out-of-bounds read on an empty table.
static void test_precompute_small_edge_cases() {
    (void)std::printf("[BatchAffine] Small precompute edge cases...\n");

    auto empty_g = precompute_g_multiples(0);
    check(empty_g.empty(), "precompute_g_multiples(0) empty");

    auto one_g = precompute_g_multiples(1);
    check(one_g.size() == 1, "precompute_g_multiples(1) size");
    Point const G = scalar_mul_generator(Scalar::one());
    if (!one_g.empty()) {
        check(one_g[0].x == G.x(), "precompute_g_multiples(1) x == G.x");
        check(one_g[0].y == G.y(), "precompute_g_multiples(1) y == G.y");
    }

    // Arbitrary non-generator base point: 9*G.
    Point const Q = scalar_mul_generator(Scalar::from_uint64(9));
    auto one_q = precompute_point_multiples(Q.x(), Q.y(), 1);
    check(one_q.size() == 1, "precompute_point_multiples(1) size");
    if (!one_q.empty()) {
        check(one_q[0].x == Q.x(), "precompute_point_multiples(1) x == Q.x");
        check(one_q[0].y == Q.y(), "precompute_point_multiples(1) y == Q.y");
    }
}
// -- Test 10: Negate table correctness ----------------------------------------
static void test_negate_table() {
(void)std::printf("[BatchAffine] Negate table...\n");
@ -369,6 +390,7 @@ int test_batch_add_affine_run() {
test_bidirectional();
test_parity();
test_arbitrary_point_table();
test_precompute_small_edge_cases();
test_negate_table();
test_large_batch();

View File

@ -84,6 +84,10 @@ static void hex_to_bytes(const char* hex, uint8_t* out, size_t len) {
// Well-known test private key (Bitcoin wiki example)
static constexpr const char* TEST_PRIVKEY_HEX =
"e8f32e723decf4051aefac8e2c93c9c5b214313817cdb01a1494b917c8436b35";
static constexpr const char* ORDER_N_HEX =
"fffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141";
static constexpr const char* ORDER_N_PLUS_1_HEX =
"fffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364142";
// ============================================================================
// 1. from_private_key
@ -108,6 +112,36 @@ static void test_from_private_key_zero() {
PASS();
}
// from_private_key must reject a key equal to the group order n and hand
// back a WalletKey whose private scalar is still zero.
static void test_from_private_key_order_n() {
    TEST("from_private_key: n rejected");

    uint8_t order_bytes[32];
    hex_to_bytes(ORDER_N_HEX, order_bytes, sizeof(order_bytes));

    auto [key, ok] = from_private_key(order_bytes);
    ASSERT_TRUE(!ok, "should fail for n");
    ASSERT_TRUE(key.priv.is_zero(), "rejected key remains zero");

    PASS();
}
// from_private_key must reject a key equal to n + 1 and hand back a
// WalletKey whose private scalar is still zero.
static void test_from_private_key_order_n_plus_1() {
    TEST("from_private_key: n+1 rejected");

    uint8_t above_order[32];
    hex_to_bytes(ORDER_N_PLUS_1_HEX, above_order, sizeof(above_order));

    auto [key, ok] = from_private_key(above_order);
    ASSERT_TRUE(!ok, "should fail for n+1");
    ASSERT_TRUE(key.priv.is_zero(), "rejected key remains zero");

    PASS();
}
// from_private_key must reject the all-0xFF encoding and hand back a
// WalletKey whose private scalar is still zero.
static void test_from_private_key_all_ff() {
    TEST("from_private_key: all-ff rejected");

    uint8_t max_bytes[32];
    std::memset(max_bytes, 0xFF, sizeof(max_bytes));

    auto [key, ok] = from_private_key(max_bytes);
    ASSERT_TRUE(!ok, "should fail for all-ff");
    ASSERT_TRUE(key.priv.is_zero(), "rejected key remains zero");

    PASS();
}
// ============================================================================
// 2-4. Address generation via wallet API
// ============================================================================
@ -660,6 +694,9 @@ int test_wallet_run() {
std::printf("\n--- Key Management ---\n");
test_from_private_key_valid();
test_from_private_key_zero();
test_from_private_key_order_n();
test_from_private_key_order_n_plus_1();
test_from_private_key_all_ff();
// Address generation
std::printf("\n--- Address Generation ---\n");

View File

@ -143,7 +143,7 @@ For computing `sum(k_i * P_i)`:
| Multi-scalar (Straus) | 250 us | 1,800 us |
| Multi-scalar (Pippenger) | -- | 900 us |
Pippenger is automatically selected when N > 64.
Pippenger is automatically selected when `N >= 48` on the current optimized CPU path, with predecoded digits and bucket reuse to reduce scatter/aggregate overhead.
---

View File

@ -0,0 +1,233 @@
# Project Graph Reasoning
UltrafastSecp256k1 ships with a SQLite-backed project graph at:
- `.project_graph.db`
This is not only a code index. It is a cryptographic engineering knowledge base
used by humans and AI agents to reason about:
- subsystem boundaries
- constant-time and secret-bearing paths
- parser boundaries
- audit coverage
- optimization opportunity vs. audit risk
- CPU-to-GPU candidate selection
- change-history-sensitive review targets
## Rebuild
```bash
python3 scripts/build_project_graph.py --rebuild
```
## What The Graph Contains
Core structural layers:
- source files
- include dependencies
- C ABI functions
- ABI routing
- C++ methods
- test targets
- audit modules
- docs
- call graph
- function-to-test map
Reasoning layers:
- `semantic_tags`
- `entity_tags`
- `symbol_semantics`
- `symbol_security`
- `symbol_performance`
- `symbol_audit_coverage`
- `symbol_history`
- `symbol_scores`
- `v_symbol_reasoning`
## Semantic Classification
Symbols and files are tagged with higher-level meaning, for example:
- `field_arithmetic`
- `scalar_arithmetic`
- `point_arithmetic`
- `modinv`
- `ecdsa`
- `schnorr`
- `ecdh`
- `bip352`
- `hashing`
- `ffi_abi`
- `wallet_flow`
- `gpu_acceleration`
- `constant_time`
- `parser_boundary`
- `audit_evidence`
This allows graph queries to operate on intent, not only filenames.
## Security Metadata
Each symbol may carry reasoning fields such as:
- `uses_secret_input`
- `must_be_constant_time`
- `public_data_only`
- `device_secret_upload`
- `requires_zeroization`
- `invalid_input_sensitive`
This makes it possible to separate:
- public verification code
- secret-bearing signing / derivation code
- parser-boundary code
- GPU/offload-sensitive flows
## Performance Metadata
Each symbol also gets estimated engineering metadata:
- `hotness_score`
- `estimated_cost`
- `batchable`
- `vectorizable`
- `gpu_candidate`
- `memory_bound`
- `compute_bound`
- `duplicated_backends`
This is heuristic, not a benchmark replacement. It is meant to guide review and
optimization triage.
## Audit Coverage Metadata
The graph records whether a symbol is covered by:
- unit tests
- fuzzing
- invalid-vector style tests
- CT tests
- cross-implementation differential checks
- GPU-equivalence style checks
- regression-style tests
It also stores:
- `last_audit_result`
- `times_failed_historically`
- `known_fragile`
## History Layer
The graph uses git history to derive:
- `times_modified`
- `recently_modified`
- `bug_fix_count`
- `performance_tuning_count`
- `audit_related_changes`
This helps prioritize review on recently changing, fragile, or heavily tuned
paths.
## Risk And Gain Scoring
Every symbol gets:
- `risk_score`
- `gain_score`
- `optimization_priority`
The current scoring is heuristic and intended for triage, not for automated
proof of correctness. It is useful for answering:
- which optimization candidates are high-gain and relatively lower-risk
- which secret-bearing or parser-sensitive symbols deserve review first
- which CPU paths look like strong GPU-offload candidates
## Query Commands
### Structural
```bash
python3 scripts/query_graph.py context cpu/src/ct_sign.cpp
python3 scripts/query_graph.py impact cpu/src/ecdh.cpp
python3 scripts/query_graph.py callgraph pippenger_msm
python3 scripts/query_graph.py coverage ecdsa_sign
```
### Semantic
```bash
python3 scripts/query_graph.py tags
python3 scripts/query_graph.py tag constant_time
python3 scripts/query_graph.py symbol ecdsa_sign
```
### Optimization / Audit Triage
```bash
python3 scripts/query_graph.py optimize 20
python3 scripts/query_graph.py risk 20
python3 scripts/query_graph.py gpuwork 20
python3 scripts/query_graph.py fragile 20
```
## Recommended Workflow
### Before editing a file
```bash
python3 scripts/query_graph.py context <file>
python3 scripts/query_graph.py impact <file>
```
### Before touching secret-bearing code
```bash
python3 scripts/query_graph.py security <file>
python3 scripts/query_graph.py fragile 20
python3 scripts/query_graph.py tag constant_time
```
### Before changing the C ABI
```bash
python3 scripts/query_graph.py routing <name>
python3 scripts/query_graph.py bindings
python3 scripts/query_graph.py tag ffi_surface
```
### Before proposing an optimization
```bash
python3 scripts/query_graph.py optimize 20
python3 scripts/query_graph.py gpuwork 20
python3 scripts/query_graph.py risk 20
```
## Machine-Readable Export
The reasoning graph is exported through:
```bash
python3 scripts/export_assurance.py -o assurance_report.json
```
The exported JSON includes:
- semantic tag inventory
- reasoning summary by category/backend
- optimization candidates
- risk hotspots
## Notes
- The reasoning layer is intentionally heuristic.
- It should guide review and AI assistance, not replace cryptographic judgment.
- If graph builder logic changes, rebuild the DB before relying on query output.

View File

@ -17,6 +17,9 @@
| [ESP32 Setup](ESP32_SETUP.md) | ESP32-S3/PICO-D4 flashing & testing guide |
| [RISC-V Optimizations](../RISCV_OPTIMIZATIONS.md) | RISC-V assembly & RVV details |
| [Porting Guide](../PORTING.md) | Add new platforms, architectures, GPU backends |
| [Project Graph Reasoning](PROJECT_GRAPH_REASONING.md) | SQLite code graph, semantic tags, symbol reasoning, and optimization/audit workflows |
| `scripts/query_graph.py tags` | Semantic tag inventory from the repository knowledge graph |
| `scripts/query_graph.py symbol/optimize/risk` | Symbol-level crypto reasoning, gain/risk scoring, and optimization candidates |
## Security & Audit

File diff suppressed because it is too large Load Diff

View File

@ -145,6 +145,58 @@ def export_routing_summary(conn):
return {r['layer']: r['count'] for r in rows}
def export_semantic_tags(conn):
    """Export the semantic tag inventory and the most heavily tagged entities.

    Args:
        conn: open SQLite connection whose rows are convertible with
            ``dict(row)`` and which exposes the ``semantic_tags`` and
            ``entity_tags`` tables.

    Returns:
        dict with two keys:
          * ``inventory`` - one dict per semantic tag (tag, domain,
            description, entities), most-used tags first.
          * ``top_entities`` - up to 25 dicts (entity_type, entity_id,
            tag_count, tags), ordered by number of tags carried.
    """
    inventory_rows = conn.execute("""
        SELECT st.tag, st.domain, st.description, COUNT(et.id) AS entities
        FROM semantic_tags st
        LEFT JOIN entity_tags et ON et.tag = st.tag
        GROUP BY st.tag, st.domain, st.description
        ORDER BY entities DESC, st.tag
        """).fetchall()

    densest_rows = conn.execute("""
        SELECT entity_type, entity_id, COUNT(*) AS tag_count,
        GROUP_CONCAT(tag, ', ') AS tags
        FROM entity_tags
        GROUP BY entity_type, entity_id
        ORDER BY tag_count DESC, entity_type, entity_id
        LIMIT 25
        """).fetchall()

    result = {}
    result['inventory'] = list(map(dict, inventory_rows))
    result['top_entities'] = list(map(dict, densest_rows))
    return result
def export_symbol_reasoning(conn):
    """Export the symbol reasoning view for optimization and audit triage.

    Args:
        conn: open SQLite connection whose rows are convertible with
            ``dict(row)`` and which exposes ``v_symbol_reasoning``.

    Returns:
        dict with three keys:
          * ``summary`` - symbol count and average risk/gain per
            (category, backend) pair, largest groups first.
          * ``optimization_candidates`` - top 25 symbols by
            optimization priority, then gain.
          * ``risk_hotspots`` - top 25 symbols by risk, then gain.
    """
    def _as_dicts(sql):
        # Run one query and materialize every row as a plain dict.
        return [dict(row) for row in conn.execute(sql).fetchall()]

    per_group = _as_dicts("""
        SELECT category, backend, COUNT(*) AS symbols,
        AVG(risk_score) AS avg_risk,
        AVG(gain_score) AS avg_gain
        FROM v_symbol_reasoning
        GROUP BY category, backend
        ORDER BY symbols DESC, category, backend
        """)
    best_targets = _as_dicts("""
        SELECT symbol_name, file_path, category, backend, risk_score, gain_score, optimization_priority
        FROM v_symbol_reasoning
        ORDER BY optimization_priority DESC, gain_score DESC
        LIMIT 25
        """)
    riskiest = _as_dicts("""
        SELECT symbol_name, file_path, category, secret_class, risk_score, gain_score
        FROM v_symbol_reasoning
        ORDER BY risk_score DESC, gain_score DESC
        LIMIT 25
        """)

    return {
        'summary': per_group,
        'optimization_candidates': best_targets,
        'risk_hotspots': riskiest,
    }
def export_graph_meta(conn):
"""Graph metadata."""
meta = {}
@ -181,6 +233,8 @@ def main():
'security_density': export_security_density(conn),
'protocol_status': export_protocol_status(conn),
'routing_summary': export_routing_summary(conn),
'semantic_tags': export_semantic_tags(conn),
'symbol_reasoning': export_symbol_reasoning(conn),
}
conn.close()

View File

@ -26,6 +26,12 @@ Usage Examples:
python3 scripts/query_graph.py gaps
python3 scripts/query_graph.py summary
python3 scripts/query_graph.py sql "SELECT * FROM error_codes"
python3 scripts/query_graph.py callgraph ecdsa_sign
python3 scripts/query_graph.py hotspots 10
python3 scripts/query_graph.py dead
python3 scripts/query_graph.py aliases ecdsa_sign
python3 scripts/query_graph.py coverage field_mul
python3 scripts/query_graph.py config cmake_option
"""
import sqlite3
@ -94,6 +100,15 @@ def cmd_search(query: str):
print(f"\nDOCS ({len(rows)}):")
for r in rows:
print(f" {r['path']} ({r['category']})")
# Semantic tags
rows = conn.execute("""SELECT entity_type, entity_id, tag, domain
FROM fts_tags WHERE fts_tags MATCH ?
LIMIT 20""", (fts_query,)).fetchall()
if rows:
print(f"\nSEMANTIC TAGS ({len(rows)}):")
for r in rows:
print(f" [{r['entity_type']}] {r['entity_id']} -> {r['tag']} ({r['domain']})")
conn.close()
@ -563,6 +578,358 @@ def cmd_preflight(mode: str = None):
args.append(mode)
subprocess.run(args)
# ---------------------------------------------------------------------------
# PHASE 4: new query commands
# ---------------------------------------------------------------------------
def cmd_callgraph(func_name: str):
    """Print the call graph neighbourhood of a function.

    Matches ``func_name`` as a substring (SQL ``LIKE %name%``) against the
    ``call_edges`` table and prints two sections: functions that call it
    (callers) and functions it calls (callees).

    Args:
        func_name: full or partial function name to look up.
    """
    conn = get_conn()
    try:
        pattern = f'%{func_name}%'
        print(f"=== Call Graph: {func_name} ===\n")

        # Callers
        callers = conn.execute("""SELECT DISTINCT caller_func, caller_file, call_line
            FROM call_edges WHERE callee_func LIKE ?
            ORDER BY caller_file, call_line""", (pattern,)).fetchall()
        if callers:
            print(f"CALLERS ({len(callers)}):")
            for r in callers:
                # call_line may be NULL; show '?' like the callee listing
                # does instead of printing "LNone".
                print(f" {r['caller_func']:40s} ({r['caller_file']}:L{r['call_line'] or '?'})")
        else:
            print("CALLERS: none found in call graph")

        # Callees
        callees = conn.execute("""SELECT DISTINCT callee_func, callee_file, call_line
            FROM call_edges WHERE caller_func LIKE ?
            ORDER BY call_line""", (pattern,)).fetchall()
        if callees:
            print(f"\nCALLEES ({len(callees)}):")
            for r in callees:
                print(f" L{r['call_line'] or '?':4} {r['callee_func']:40s} {r['callee_file'] or '(unknown)'}")
        else:
            print("\nCALLEES: none found in call graph")
    finally:
        # Release the connection even if a query or a format step raises.
        conn.close()
def cmd_hotspots(top_n: str = '15'):
    """Print the top N files from ``hotspot_scores``, highest score first.

    Args:
        top_n: number of files to show, as a string from the command line.
            Anything that cannot be parsed as an integer falls back to 15.
    """
    try:
        n = int(top_n)
    except (TypeError, ValueError):
        n = 15
    conn = get_conn()
    rows = conn.execute("""SELECT file_path, hotspot_score, coupling_score,
        security_density, test_coverage_gap,
        null_risk_score, reasons
        FROM hotspot_scores
        ORDER BY hotspot_score DESC LIMIT ?""", (n,)).fetchall()
    if not rows:
        print("No hotspot data. Rebuild graph with: python3 scripts/build_project_graph.py --rebuild")
        conn.close()
        return
    print(f"=== Top {n} Hotspot Files ===\n")
    print(f" {'SCORE':>5} {'COUP':>5} {'SEC':>5} {'GAP':>4} FILE")
    print(f" {'-'*5} {'-'*5} {'-'*5} {'-'*4} {'-'*50}")
    for r in rows:
        # `reasons` is stored as JSON text; treat missing or malformed
        # content as "no reasons" rather than aborting the listing.
        try:
            parsed = json.loads(r['reasons'] or '[]')
        except Exception:
            parsed = []
        # Valid JSON that is not a list (object, string, number) cannot be
        # sliced into reason labels, so it is ignored as well.
        reason_list = parsed if isinstance(parsed, list) else []
        # Show at most two reasons; str() keeps non-string items printable.
        tag = ','.join(str(item) for item in reason_list[:2])
        print(f" {r['hotspot_score']:>5.2f} {r['coupling_score']:>5.2f} "
              f"{r['security_density']:>5.2f} {r['test_coverage_gap']:>4.0f} "
              f"{r['file_path']} [{tag}]")
    conn.close()
def cmd_dead(filter_str: str = None):
    """List symbols the reachability analysis marked as unreachable.

    Args:
        filter_str: optional substring; when given, only symbols whose name
            or file path contains it are listed. The totals in the header
            always cover the whole ``reachability`` table.

    At most 100 symbols are printed, grouped by file; any remainder is
    summarized in a trailing line.
    """
    db = get_conn()

    if not filter_str:
        dead_rows = db.execute("""SELECT symbol, file_path, dead_reason, reach_via
            FROM reachability WHERE is_reachable=0 ORDER BY file_path, symbol""").fetchall()
    else:
        needle = f'%{filter_str}%'
        dead_rows = db.execute("""SELECT symbol, file_path, dead_reason, reach_via
            FROM reachability WHERE is_reachable=0 AND (symbol LIKE ? OR file_path LIKE ?)
            ORDER BY file_path, symbol""",
                               (needle, needle)).fetchall()

    def _count(sql):
        # Scalar COUNT(*) helper for the header totals.
        return db.execute(sql).fetchone()['cnt']

    total = _count("SELECT COUNT(*) AS cnt FROM reachability WHERE is_reachable=0")
    reachable = _count("SELECT COUNT(*) AS cnt FROM reachability WHERE is_reachable=1")
    print(f"=== Dead Code Analysis ({total} unreachable / {reachable + total} total) ===\n")

    if not dead_rows:
        print(" No unreachable functions found (or graph not built with call edges).")
        db.close()
        return

    shown_limit = 100
    last_file = None
    for row in dead_rows[:shown_limit]:
        # Emit a file heading whenever the (sorted) file path changes.
        if row['file_path'] != last_file:
            last_file = row['file_path']
            print(f"\n {last_file}:")
        print(f" {row['symbol']:40s} [{row['dead_reason'] or 'no_caller'}]")

    if len(dead_rows) > shown_limit:
        print(f"\n ... and {len(dead_rows) - 100} more (use filter to narrow down)")
    db.close()
def cmd_aliases(symbol: str = None):
    """Print entries from ``symbol_aliases``, most similar pairs first.

    Args:
        symbol: optional substring matched against both the canonical name
            and the alias. Without it, the 50 most similar pairs are shown.
    """
    db = get_conn()

    if not symbol:
        pairs = db.execute("""SELECT canonical, alias, similarity, kind
            FROM symbol_aliases ORDER BY similarity DESC LIMIT 50""").fetchall()
    else:
        needle = f'%{symbol}%'
        pairs = db.execute("""SELECT canonical, alias, similarity, kind
            FROM symbol_aliases
            WHERE canonical LIKE ? OR alias LIKE ?
            ORDER BY similarity DESC""",
                           (needle, needle)).fetchall()

    # The header total is table-wide, independent of any filter.
    total = db.execute("SELECT COUNT(*) AS cnt FROM symbol_aliases").fetchone()['cnt']
    print(f"=== Symbol Aliases ({total} total, showing {len(pairs)}) ===\n")

    if not pairs:
        print(" No aliases found. (Rebuild graph to generate.)")
        db.close()
        return

    for pair in pairs:
        print(f" [{pair['kind']:12s}] {pair['similarity']:.3f} {pair['canonical']:40s} ~= {pair['alias']}")
    db.close()
def cmd_coverage(func_name: str = None):
    """Print function-level test coverage from ``function_test_map``.

    Args:
        func_name: optional substring of a function name. When given, every
            matching (function, test target) mapping is listed together
            with the function's line span when ``function_index`` has one.
            When omitted, a summary plus the 15 files covered by the most
            distinct test targets is printed.
    """
    db = get_conn()

    if not func_name:
        # Summary: files covered vs uncovered
        covered = db.execute("SELECT COUNT(DISTINCT function_file) AS cnt FROM function_test_map").fetchone()['cnt']
        total_files = db.execute("SELECT COUNT(DISTINCT file_path) AS cnt FROM function_index").fetchone()['cnt']
        total_funcs = db.execute("SELECT COUNT(*) AS cnt FROM function_test_map").fetchone()['cnt']
        print("=== Function Coverage Summary ===\n")
        print(f" Files with coverage: {covered}/{total_files}")
        print(f" Total (function, test) mappings: {total_funcs}")
        print("\nTop covered files:")
        top_files = db.execute("""SELECT function_file, COUNT(DISTINCT test_target) AS tests,
            COUNT(DISTINCT function_name) AS funcs
            FROM function_test_map
            GROUP BY function_file ORDER BY tests DESC LIMIT 15""").fetchall()
        for entry in top_files:
            print(f" {entry['function_file']:55s} {entry['tests']:3d} tests {entry['funcs']:4d} functions")
    else:
        mappings = db.execute("""SELECT ftm.function_name, ftm.function_file, ftm.test_target,
            ftm.coverage_type, fi.start_line, fi.end_line
            FROM function_test_map ftm
            LEFT JOIN function_index fi
            ON fi.file_path = ftm.function_file
            AND fi.name = ftm.function_name
            WHERE ftm.function_name LIKE ?
            ORDER BY ftm.function_file""",
                              (f'%{func_name}%',)).fetchall()
        print(f"=== Test Coverage: *{func_name}* ({len(mappings)} mappings) ===\n")
        for entry in mappings:
            # The LEFT JOIN leaves start_line empty when the function is
            # not present in function_index.
            if entry['start_line']:
                span = f"L{entry['start_line']}-{entry['end_line']}"
            else:
                span = '?'
            print(f" {entry['function_name']:40s} {span:12s} <- {entry['test_target']} [{entry['coverage_type']}]")

    db.close()
def cmd_config(filter_type: str = None):
    """Print config/CMake option -> code symbol bindings, grouped by type.

    Args:
        filter_type: optional substring matched against the binding type
            and the config file path; omit it to list every binding.
    """
    db = get_conn()

    if not filter_type:
        bindings = db.execute("""SELECT config_file, config_key, code_symbol, code_file,
            binding_type, description
            FROM config_bindings ORDER BY binding_type, config_key""").fetchall()
    else:
        needle = f'%{filter_type}%'
        bindings = db.execute("""SELECT config_file, config_key, code_symbol, code_file,
            binding_type, description
            FROM config_bindings WHERE binding_type LIKE ? OR config_file LIKE ?
            ORDER BY binding_type, config_key""",
                              (needle, needle)).fetchall()

    print(f"=== Config Bindings ({len(bindings)}) ===\n")

    active_type = None
    for binding in bindings:
        # Rows are sorted by binding_type, so a change starts a new group.
        if binding['binding_type'] != active_type:
            active_type = binding['binding_type']
            print(f"\n [{active_type}]:")
        if binding['code_file']:
            file_str = f" ({binding['code_file']})"
        else:
            file_str = ''
        print(f" {binding['config_key']:40s} -> {binding['code_symbol']}{file_str}")

    db.close()
def cmd_tags(filter_str: str = None):
    """Print every semantic tag with its domain, usage count and description.

    Args:
        filter_str: optional substring matched against the tag name, its
            domain and its description; omit it to list all tags.
    """
    db = get_conn()

    if not filter_str:
        tag_rows = db.execute("""
            SELECT st.tag, st.domain, st.description, COUNT(et.id) AS tagged_entities
            FROM semantic_tags st
            LEFT JOIN entity_tags et ON et.tag = st.tag
            GROUP BY st.tag, st.domain, st.description
            ORDER BY tagged_entities DESC, st.tag
            """).fetchall()
    else:
        needle = f'%{filter_str}%'
        tag_rows = db.execute("""
            SELECT st.tag, st.domain, st.description, COUNT(et.id) AS tagged_entities
            FROM semantic_tags st
            LEFT JOIN entity_tags et ON et.tag = st.tag
            WHERE st.tag LIKE ? OR st.domain LIKE ? OR st.description LIKE ?
            GROUP BY st.tag, st.domain, st.description
            ORDER BY tagged_entities DESC, st.tag
            """, (needle, needle, needle)).fetchall()

    print(f"=== Semantic Tags ({len(tag_rows)}) ===\n")
    for entry in tag_rows:
        # Two lines per tag: summary first, description underneath.
        print(f" {entry['tag']:24s} [{entry['domain']}] entities={entry['tagged_entities']}")
        print(f" {entry['description']}")

    db.close()
def cmd_tag(tag: str):
    """Print the entities that carry a semantic tag, grouped by entity type.

    Args:
        tag: full or partial tag name; matched as a substring against
            ``entity_tags.tag``.
    """
    db = get_conn()
    tagged = db.execute("""
        SELECT entity_type, entity_id, confidence, origin
        FROM entity_tags
        WHERE tag LIKE ?
        ORDER BY entity_type, confidence DESC, entity_id
        """, (f'%{tag}%',)).fetchall()

    print(f"=== Semantic Tag: {tag} ({len(tagged)} entities) ===\n")

    if len(tagged) == 0:
        print(" No entities found.")
        db.close()
        return

    group = None
    for entity in tagged:
        # Rows arrive sorted by entity_type; print a heading per group.
        if entity['entity_type'] != group:
            group = entity['entity_type']
            print(f"\n [{group}]")
        print(f" {entity['entity_id']} (confidence={entity['confidence']:.2f}, {entity['origin']})")

    db.close()
def cmd_symbol(name: str):
    """Print the reasoning profile for every symbol matching ``name``.

    Looks the name up as a substring in ``v_symbol_reasoning`` and prints
    the semantic, security, performance, audit, history and score fields
    for each match, highest optimization priority first.

    Args:
        name: full or partial symbol name.

    Only the first 20 matches are printed in full. When more exist, a
    trailing line reports how many were omitted so the count in the header
    is not mistaken for the number of profiles shown.
    """
    max_shown = 20
    conn = get_conn()
    rows = conn.execute("""
        SELECT *
        FROM v_symbol_reasoning
        WHERE symbol_name LIKE ?
        ORDER BY optimization_priority DESC, risk_score DESC, file_path
        """, (f'%{name}%',)).fetchall()
    print(f"=== Symbol Reasoning: {name} ({len(rows)}) ===\n")
    if not rows:
        print(" No symbol found.")
        conn.close()
        return
    for r in rows[:max_shown]:
        print(f"{r['symbol_name']} [{r['file_path']}]")
        print(f" semantic: category={r['category']} math_core={r['math_core']} backend={r['backend']} coord={r['coordinate_model']}")
        print(f" security: secret_class={r['secret_class']} secret={r['uses_secret_input']} ct={r['must_be_constant_time']} public_only={r['public_data_only']}")
        print(f" perf: hotness={r['hotness_score']:.1f} gpu_candidate={r['gpu_candidate']} batchable={r['batchable']}")
        print(f" audit: unit={r['covered_by_unit_test']} fuzz={r['covered_by_fuzz']} ct={r['covered_by_ct_test']}")
        print(f" history: modified={r['times_modified']} recent={r['recently_modified']}")
        print(f" scores: risk={r['risk_score']:.1f} gain={r['gain_score']:.1f} priority={r['optimization_priority']:.1f}")
        print()
    if len(rows) > max_shown:
        # Make the truncation explicit instead of silently dropping rows.
        print(f" ... and {len(rows) - max_shown} more (use a more specific name to narrow down)")
    conn.close()
def cmd_optimize(top_n: str = '15'):
    """Print the top optimization candidates from ``v_symbol_reasoning``.

    Symbols categorized as test, audit or fuzz code are excluded; the rest
    are ranked by optimization priority, then gain.

    Args:
        top_n: number of symbols to show, as a string from the command
            line. Unparseable values fall back to 15.
    """
    try:
        limit = int(top_n)
    except (TypeError, ValueError):
        limit = 15

    db = get_conn()
    candidates = db.execute("""
        SELECT symbol_name, file_path, category, backend, hotness_score, gpu_candidate,
        batchable, risk_score, gain_score, optimization_priority
        FROM v_symbol_reasoning
        WHERE category NOT IN ('test', 'audit', 'fuzz')
        ORDER BY optimization_priority DESC, gain_score DESC
        LIMIT ?
        """, (limit,)).fetchall()

    print(f"=== Optimization Candidates ({len(candidates)}) ===\n")
    for sym in candidates:
        # Line 1: scores and identity; line 2: location and perf flags.
        print(f" {sym['optimization_priority']:>5.1f} gain={sym['gain_score']:>5.1f} risk={sym['risk_score']:>5.1f} "
              f"{sym['symbol_name']} [{sym['category']}, {sym['backend']}]")
        print(f" {sym['file_path']} hotness={sym['hotness_score']:.1f} batchable={sym['batchable']} gpu_candidate={sym['gpu_candidate']}")
    db.close()
def cmd_risk(top_n: str = '15'):
    """Print the highest-risk symbols from ``v_symbol_reasoning``.

    Symbols are ranked by risk score, then gain score, and shown with
    their unit/fuzz/CT coverage flags and recent-modification flag.

    Args:
        top_n: number of symbols to show, as a string from the command
            line. Unparseable values fall back to 15.
    """
    try:
        limit = int(top_n)
    except (TypeError, ValueError):
        limit = 15

    db = get_conn()
    hotspots = db.execute("""
        SELECT symbol_name, file_path, category, secret_class, risk_score, gain_score,
        covered_by_unit_test, covered_by_fuzz, covered_by_ct_test, recently_modified
        FROM v_symbol_reasoning
        ORDER BY risk_score DESC, gain_score DESC
        LIMIT ?
        """, (limit,)).fetchall()

    print(f"=== Risk Hotspots ({len(hotspots)}) ===\n")
    for sym in hotspots:
        print(f" risk={sym['risk_score']:>5.1f} gain={sym['gain_score']:>5.1f} {sym['symbol_name']} [{sym['category']}, {sym['secret_class']}]")
        print(f" {sym['file_path']} unit={sym['covered_by_unit_test']} fuzz={sym['covered_by_fuzz']} ct={sym['covered_by_ct_test']} recent={sym['recently_modified']}")
    db.close()
def cmd_gpuwork(top_n: str = '15'):
    """Print CPU-backend symbols flagged as GPU candidates.

    Only rows with ``backend='cpu'`` and ``gpu_candidate=1`` are
    considered, ranked by gain score, then optimization priority.

    Args:
        top_n: number of symbols to show, as a string from the command
            line. Unparseable values fall back to 15.
    """
    try:
        limit = int(top_n)
    except (TypeError, ValueError):
        limit = 15

    db = get_conn()
    offload = db.execute("""
        SELECT symbol_name, file_path, category, hotness_score, batchable,
        compute_bound, risk_score, gain_score, optimization_priority
        FROM v_symbol_reasoning
        WHERE backend='cpu' AND gpu_candidate=1
        ORDER BY gain_score DESC, optimization_priority DESC
        LIMIT ?
        """, (limit,)).fetchall()

    print(f"=== GPU Candidate Symbols ({len(offload)}) ===\n")
    for sym in offload:
        print(f" gain={sym['gain_score']:>5.1f} risk={sym['risk_score']:>5.1f} {sym['symbol_name']} [{sym['category']}]")
        print(f" {sym['file_path']} hotness={sym['hotness_score']:.1f} batchable={sym['batchable']} compute_bound={sym['compute_bound']} priority={sym['optimization_priority']:.1f}")
    db.close()
def cmd_fragile(top_n: str = '15'):
    """Print CT- or invalid-input-sensitive symbols that lack fuzz or CT tests.

    Joins the semantics, security, audit-coverage, history and score
    tables on (symbol_name, file_path). A symbol qualifies when it must be
    constant time or is invalid-input sensitive, and is missing fuzz
    coverage or CT-test coverage. Results are ranked by risk score, then
    recent modification, then the known-fragile flag.

    Args:
        top_n: number of symbols to show, as a string from the command
            line. Unparseable values fall back to 15.
    """
    try:
        limit = int(top_n)
    except (TypeError, ValueError):
        limit = 15

    db = get_conn()
    weak_spots = db.execute("""
        SELECT ss.symbol_name, ss.file_path, ss.secret_class,
        sec.invalid_input_sensitive, cov.covered_by_unit_test,
        cov.covered_by_fuzz, cov.covered_by_ct_test,
        cov.known_fragile, hist.recently_modified, score.risk_score
        FROM symbol_semantics ss
        JOIN symbol_security sec
        ON sec.symbol_name = ss.symbol_name AND sec.file_path = ss.file_path
        JOIN symbol_audit_coverage cov
        ON cov.symbol_name = ss.symbol_name AND cov.file_path = ss.file_path
        JOIN symbol_history hist
        ON hist.symbol_name = ss.symbol_name AND hist.file_path = ss.file_path
        JOIN symbol_scores score
        ON score.symbol_name = ss.symbol_name AND score.file_path = ss.file_path
        WHERE (sec.must_be_constant_time=1 OR sec.invalid_input_sensitive=1)
        AND (cov.covered_by_fuzz=0 OR cov.covered_by_ct_test=0)
        ORDER BY score.risk_score DESC, hist.recently_modified DESC, cov.known_fragile DESC
        LIMIT ?
        """, (limit,)).fetchall()

    print(f"=== Fragile Symbols ({len(weak_spots)}) ===\n")
    for sym in weak_spots:
        print(f" risk={sym['risk_score']:>5.1f} {sym['symbol_name']} [{sym['secret_class']}]")
        print(f" {sym['file_path']} invalid_input={sym['invalid_input_sensitive']} unit={sym['covered_by_unit_test']} fuzz={sym['covered_by_fuzz']} ct={sym['covered_by_ct_test']} recent={sym['recently_modified']} fragile={sym['known_fragile']}")
    db.close()
COMMANDS = {
'search': ('search <query>', cmd_search),
'file': ('file <path>', cmd_file),
@ -586,6 +953,20 @@ COMMANDS = {
'gaps': ('gaps', cmd_gaps),
'context': ('context <file>', cmd_context),
'preflight': ('preflight [--security|--coverage|--abi]', cmd_preflight),
# Phase 4: new commands
'callgraph': ('callgraph <function>', cmd_callgraph),
'hotspots': ('hotspots [N]', cmd_hotspots),
'dead': ('dead [filter]', cmd_dead),
'aliases': ('aliases [symbol]', cmd_aliases),
'coverage': ('coverage [function]', cmd_coverage),
'config': ('config [type]', cmd_config),
'tags': ('tags [filter]', cmd_tags),
'tag': ('tag <name>', cmd_tag),
'symbol': ('symbol <name>', cmd_symbol),
'optimize': ('optimize [N]', cmd_optimize),
'risk': ('risk [N]', cmd_risk),
'gpuwork': ('gpuwork [N]', cmd_gpuwork),
'fragile': ('fragile [N]', cmd_fragile),
}
if __name__ == '__main__':