fix(opencl,ci): OpenCL 6-bug fix (27/27 PASS), dead code removal, Scorecard .sigstore (#133)
* chore: remove dead SIMD/FE52-ARM64 code, add edge-case tests Remove 4 dead-code files that are never compiled on any active platform: - cpu/include/secp256k1/field_simd.hpp (169 lines) - cpu/src/field_asm52_arm64.cpp (254 lines) - cpu/src/field_simd.cpp (130 lines) - cpu/tests/test_simd_batch.cpp (220 lines) Replace test_simd_batch with test_edge_cases (60 tests covering scalar zero, infinity arithmetic, BIP-32 IL>=n, cache corruption, and other coverage gaps identified in CT Hardening Gap analysis). Update CMakeLists.txt for cpu, audit, and esp32_audit to reflect removal and replacement. All 31 ctest targets pass. * fix(opencl): fix 6 bugs in OpenCL kernels -- 27/27 audit PASS Root cause: NC constant typo in secp256k1_extended.cl -- 0x402DA1732FC9BEEF should be 0x402DA1732FC9BEBF (off by 0x30). This single hex digit error corrupted ALL scalar mod-n operations, causing scalar_inverse to fail, which broke ECDSA and Schnorr verify. Bug #1: Kernel name mismatch in opencl_context.cpp -- batch_jacobian_to_affine vs batch_jacobian_to_affine_kernel. Bug #2: Cross-program pubkey inconsistency -- host computed pubkey via embedded kernel (PTX, correct) but verify ran in extended kernel (different field arithmetic). Added ext_generator_mul() helper using extended kernel's generator_mul_windowed for consistency. Bug #3: Schnorr pubkey -- get_schnorr_pubkey_x() now uses ext_generator_mul(). Bug #4: scalar_mul_mod_n_impl -- Barrett reduction completely broken. Rewritten with 2^256 = NC (mod n) reduction: 3 passes + scalar_cond_sub_n x3. Bug #5: field_mul_impl/field_sqr_impl -- carry overflow in schoolbook multiply. Rewritten with column-based muladd/muladd2 3-register accumulator. Bug #6: NC constant typo (THE ROOT CAUSE) -- BEEF vs BEBF. OpenCL audit result: 27/27 PASS, AUDIT-READY (0.7s on RTX 5060 Ti). * ci(scorecard): rename cosign .bundle to .sigstore for OSSF recognition OSSF Scorecard Signed-Releases check does not recognize .bundle extension. 
Rename all cosign output from .bundle to .sigstore (standard Sigstore extension) so Scorecard can detect signed artifacts. Also extend attest-build-provenance subject-path to include .gem, .jar, .deb, and .rpm package formats for complete SLSA provenance coverage. * fix(ci): pass -T to docker-compose run in non-interactive contexts The pre-push hook runs ci-local.sh branch-gate without a TTY, causing docker-compose run to fail with 'the input device is not a TTY'. Detect non-interactive stdin and pass -T flag automatically. --------- Co-authored-by: shrec <shrec@users.noreply.github.com>
This commit is contained in:
parent
68fda79564
commit
1609bec6f5
16
.github/workflows/release.yml
vendored
16
.github/workflows/release.yml
vendored
@ -1581,6 +1581,10 @@ jobs:
|
||||
dist/**/*.whl
|
||||
dist/**/*.nupkg
|
||||
dist/**/*.tgz
|
||||
dist/**/*.gem
|
||||
dist/**/*.jar
|
||||
dist/**/*.deb
|
||||
dist/**/*.rpm
|
||||
|
||||
# -- Cosign keyless signing (Sigstore) --
|
||||
- name: Install cosign
|
||||
@ -1594,12 +1598,12 @@ jobs:
|
||||
# SHA256SUMS signature is the root of trust for the entire release.
|
||||
echo "Signing: SHA256SUMS"
|
||||
cosign sign-blob --yes SHA256SUMS \
|
||||
--bundle SHA256SUMS.bundle
|
||||
--bundle SHA256SUMS.sigstore
|
||||
echo " [OK] SHA256SUMS signed"
|
||||
|
||||
# Verify the signature immediately (fail-fast integrity check)
|
||||
cosign verify-blob SHA256SUMS \
|
||||
--bundle SHA256SUMS.bundle \
|
||||
--bundle SHA256SUMS.sigstore \
|
||||
--certificate-identity-regexp='.*' \
|
||||
--certificate-oidc-issuer-regexp='.*'
|
||||
echo " [OK] SHA256SUMS signature verified"
|
||||
@ -1614,7 +1618,7 @@ jobs:
|
||||
while IFS= read -r -d '' f; do
|
||||
echo "Signing: $f"
|
||||
if cosign sign-blob --yes "$f" \
|
||||
--bundle "${f}.bundle"; then
|
||||
--bundle "${f}.sigstore"; then
|
||||
SIGNED=$((SIGNED + 1))
|
||||
else
|
||||
echo "::error::Failed to sign artifact: $f"
|
||||
@ -1634,7 +1638,7 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
echo "=== Cosign: all $SIGNED artifacts signed successfully ==="
|
||||
ls -la SHA256SUMS.bundle
|
||||
ls -la SHA256SUMS.sigstore
|
||||
|
||||
# -- Verification Artifacts --
|
||||
- name: Generate verification artifacts
|
||||
@ -1718,8 +1722,8 @@ jobs:
|
||||
files: |
|
||||
dist/**/*
|
||||
SHA256SUMS
|
||||
SHA256SUMS.bundle
|
||||
dist/**/*.bundle
|
||||
SHA256SUMS.sigstore
|
||||
dist/**/*.sigstore
|
||||
sbom.cdx.json
|
||||
selftest_report.json
|
||||
traceability_report.json
|
||||
|
||||
@ -313,7 +313,7 @@ add_executable(unified_audit_runner
|
||||
${CPU_TESTS_DIR}/test_bip32_vectors.cpp
|
||||
${CPU_TESTS_DIR}/test_musig2.cpp
|
||||
${CPU_TESTS_DIR}/test_ecdh_recovery_taproot.cpp
|
||||
${CPU_TESTS_DIR}/test_simd_batch.cpp
|
||||
${CPU_TESTS_DIR}/test_edge_cases.cpp
|
||||
${CPU_TESTS_DIR}/test_v4_features.cpp
|
||||
${CPU_TESTS_DIR}/test_coins.cpp
|
||||
${CPU_TESTS_DIR}/test_batch_add_affine.cpp
|
||||
|
||||
@ -73,7 +73,7 @@ int test_bip32_run();
|
||||
int test_bip32_vectors_run();
|
||||
int test_musig2_run();
|
||||
int test_ecdh_recovery_taproot_run();
|
||||
int test_simd_batch_run();
|
||||
int test_edge_cases_run();
|
||||
int test_v4_features_run();
|
||||
int test_coins_run();
|
||||
int test_batch_add_affine_run();
|
||||
@ -286,7 +286,7 @@ static const AuditModule ALL_MODULES[] = {
|
||||
// Section 8: Performance Validation & Regression
|
||||
// ===================================================================
|
||||
{ "hash_accel", "Accelerated hashing", "performance", test_hash_accel_run, false },
|
||||
{ "simd_batch", "SIMD batch operations", "performance", test_simd_batch_run, false },
|
||||
{ "edge_cases", "Edge cases & coverage gaps", "correctness", test_edge_cases_run, false },
|
||||
{ "multiscalar", "Multi-scalar & batch verify", "performance", test_multiscalar_batch_run, false },
|
||||
{ "audit_perf", "Performance smoke (sign/verify roundtrip)", "performance", audit_perf_run, false },
|
||||
};
|
||||
|
||||
@ -35,7 +35,6 @@ set(SECP256K1_SOURCES
|
||||
src/ecdh.cpp # ECDH key exchange
|
||||
src/recovery.cpp # ECDSA public key recovery
|
||||
src/taproot.cpp # Taproot (BIP-341/342) key tweaking
|
||||
src/field_simd.cpp # SIMD batch field operations + Montgomery batch inverse
|
||||
src/batch_add_affine.cpp # Affine batch addition for sequential ECC search
|
||||
src/hash_accel.cpp # Accelerated SHA-256 (SHA-NI) + RIPEMD-160 + Hash160
|
||||
src/pedersen.cpp # Pedersen commitments (homomorphic)
|
||||
@ -219,12 +218,9 @@ if(SECP256K1_USE_ASM)
|
||||
|
||||
list(APPEND SECP256K1_SOURCES
|
||||
src/field_asm_arm64.cpp
|
||||
src/field_asm52_arm64.cpp
|
||||
)
|
||||
|
||||
add_compile_definitions(SECP256K1_HAS_ARM64_ASM=1)
|
||||
add_compile_definitions(SECP256K1_HAS_ARM64_FE52_ASM=1)
|
||||
message(STATUS "Secp256k1: 5x52 MUL/UMULH assembly enabled (ARM64)")
|
||||
set(SECP256K1_HAS_ASM TRUE CACHE INTERNAL "Assembly support enabled")
|
||||
|
||||
if(SECP256K1_USE_FAST_REDUCTION)
|
||||
@ -733,7 +729,7 @@ if(BUILD_TESTING)
|
||||
tests/test_bip32_vectors.cpp
|
||||
tests/test_musig2.cpp
|
||||
tests/test_ecdh_recovery_taproot.cpp
|
||||
tests/test_simd_batch.cpp
|
||||
tests/test_edge_cases.cpp
|
||||
tests/test_v4_features.cpp
|
||||
tests/test_coins.cpp
|
||||
tests/test_batch_add_affine.cpp
|
||||
@ -863,6 +859,14 @@ if(BUILD_TESTING)
|
||||
endif()
|
||||
add_test(NAME point_edge_cases COMMAND test_point_edge_cases_standalone)
|
||||
|
||||
# Edge case & coverage gap tests (scalar zero, infinity arithmetic, BIP-32 IL>=n, cache corruption)
|
||||
add_executable(test_edge_cases_standalone
|
||||
tests/test_edge_cases.cpp
|
||||
)
|
||||
target_link_libraries(test_edge_cases_standalone PRIVATE ${SECP256K1_LIB_NAME})
|
||||
target_compile_definitions(test_edge_cases_standalone PRIVATE STANDALONE_TEST)
|
||||
add_test(NAME edge_cases COMMAND test_edge_cases_standalone)
|
||||
|
||||
# -- CTest labels for core library tests --------------------------------
|
||||
# Label all core tests so they can be run as a group:
|
||||
# ctest --test-dir <build> -L core
|
||||
@ -870,7 +874,7 @@ if(BUILD_TESTING)
|
||||
selftest batch_add_affine hash_accel
|
||||
field_26 exhaustive comprehensive
|
||||
bip340_vectors bip340_strict bip32_vectors
|
||||
rfc6979_vectors ecc_properties point_edge_cases
|
||||
rfc6979_vectors ecc_properties point_edge_cases edge_cases
|
||||
)
|
||||
# field_52 only exists when __uint128_t is available (not plain MSVC)
|
||||
if(NOT (MSVC AND NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
|
||||
|
||||
@ -1,169 +0,0 @@
|
||||
#ifndef SECP256K1_FIELD_SIMD_HPP
|
||||
#define SECP256K1_FIELD_SIMD_HPP
|
||||
|
||||
// ============================================================================
|
||||
// AVX2 / AVX-512 SIMD Field Arithmetic -- secp256k1
|
||||
// ============================================================================
|
||||
// Batch field operations using x86 SIMD intrinsics for 4x (AVX2) or
|
||||
// 8x (AVX-512) parallel field element processing.
|
||||
//
|
||||
// Architecture:
|
||||
// - Runtime CPUID detection: avx2_available(), avx512_available()
|
||||
// - Batch API processes N field elements in parallel
|
||||
// - Falls back to scalar when SIMD not available
|
||||
//
|
||||
// Performance model:
|
||||
// - AVX2 (256-bit): 4 field ops in parallel -> ~3x throughput for batch work
|
||||
// - AVX-512 (512-bit): 8 field ops -> ~5-6x throughput
|
||||
// - Only beneficial for batch operations (batch verify, multi-scalar mul)
|
||||
// - Single-element ops are faster with scalar code (pipeline fill overhead)
|
||||
//
|
||||
// Usage:
|
||||
// if (secp256k1::simd::avx2_available()) {
|
||||
// secp256k1::simd::batch_field_add_avx2(out, a, b, count);
|
||||
// }
|
||||
// ============================================================================
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstddef>
|
||||
#include "secp256k1/field.hpp"
|
||||
|
||||
// Architecture detection
|
||||
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
|
||||
#define SECP256K1_X86_TARGET 1
|
||||
#endif
|
||||
|
||||
#ifdef SECP256K1_X86_TARGET
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <cpuid.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace secp256k1::simd {
|
||||
|
||||
using fast::FieldElement;
|
||||
|
||||
// -- Runtime Feature Detection ------------------------------------------------
|
||||
|
||||
namespace detail {

#ifdef SECP256K1_X86_TARGET

// Runs CPUID for (leaf, subleaf) and stores EAX, EBX, ECX, EDX in regs[0..3].
// Returns false when the CPU does not implement the requested leaf, or when
// the compiler offers no known way to execute CPUID.
inline bool x86_cpuid(unsigned int leaf, unsigned int subleaf,
                      unsigned int (&regs)[4]) noexcept {
#if defined(_MSC_VER)
    int info[4] = {0, 0, 0, 0};
    __cpuid(info, 0);  // EAX of leaf 0 = highest supported basic leaf
    if (static_cast<unsigned int>(info[0]) < leaf) return false;
    __cpuidex(info, static_cast<int>(leaf), static_cast<int>(subleaf));
    for (int i = 0; i < 4; ++i) regs[i] = static_cast<unsigned int>(info[i]);
    return true;
#elif defined(__GNUC__) || defined(__clang__)
    return __get_cpuid_count(leaf, subleaf,
                             &regs[0], &regs[1], &regs[2], &regs[3]) != 0;
#else
    (void)leaf; (void)subleaf; (void)regs;
    return false;
#endif
}

// Reads XCR0, the register in which the OS advertises the register state it
// saves and restores. Only valid after CPUID.1:ECX.OSXSAVE has been confirmed.
inline std::uint64_t x86_read_xcr0() noexcept {
#if defined(_MSC_VER) && !defined(__clang__)
    // _xgetbv is expected to come in through <intrin.h>.
    return static_cast<std::uint64_t>(_xgetbv(0));
#elif defined(__GNUC__) || defined(__clang__)
    // Inline asm instead of the _xgetbv intrinsic so no -mxsave flag is needed.
    unsigned int lo = 0, hi = 0;
    __asm__ volatile("xgetbv" : "=a"(lo), "=d"(hi) : "c"(0u));
    return (static_cast<std::uint64_t>(hi) << 32) | lo;
#else
    return 0;  // unknown compiler: report that no state is OS-enabled
#endif
}

#endif  // SECP256K1_X86_TARGET

// True only when all of the following hold:
//   * the CPU reports OSXSAVE and AVX (CPUID.1:ECX bits 27 and 28),
//   * the OS has enabled every XCR0 state component in xcr0_required,
//   * CPUID.(EAX=7,ECX=0):EBX has bit leaf7_ebx_bit set.
// Always false on non-x86 targets and on compilers without CPUID access.
inline bool x86_vector_ext_usable(unsigned int leaf7_ebx_bit,
                                  std::uint64_t xcr0_required) noexcept {
#ifdef SECP256K1_X86_TARGET
    unsigned int regs[4] = {0, 0, 0, 0};

    if (!x86_cpuid(1, 0, regs)) return false;
    constexpr unsigned int kOsxsaveAndAvx = (1u << 27) | (1u << 28);
    if ((regs[2] & kOsxsaveAndAvx) != kOsxsaveAndAvx) return false;

    // The CPU bit alone is not enough: without OS support for the wider
    // register state, executing the instructions faults.
    if ((x86_read_xcr0() & xcr0_required) != xcr0_required) return false;

    if (!x86_cpuid(7, 0, regs)) return false;
    return ((regs[1] >> leaf7_ebx_bit) & 1u) != 0;
#else
    (void)leaf7_ebx_bit; (void)xcr0_required;
    return false;
#endif
}

}  // namespace detail

// Check if AVX2 is available at runtime.
// Requires the CPU feature bit (leaf 7 EBX bit 5) and OS-enabled XMM + YMM
// state (XCR0 bits 1 and 2).
inline bool avx2_available() noexcept {
    return detail::x86_vector_ext_usable(5, 0x6);
}

// Check if AVX-512F is available at runtime.
// Requires the CPU feature bit (leaf 7 EBX bit 16) and OS-enabled XMM, YMM,
// opmask, ZMM_Hi256 and Hi16_ZMM state (XCR0 bits 1, 2, 5, 6, 7).
inline bool avx512_available() noexcept {
    return detail::x86_vector_ext_usable(16, 0xE6);
}
|
||||
|
||||
// -- SIMD Tier Enum -----------------------------------------------------------
|
||||
|
||||
// Capability tier returned by detect_simd_tier(). The numeric values grow
// with vector width, so a larger value means a wider SIMD unit.
enum class SimdTier : int {
|
||||
SCALAR = 0, // No SIMD, scalar fallback
|
||||
AVX2 = 1, // AVX2 (256-bit, 4-way)
|
||||
AVX512 = 2, // AVX-512 (512-bit, 8-way)
|
||||
};
|
||||
|
||||
// Detect best available SIMD tier
|
||||
// Reports the widest tier the runtime checks accept. AVX-512 is probed first;
// AVX2 is only probed when AVX-512 is unavailable.
inline SimdTier detect_simd_tier() noexcept {
    return avx512_available() ? SimdTier::AVX512
         : avx2_available()   ? SimdTier::AVX2
                              : SimdTier::SCALAR;
}
|
||||
|
||||
// Human-readable label for a tier. Any value other than AVX512 or AVX2
// (including SCALAR) maps to "Scalar".
inline const char* simd_tier_name(SimdTier tier) noexcept {
    if (tier == SimdTier::AVX512) {
        return "AVX-512";
    }
    if (tier == SimdTier::AVX2) {
        return "AVX2";
    }
    return "Scalar";
}
|
||||
|
||||
// -- Batch API (auto-dispatching) ---------------------------------------------
|
||||
// These functions auto-detect SIMD tier and dispatch accordingly.
|
||||
// All operate on arrays of FieldElements.
|
||||
// count can be any value; non-aligned remainder handled by scalar fallback.
|
||||
|
||||
// Batch addition: out[i] = a[i] + b[i] for i in [0, count)
|
||||
void batch_field_add(FieldElement* out,
|
||||
const FieldElement* a,
|
||||
const FieldElement* b,
|
||||
std::size_t count);
|
||||
|
||||
// Batch subtraction: out[i] = a[i] - b[i]
|
||||
void batch_field_sub(FieldElement* out,
|
||||
const FieldElement* a,
|
||||
const FieldElement* b,
|
||||
std::size_t count);
|
||||
|
||||
// Batch multiplication: out[i] = a[i] * b[i]
|
||||
void batch_field_mul(FieldElement* out,
|
||||
const FieldElement* a,
|
||||
const FieldElement* b,
|
||||
std::size_t count);
|
||||
|
||||
// Batch squaring: out[i] = a[i]^2
|
||||
void batch_field_sqr(FieldElement* out,
|
||||
const FieldElement* a,
|
||||
std::size_t count);
|
||||
|
||||
// -- Batch Modular Inverse (Montgomery's trick) ------------------------------
|
||||
// Computes count inversions using only 1 field inversion + 3(n-1) multiplications.
|
||||
// Much faster than n individual inversions for batch verification.
|
||||
// Scratch buffer: needs at least 'count' FieldElements of scratch space.
|
||||
// If scratch is nullptr, out itself is used as the scratch buffer (nothing is allocated).
|
||||
void batch_field_inv(FieldElement* out,
|
||||
const FieldElement* a,
|
||||
std::size_t count,
|
||||
FieldElement* scratch = nullptr);
|
||||
|
||||
// -- Architecture-Specific Entry Points (for benchmarking) --------------------
|
||||
// These are only available if compiled with appropriate flags.
|
||||
// Normal code should use the auto-dispatching batch_field_* functions above.
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Scalar fallback (always available)
|
||||
void batch_field_add_scalar(FieldElement* out, const FieldElement* a,
|
||||
const FieldElement* b, std::size_t count);
|
||||
void batch_field_sub_scalar(FieldElement* out, const FieldElement* a,
|
||||
const FieldElement* b, std::size_t count);
|
||||
void batch_field_mul_scalar(FieldElement* out, const FieldElement* a,
|
||||
const FieldElement* b, std::size_t count);
|
||||
void batch_field_sqr_scalar(FieldElement* out, const FieldElement* a,
|
||||
std::size_t count);
|
||||
|
||||
} // namespace detail
|
||||
|
||||
} // namespace secp256k1::simd
|
||||
|
||||
#endif // SECP256K1_FIELD_SIMD_HPP
|
||||
@ -1,254 +0,0 @@
|
||||
// ===========================================================================
|
||||
// 5x52 Field Arithmetic -- ARM64 (AArch64) Inline Assembly
|
||||
// ===========================================================================
|
||||
//
|
||||
// Optimized field multiplication and squaring using ARM64 MUL/UMULH
|
||||
// instructions for 64x64->128-bit products.
|
||||
//
|
||||
// ARM64 has 31 GPRs, so register pressure is not an issue.
|
||||
// The approach uses MUL for low half, UMULH for high half, and
|
||||
// ADDS/ADC pairs for 128-bit accumulation.
|
||||
//
|
||||
// Required: AArch64 (ARMv8-A or later)
|
||||
// ===========================================================================
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
|
||||
#include "secp256k1/field_52.hpp"
|
||||
#include <cstdint>
|
||||
|
||||
namespace secp256k1::fast {
|
||||
|
||||
// Constants
|
||||
static constexpr uint64_t FE52_M = 0xFFFFFFFFFFFFFULL;
|
||||
static constexpr uint64_t FE52_R = 0x1000003D10ULL;
|
||||
static constexpr uint64_t FE52_R4 = 0x1000003D1ULL; // R >> 4
|
||||
static constexpr uint64_t FE52_R12= 0x1000003D10000ULL; // R << 12
|
||||
|
||||
// -- Inline assembly helper: 128-bit multiply-accumulate ------------------
|
||||
// (d_hi:d_lo) += a * b
// MUL/UMULH build the 128-bit product in the scratch pair (t_hi:t_lo), then
// ADDS/ADC add it into (d_hi:d_lo). Any carry out of d_hi is dropped.
// t_lo and t_hi are early-clobber outputs ("=&r") because they are written
// before the last read of a_reg/b_reg; "cc" is listed because ADDS updates
// the condition flags.
|
||||
#define MULACCUM128(d_lo, d_hi, a_reg, b_reg, t_lo, t_hi) \
|
||||
__asm__ volatile( \
|
||||
"mul %[tl], %[ar], %[br] \n\t" \
|
||||
"umulh %[th], %[ar], %[br] \n\t" \
|
||||
"adds %[dl], %[dl], %[tl] \n\t" \
|
||||
"adc %[dh], %[dh], %[th] \n\t" \
|
||||
: [dl] "+r"(d_lo), [dh] "+r"(d_hi), \
|
||||
[tl] "=&r"(t_lo), [th] "=&r"(t_hi) \
|
||||
: [ar] "r"(a_reg), [br] "r"(b_reg) \
|
||||
: "cc" \
|
||||
)
|
||||
|
||||
// (d_hi:d_lo) = a * b   (initial product, no accumulate)
// MUL writes d_lo before UMULH has read a_reg/b_reg, so d_lo must be an
// early-clobber output ("=&r"). With a plain "=r" the compiler is free to
// place d_lo in the same register as one of the inputs, and UMULH would then
// compute the high half from an overwritten operand.
// d_hi is written by the final instruction, so a plain "=r" is sufficient.
#define MULPROD128(d_lo, d_hi, a_reg, b_reg)                    \
    __asm__ volatile(                                           \
        "mul   %[dl], %[ar], %[br]  \n\t"                       \
        "umulh %[dh], %[ar], %[br]  \n\t"                       \
        : [dl] "=&r"(d_lo), [dh] "=r"(d_hi)                     \
        : [ar] "r"(a_reg), [br] "r"(b_reg)                      \
    )
|
||||
|
||||
// Multiplies two 5-limb field elements: reads a[0..4] and b[0..4], writes
// r[0..4]. Product columns are accumulated in two 128-bit accumulators,
// (d_hi:d_lo) and (c_hi:c_lo); the upper columns are folded back into the
// lower ones through the constants FE52_R, FE52_R4 and FE52_R12.
// r[0..3] are masked with FE52_M (52 bits); r[4] is stored unmasked.
// The __restrict qualifiers mean r, a and b must not overlap.
// NOTE(review): the allowed magnitude of the input limbs is not visible in
// this file -- confirm against the 5x52 field element contract.
extern "C"
|
||||
void fe52_mul_inner_arm64(uint64_t* __restrict r,
|
||||
const uint64_t* __restrict a,
|
||||
const uint64_t* __restrict b) noexcept {
|
||||
const uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
|
||||
uint64_t d_lo, d_hi, c_lo, c_hi;
|
||||
uint64_t t3, t4, tx, u0;
|
||||
uint64_t tmp_lo, tmp_hi;
|
||||
const uint64_t M = FE52_M;
|
||||
const uint64_t R = FE52_R;
|
||||
|
||||
// -- Step 1: d = a0*b3 + a1*b2 + a2*b1 + a3*b0 ------------------
|
||||
MULPROD128(d_lo, d_hi, a0, b[3]);
|
||||
MULACCUM128(d_lo, d_hi, a1, b[2], tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a2, b[1], tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a3, b[0], tmp_lo, tmp_hi);
|
||||
|
||||
// c = a4 * b4
|
||||
MULPROD128(c_lo, c_hi, a4, b[4]);
|
||||
|
||||
// d += R * c_lo
|
||||
MULACCUM128(d_lo, d_hi, R, c_lo, tmp_lo, tmp_hi);
|
||||
|
||||
// c >>= 64 -> c = c_hi
|
||||
c_lo = c_hi;
|
||||
|
||||
// t3 = d_lo & M; d >>= 52
|
||||
t3 = d_lo & M;
|
||||
// 128-bit right shift by 52 done on the two 64-bit halves
|
||||
d_lo = (d_lo >> 52) | (d_hi << 12);
|
||||
d_hi = d_hi >> 52;
|
||||
|
||||
// -- Step 2: d += a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0 --------
|
||||
MULACCUM128(d_lo, d_hi, a0, b[4], tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a1, b[3], tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a2, b[2], tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a3, b[1], tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a4, b[0], tmp_lo, tmp_hi);
|
||||
|
||||
// d += (R<<12) * c_remaining
|
||||
MULACCUM128(d_lo, d_hi, FE52_R12, c_lo, tmp_lo, tmp_hi);
|
||||
|
||||
// t4 = d & M; d >>= 52; tx = t4>>48; t4 &= M>>4
|
||||
t4 = d_lo & M;
|
||||
d_lo = (d_lo >> 52) | (d_hi << 12);
|
||||
d_hi = d_hi >> 52;
|
||||
// tx keeps the top 4 bits of the 52-bit limb; t4 keeps the low 48 bits
|
||||
tx = t4 >> 48;
|
||||
t4 &= (M >> 4);
|
||||
|
||||
// -- Step 3: col0 + col5 ----------------------------------------
|
||||
MULACCUM128(d_lo, d_hi, a1, b[4], tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a2, b[3], tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a3, b[2], tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a4, b[1], tmp_lo, tmp_hi);
|
||||
|
||||
u0 = d_lo & M;
|
||||
d_lo = (d_lo >> 52) | (d_hi << 12);
|
||||
d_hi = d_hi >> 52;
|
||||
// u0 was shifted left by 4 to take in tx, hence the multiply by R >> 4 below
|
||||
u0 = (u0 << 4) | tx;
|
||||
|
||||
MULPROD128(c_lo, c_hi, a0, b[0]);
|
||||
MULACCUM128(c_lo, c_hi, u0, FE52_R4, tmp_lo, tmp_hi);
|
||||
|
||||
r[0] = c_lo & M;
|
||||
c_lo = (c_lo >> 52) | (c_hi << 12);
|
||||
c_hi = c_hi >> 52;
|
||||
|
||||
// -- Step 4: col1 + col6 ----------------------------------------
|
||||
MULACCUM128(c_lo, c_hi, a0, b[1], tmp_lo, tmp_hi);
|
||||
MULACCUM128(c_lo, c_hi, a1, b[0], tmp_lo, tmp_hi);
|
||||
|
||||
MULACCUM128(d_lo, d_hi, a2, b[4], tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a3, b[3], tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a4, b[2], tmp_lo, tmp_hi);
|
||||
|
||||
u0 = d_lo & M;
|
||||
d_lo = (d_lo >> 52) | (d_hi << 12);
|
||||
d_hi = d_hi >> 52;
|
||||
MULACCUM128(c_lo, c_hi, u0, R, tmp_lo, tmp_hi);
|
||||
|
||||
r[1] = c_lo & M;
|
||||
c_lo = (c_lo >> 52) | (c_hi << 12);
|
||||
c_hi = c_hi >> 52;
|
||||
|
||||
// -- Step 5: col2 + col7 ----------------------------------------
|
||||
MULACCUM128(c_lo, c_hi, a0, b[2], tmp_lo, tmp_hi);
|
||||
MULACCUM128(c_lo, c_hi, a1, b[1], tmp_lo, tmp_hi);
|
||||
MULACCUM128(c_lo, c_hi, a2, b[0], tmp_lo, tmp_hi);
|
||||
|
||||
MULACCUM128(d_lo, d_hi, a3, b[4], tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a4, b[3], tmp_lo, tmp_hi);
|
||||
|
||||
// c += R * (uint64)d -- full 64-bit d_lo!
|
||||
MULACCUM128(c_lo, c_hi, R, d_lo, tmp_lo, tmp_hi);
|
||||
|
||||
// d >>= 64
|
||||
d_lo = d_hi;
|
||||
d_hi = 0;
|
||||
|
||||
r[2] = c_lo & M;
|
||||
c_lo = (c_lo >> 52) | (c_hi << 12);
|
||||
c_hi = c_hi >> 52;
|
||||
|
||||
// -- Step 6: Finalize -------------------------------------------
|
||||
MULACCUM128(c_lo, c_hi, FE52_R12, d_lo, tmp_lo, tmp_hi);
|
||||
|
||||
// c += t3
|
||||
__asm__ volatile("adds %[cl], %[cl], %[t]\n\t"
|
||||
"adc %[ch], %[ch], xzr\n\t"
|
||||
: [cl] "+r"(c_lo), [ch] "+r"(c_hi) : [t] "r"(t3) : "cc");
|
||||
|
||||
r[3] = c_lo & M;
|
||||
c_lo = (c_lo >> 52) | (c_hi << 12);
|
||||
c_hi = c_hi >> 52;
|
||||
|
||||
// Top limb: remaining carry plus the saved low 48 bits of t4, stored unmasked
|
||||
c_lo += t4;
|
||||
r[4] = c_lo;
|
||||
}
|
||||
|
||||
// Squares a 5-limb field element: reads a[0..4], writes r[0..4].
// Follows the same six steps as fe52_mul_inner_arm64, but each cross term
// a_i*a_j (i != j) is computed once using a pre-doubled limb (a0_2..a3_2).
// r[0..3] are masked with FE52_M (52 bits); r[4] is stored unmasked.
// The __restrict qualifiers mean r and a must not overlap.
// NOTE(review): doubling a limb wraps if it is >= 2^63 -- assumes limbs stay
// well below that; confirm against the 5x52 field element contract.
extern "C"
|
||||
void fe52_sqr_inner_arm64(uint64_t* __restrict r,
|
||||
const uint64_t* __restrict a) noexcept {
|
||||
const uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
|
||||
const uint64_t a0_2 = a0 * 2, a1_2 = a1 * 2, a2_2 = a2 * 2, a3_2 = a3 * 2;
|
||||
uint64_t d_lo, d_hi, c_lo, c_hi;
|
||||
uint64_t t3, t4, tx, u0;
|
||||
uint64_t tmp_lo, tmp_hi;
|
||||
const uint64_t M = FE52_M;
|
||||
const uint64_t R = FE52_R;
|
||||
|
||||
// -- Step 1 --
// d = 2*a0*a3 + 2*a1*a2; c = a4*a4; d += R * low64(c); keep high64(c) in c_lo
|
||||
MULPROD128(d_lo, d_hi, a0_2, a3);
|
||||
MULACCUM128(d_lo, d_hi, a1_2, a2, tmp_lo, tmp_hi);
|
||||
MULPROD128(c_lo, c_hi, a4, a4);
|
||||
MULACCUM128(d_lo, d_hi, R, c_lo, tmp_lo, tmp_hi);
|
||||
c_lo = c_hi;
|
||||
t3 = d_lo & M;
|
||||
d_lo = (d_lo >> 52) | (d_hi << 12);
|
||||
d_hi >>= 52;
|
||||
|
||||
// -- Step 2 --
// d += 2*a0*a4 + 2*a1*a3 + a2*a2 + (R<<12) * high64(a4*a4)
|
||||
MULACCUM128(d_lo, d_hi, a0_2, a4, tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a1_2, a3, tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a2, a2, tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, FE52_R12, c_lo, tmp_lo, tmp_hi);
|
||||
t4 = d_lo & M;
|
||||
d_lo = (d_lo >> 52) | (d_hi << 12);
|
||||
d_hi >>= 52;
|
||||
// tx keeps the top 4 bits of the 52-bit limb; t4 keeps the low 48 bits
|
||||
tx = t4 >> 48;
|
||||
t4 &= (M >> 4);
|
||||
|
||||
// -- Step 3 --
// d += 2*a1*a4 + 2*a2*a3; c = a0*a0 + u0 * (R >> 4); emit r[0]
|
||||
MULACCUM128(d_lo, d_hi, a1_2, a4, tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a2_2, a3, tmp_lo, tmp_hi);
|
||||
u0 = d_lo & M;
|
||||
d_lo = (d_lo >> 52) | (d_hi << 12);
|
||||
d_hi >>= 52;
|
||||
u0 = (u0 << 4) | tx;
|
||||
MULPROD128(c_lo, c_hi, a0, a0);
|
||||
MULACCUM128(c_lo, c_hi, u0, FE52_R4, tmp_lo, tmp_hi);
|
||||
r[0] = c_lo & M;
|
||||
c_lo = (c_lo >> 52) | (c_hi << 12);
|
||||
c_hi >>= 52;
|
||||
|
||||
// -- Step 4 --
// c += 2*a0*a1; d += 2*a2*a4 + a3*a3; c += (d & M) * R; emit r[1]
|
||||
MULACCUM128(c_lo, c_hi, a0_2, a1, tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a2_2, a4, tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a3, a3, tmp_lo, tmp_hi);
|
||||
u0 = d_lo & M;
|
||||
d_lo = (d_lo >> 52) | (d_hi << 12);
|
||||
d_hi >>= 52;
|
||||
MULACCUM128(c_lo, c_hi, u0, R, tmp_lo, tmp_hi);
|
||||
r[1] = c_lo & M;
|
||||
c_lo = (c_lo >> 52) | (c_hi << 12);
|
||||
c_hi >>= 52;
|
||||
|
||||
// -- Step 5 --
// c += 2*a0*a2 + a1*a1; d += 2*a3*a4; c += R * low64(d); d >>= 64; emit r[2]
|
||||
MULACCUM128(c_lo, c_hi, a0_2, a2, tmp_lo, tmp_hi);
|
||||
MULACCUM128(c_lo, c_hi, a1, a1, tmp_lo, tmp_hi);
|
||||
MULACCUM128(d_lo, d_hi, a3_2, a4, tmp_lo, tmp_hi);
|
||||
MULACCUM128(c_lo, c_hi, R, d_lo, tmp_lo, tmp_hi);
|
||||
d_lo = d_hi;
|
||||
d_hi = 0;
|
||||
r[2] = c_lo & M;
|
||||
c_lo = (c_lo >> 52) | (c_hi << 12);
|
||||
c_hi >>= 52;
|
||||
|
||||
// -- Step 6 --
// c += (R<<12) * d + t3; emit r[3]; the remaining carry plus t4 becomes r[4]
|
||||
MULACCUM128(c_lo, c_hi, FE52_R12, d_lo, tmp_lo, tmp_hi);
|
||||
__asm__ volatile("adds %[cl], %[cl], %[t]\n\t"
|
||||
"adc %[ch], %[ch], xzr\n\t"
|
||||
: [cl] "+r"(c_lo), [ch] "+r"(c_hi) : [t] "r"(t3) : "cc");
|
||||
r[3] = c_lo & M;
|
||||
c_lo = (c_lo >> 52) | (c_hi << 12);
|
||||
c_hi >>= 52;
|
||||
c_lo += t4;
|
||||
r[4] = c_lo;
|
||||
}
|
||||
|
||||
#undef MULACCUM128
|
||||
#undef MULPROD128
|
||||
|
||||
} // namespace secp256k1::fast
|
||||
|
||||
#endif // __aarch64__ || _M_ARM64
|
||||
@ -1,130 +0,0 @@
|
||||
#include "secp256k1/field_simd.hpp"
|
||||
#include <cstring>
|
||||
|
||||
namespace secp256k1::simd {
|
||||
|
||||
using fast::FieldElement;
|
||||
|
||||
// ==============================================================================
|
||||
// Scalar Fallback (always available, any platform)
|
||||
// ==============================================================================
|
||||
|
||||
namespace detail {
|
||||
|
||||
void batch_field_add_scalar(FieldElement* out, const FieldElement* a,
|
||||
const FieldElement* b, std::size_t count) {
|
||||
for (std::size_t i = 0; i < count; ++i) {
|
||||
out[i] = a[i] + b[i];
|
||||
}
|
||||
}
|
||||
|
||||
void batch_field_sub_scalar(FieldElement* out, const FieldElement* a,
|
||||
const FieldElement* b, std::size_t count) {
|
||||
for (std::size_t i = 0; i < count; ++i) {
|
||||
out[i] = a[i] - b[i];
|
||||
}
|
||||
}
|
||||
|
||||
void batch_field_mul_scalar(FieldElement* out, const FieldElement* a,
|
||||
const FieldElement* b, std::size_t count) {
|
||||
for (std::size_t i = 0; i < count; ++i) {
|
||||
out[i] = a[i] * b[i];
|
||||
}
|
||||
}
|
||||
|
||||
void batch_field_sqr_scalar(FieldElement* out, const FieldElement* a,
|
||||
std::size_t count) {
|
||||
for (std::size_t i = 0; i < count; ++i) {
|
||||
out[i] = a[i].square();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
// ==============================================================================
|
||||
// Auto-Dispatching Batch Operations
|
||||
// ==============================================================================
|
||||
// Current implementation: scalar-only with architecture detection.
|
||||
// SIMD kernels (AVX2/AVX-512) operate on the 4xuint64 representation.
|
||||
//
|
||||
// For field multiplication, SIMD doesn't help much because secp256k1
|
||||
// modular reduction is inherently serial (carry propagation).
|
||||
// The main benefit is for add/sub which are carry-chain operations
|
||||
// that can be partially parallelized, and for batch inverse
|
||||
// (Montgomery's trick) which is inherently parallelizable.
|
||||
|
||||
// Batch addition entry point: out[i] = a[i] + b[i] for i in [0, count).
void batch_field_add(FieldElement* out,
                     const FieldElement* a,
                     const FieldElement* b,
                     std::size_t count) {
    // Always takes the scalar loop. Original author's rationale: a single
    // field add is already very fast (~2ns), the compiler's auto-vectorizer
    // (-ftree-vectorize with -march=native) typically copes well, and
    // hand-written SIMD intrinsics would add complexity without a measurable
    // gain for add/sub.
    detail::batch_field_add_scalar(out, a, b, count);
}
|
||||
|
||||
// Batch subtraction entry point: out[i] = a[i] - b[i] for i in [0, count).
// Forwards straight to the scalar loop.
void batch_field_sub(FieldElement* out,
                     const FieldElement* a,
                     const FieldElement* b,
                     std::size_t count) {
    detail::batch_field_sub_scalar(out, a, b, count);
}
|
||||
|
||||
// Batch multiplication entry point: out[i] = a[i] * b[i] for i in [0, count).
// Forwards straight to the scalar loop.
void batch_field_mul(FieldElement* out,
                     const FieldElement* a,
                     const FieldElement* b,
                     std::size_t count) {
    detail::batch_field_mul_scalar(out, a, b, count);
}
|
||||
|
||||
// Batch squaring entry point: out[i] = a[i]^2 for i in [0, count).
// Forwards straight to the scalar loop.
void batch_field_sqr(FieldElement* out,
                     const FieldElement* a,
                     std::size_t count) {
    detail::batch_field_sqr_scalar(out, a, count);
}
|
||||
|
||||
// ==============================================================================
|
||||
// Batch Modular Inverse (Montgomery's Trick)
|
||||
// ==============================================================================
|
||||
// Computes n inversions with only 1 actual field inversion.
|
||||
// Algorithm:
|
||||
// 1. Compute running products: prod[i] = a[0] * a[1] * ... * a[i]
|
||||
// 2. Invert the final product: inv_all = prod[n-1]^(-1)
|
||||
// 3. Back-propagate: out[i] = inv_all * prod[i-1], inv_all *= a[i]
|
||||
//
|
||||
// Cost: 1 inversion + 3(n-1) multiplications
|
||||
// vs. n inversions naively (~250x faster for n=256)
|
||||
|
||||
// Inverts count field elements with a single inverse() call (Montgomery's
// trick): out[i] = a[i]^(-1) for i in [0, count).
//
//   out     - receives the inverses; must hold count elements.
//   a       - inputs; must hold count elements.
//   count   - number of elements; 0 is a no-op.
//   scratch - optional buffer of count elements for the running products.
//             When nullptr, out itself is used as that buffer.
//
// In-place use (out == a) is supported. With a scratch buffer the batched
// path is used; without one the inputs would be destroyed by the forward
// pass, so the function falls back to one inverse() per element.
// Other overlaps (scratch overlapping a, partially overlapping ranges) are
// not supported.
// NOTE(review): a zero element makes the whole running product zero, so every
// output then depends on what FieldElement::inverse() returns for zero --
// confirm callers never pass zero, or filter zeros before calling.
void batch_field_inv(FieldElement* out,
                     const FieldElement* a,
                     std::size_t count,
                     FieldElement* scratch) {
    if (count == 0) return;
    if (count == 1) {
        out[0] = a[0].inverse();
        return;
    }

    if (scratch == nullptr && out == a) {
        // No separate storage for the running products: using out would
        // overwrite the inputs the backward pass still needs. Slower but
        // correct: invert each element on its own.
        for (std::size_t i = 0; i < count; ++i) {
            out[i] = a[i].inverse();
        }
        return;
    }

    // prefix[i] = a[0] * a[1] * ... * a[i]
    FieldElement* prefix = scratch ? scratch : out;

    // Step 1: Forward pass -- compute running products
    prefix[0] = a[0];
    for (std::size_t i = 1; i < count; ++i) {
        prefix[i] = prefix[i - 1] * a[i];
    }

    // Step 2: Single inversion of the full product
    auto inv = prefix[count - 1].inverse();

    // Step 3: Backward pass -- peel off one factor per iteration.
    // Invariant at the top of iteration i: inv == (a[0] * ... * a[i])^(-1).
    for (std::size_t i = count - 1; i > 0; --i) {
        // Copy a[i] first: when out aliases a, the store to out[i] below
        // would otherwise replace the value still needed to update inv.
        const auto a_i = a[i];
        out[i] = inv * prefix[i - 1];
        inv = inv * a_i;
    }
    out[0] = inv;
}
|
||||
|
||||
} // namespace secp256k1::simd
|
||||
@ -31,7 +31,7 @@ int test_bip32_run();
|
||||
int test_bip32_vectors_run();
|
||||
int test_musig2_run();
|
||||
int test_ecdh_recovery_taproot_run();
|
||||
int test_simd_batch_run();
|
||||
int test_edge_cases_run();
|
||||
int test_v4_features_run();
|
||||
int test_coins_run();
|
||||
int test_batch_add_affine_run();
|
||||
@ -60,7 +60,7 @@ static const TestModule MODULES[] = {
|
||||
{ "BIP-32 official test vectors TV1-5", test_bip32_vectors_run },
|
||||
{ "MuSig2", test_musig2_run },
|
||||
{ "ECDH + recovery + taproot", test_ecdh_recovery_taproot_run },
|
||||
{ "SIMD batch", test_simd_batch_run },
|
||||
{ "edge cases & coverage gaps", test_edge_cases_run },
|
||||
{ "v4 features (Pedersen/FROST/etc)", test_v4_features_run },
|
||||
{ "coins layer", test_coins_run },
|
||||
{ "affine batch addition", test_batch_add_affine_run },
|
||||
|
||||
472
cpu/tests/test_edge_cases.cpp
Normal file
472
cpu/tests/test_edge_cases.cpp
Normal file
@ -0,0 +1,472 @@
|
||||
// ============================================================================
|
||||
// Test: Edge cases & coverage gaps
|
||||
// ============================================================================
|
||||
// Exercises untested branches and rare code paths:
|
||||
// 1. Scalar zero rejection for ECDSA/Schnorr signing
|
||||
// 2. Point at infinity arithmetic (O+O, dbl(O), O+P)
|
||||
// 3. BIP-32 IL >= curve order rejection
|
||||
// 4. Precompute cache corruption recovery
|
||||
// 5. Scalar boundary values (k=n-1 => -G, k=1, k=2)
|
||||
// 6. parse_bytes_strict boundary rejection (scalar == n, scalar == n+1)
|
||||
// ============================================================================
|
||||
|
||||
#include "secp256k1/point.hpp"
|
||||
#include "secp256k1/scalar.hpp"
|
||||
#include "secp256k1/field.hpp"
|
||||
#include "secp256k1/ecdsa.hpp"
|
||||
#include "secp256k1/schnorr.hpp"
|
||||
#include "secp256k1/bip32.hpp"
|
||||
#include "secp256k1/precompute.hpp"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <array>
|
||||
#include <fstream>
|
||||
|
||||
using namespace secp256k1::fast;
|
||||
using secp256k1::ecdsa_sign;
|
||||
using secp256k1::ecdsa_verify;
|
||||
using secp256k1::ECDSASignature;
|
||||
using secp256k1::schnorr_sign;
|
||||
using secp256k1::schnorr_verify;
|
||||
using secp256k1::bip32_master_key;
|
||||
using secp256k1::SchnorrSignature;
|
||||
|
||||
// Running totals shared by every CHECK in this translation unit.
static int g_tests_run = 0;
static int g_tests_passed = 0;

// CHECK(cond, msg): counts one test, then prints " [PASS] msg" and bumps the
// pass counter when cond holds, or prints " [FAIL] msg" otherwise.
// It never aborts, so later checks still run after a failure.
#define CHECK(cond, msg) do {                                   \
        ++g_tests_run;                                          \
        if (!(cond)) {                                          \
            std::printf(" [FAIL] %s\n", msg);                   \
        } else {                                                \
            ++g_tests_passed;                                   \
            std::printf(" [PASS] %s\n", msg);                   \
        }                                                       \
    } while(0)
|
||||
|
||||
// -- secp256k1 group order n and its neighbours ------------------------------
// All three values are 32-byte big-endian encodings. They share the first
// 31 bytes and differ only in the least-significant (last) byte:
//   n   ends in 0x41,  n-1 ends in 0x40,  n+1 ends in 0x42.

// n (the curve order itself)
static constexpr std::array<uint8_t, 32> ORDER_N = {
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE,
    0xBA, 0xAE, 0xDC, 0xE6, 0xAF, 0x48, 0xA0, 0x3B, 0xBF, 0xD2, 0x5E, 0x8C, 0xD0, 0x36, 0x41, 0x41
};

// n-1 (largest valid scalar)
static constexpr std::array<uint8_t, 32> ORDER_N_MINUS_1 = {
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE,
    0xBA, 0xAE, 0xDC, 0xE6, 0xAF, 0x48, 0xA0, 0x3B, 0xBF, 0xD2, 0x5E, 0x8C, 0xD0, 0x36, 0x41, 0x40
};

// n+1 (first value strictly above the order)
static constexpr std::array<uint8_t, 32> ORDER_N_PLUS_1 = {
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE,
    0xBA, 0xAE, 0xDC, 0xE6, 0xAF, 0x48, 0xA0, 0x3B, 0xBF, 0xD2, 0x5E, 0x8C, 0xD0, 0x36, 0x41, 0x42
};
|
||||
|
||||
// ============================================================================
|
||||
// 1. Scalar zero rejection paths
|
||||
// ============================================================================
|
||||
static void test_scalar_zero_rejection() {
|
||||
std::printf("\n=== Scalar zero rejection ===\n");
|
||||
|
||||
const auto zero = Scalar::zero();
|
||||
CHECK(zero.is_zero(), "Scalar::zero() is zero");
|
||||
|
||||
const auto from0 = Scalar::from_uint64(0);
|
||||
CHECK(from0.is_zero(), "Scalar::from_uint64(0) is zero");
|
||||
|
||||
// parse_bytes_strict_nonzero must reject zero
|
||||
std::array<uint8_t, 32> zero_bytes{};
|
||||
Scalar out{};
|
||||
bool ok = Scalar::parse_bytes_strict_nonzero(zero_bytes, out);
|
||||
CHECK(!ok, "parse_bytes_strict_nonzero rejects zero");
|
||||
|
||||
// parse_bytes_strict accepts zero (it only rejects >= n)
|
||||
ok = Scalar::parse_bytes_strict(zero_bytes, out);
|
||||
CHECK(ok, "parse_bytes_strict accepts zero (valid < n)");
|
||||
CHECK(out.is_zero(), "parse_bytes_strict(0) returns zero scalar");
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// 2. parse_bytes_strict boundary values
|
||||
// ============================================================================
|
||||
static void test_scalar_parse_boundaries() {
|
||||
std::printf("\n=== Scalar parse_bytes_strict boundaries ===\n");
|
||||
|
||||
Scalar out{};
|
||||
|
||||
// n-1: must be accepted (valid scalar)
|
||||
bool ok = Scalar::parse_bytes_strict(ORDER_N_MINUS_1, out);
|
||||
CHECK(ok, "parse_bytes_strict accepts n-1");
|
||||
CHECK(!out.is_zero(), "n-1 is nonzero");
|
||||
|
||||
// n: must be rejected
|
||||
ok = Scalar::parse_bytes_strict(ORDER_N, out);
|
||||
CHECK(!ok, "parse_bytes_strict rejects n (== order)");
|
||||
|
||||
// n+1: must be rejected
|
||||
ok = Scalar::parse_bytes_strict(ORDER_N_PLUS_1, out);
|
||||
CHECK(!ok, "parse_bytes_strict rejects n+1 (> order)");
|
||||
|
||||
// all 0xFF: must be rejected
|
||||
std::array<uint8_t, 32> all_ff{};
|
||||
std::memset(all_ff.data(), 0xFF, 32);
|
||||
ok = Scalar::parse_bytes_strict(all_ff, out);
|
||||
CHECK(!ok, "parse_bytes_strict rejects 0xFF..FF");
|
||||
|
||||
// parse_bytes_strict_nonzero: n-1 must be accepted
|
||||
ok = Scalar::parse_bytes_strict_nonzero(ORDER_N_MINUS_1, out);
|
||||
CHECK(ok, "parse_bytes_strict_nonzero accepts n-1");
|
||||
|
||||
// parse_bytes_strict_nonzero: n must be rejected
|
||||
ok = Scalar::parse_bytes_strict_nonzero(ORDER_N, out);
|
||||
CHECK(!ok, "parse_bytes_strict_nonzero rejects n");
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// 3. Point at infinity arithmetic
|
||||
// ============================================================================
|
||||
static void test_infinity_arithmetic() {
|
||||
std::printf("\n=== Infinity arithmetic ===\n");
|
||||
|
||||
const Point O = Point::infinity();
|
||||
const Point G = Point::generator();
|
||||
|
||||
// O + O = O
|
||||
const Point OO = O.add(O);
|
||||
CHECK(OO.is_infinity(), "O + O = O");
|
||||
|
||||
// dbl(O) = O
|
||||
const Point dblO = O.dbl();
|
||||
CHECK(dblO.is_infinity(), "dbl(O) = O");
|
||||
|
||||
// O + G = G
|
||||
const Point OG = O.add(G);
|
||||
CHECK(!OG.is_infinity(), "O + G is not infinity");
|
||||
CHECK(OG.to_compressed() == G.to_compressed(), "O + G = G");
|
||||
|
||||
// G + O = G
|
||||
const Point GO = G.add(O);
|
||||
CHECK(!GO.is_infinity(), "G + O is not infinity");
|
||||
CHECK(GO.to_compressed() == G.to_compressed(), "G + O = G");
|
||||
|
||||
// (n-1)*G + G = O (another way to get infinity)
|
||||
const Scalar nm1 = Scalar::from_bytes(ORDER_N_MINUS_1);
|
||||
const Point negG = G.scalar_mul(nm1);
|
||||
CHECK(!negG.is_infinity(), "(n-1)*G is not infinity");
|
||||
|
||||
const Point should_be_O = negG.add(G);
|
||||
CHECK(should_be_O.is_infinity(), "(n-1)*G + G = O");
|
||||
|
||||
// Verify (n-1)*G = -G (negation)
|
||||
const Point minusG = G.negate();
|
||||
CHECK(negG.to_compressed() == minusG.to_compressed(), "(n-1)*G == -G");
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// 4. ECDSA signing with zero/boundary keys
|
||||
// ============================================================================
|
||||
static void test_ecdsa_zero_key() {
|
||||
std::printf("\n=== ECDSA zero/boundary key tests ===\n");
|
||||
|
||||
std::array<uint8_t, 32> msg{};
|
||||
msg[0] = 0x42; // non-zero message hash
|
||||
|
||||
// Sign with valid key, verify it works
|
||||
const Scalar valid_key = Scalar::from_uint64(1);
|
||||
const auto sig = ecdsa_sign(msg, valid_key);
|
||||
const Point pub = Point::generator().scalar_mul(valid_key);
|
||||
const bool valid = ecdsa_verify(msg, pub, sig);
|
||||
CHECK(valid, "ECDSA sign+verify with k=1");
|
||||
|
||||
// Sign with n-1 key, should work
|
||||
const Scalar nm1_key = Scalar::from_bytes(ORDER_N_MINUS_1);
|
||||
const auto sig_nm1 = ecdsa_sign(msg, nm1_key);
|
||||
const Point pub_nm1 = Point::generator().scalar_mul(nm1_key);
|
||||
const bool valid_nm1 = ecdsa_verify(msg, pub_nm1, sig_nm1);
|
||||
CHECK(valid_nm1, "ECDSA sign+verify with k=n-1");
|
||||
|
||||
// Verify with wrong key should fail
|
||||
const Scalar wrong_key = Scalar::from_uint64(2);
|
||||
const Point wrong_pub = Point::generator().scalar_mul(wrong_key);
|
||||
const bool wrong = ecdsa_verify(msg, wrong_pub, sig);
|
||||
CHECK(!wrong, "ECDSA verify with wrong key fails");
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// 5. Schnorr signing boundary tests
|
||||
// ============================================================================
|
||||
static void test_schnorr_boundary() {
|
||||
std::printf("\n=== Schnorr boundary key tests ===\n");
|
||||
|
||||
std::array<uint8_t, 32> msg{};
|
||||
msg[0] = 0xAB;
|
||||
std::array<uint8_t, 32> aux{};
|
||||
aux[0] = 0xCD;
|
||||
|
||||
// Sign with k=1
|
||||
const Scalar k1 = Scalar::from_uint64(1);
|
||||
const auto sig1 = schnorr_sign(k1, msg, aux);
|
||||
auto pub1_x = Point::generator().scalar_mul(k1).x_only_bytes();
|
||||
const bool v1 = schnorr_verify(pub1_x, msg, sig1);
|
||||
CHECK(v1, "Schnorr sign+verify with k=1");
|
||||
|
||||
// Sign with k=n-1
|
||||
const Scalar knm1 = Scalar::from_bytes(ORDER_N_MINUS_1);
|
||||
const auto sig_nm1 = schnorr_sign(knm1, msg, aux);
|
||||
auto pub_nm1_x = Point::generator().scalar_mul(knm1).x_only_bytes();
|
||||
const bool v_nm1 = schnorr_verify(pub_nm1_x, msg, sig_nm1);
|
||||
CHECK(v_nm1, "Schnorr sign+verify with k=n-1");
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// 6. BIP-32 IL >= n rejection
|
||||
// ============================================================================
|
||||
static void test_bip32_il_geq_n() {
|
||||
std::printf("\n=== BIP-32 IL >= n rejection ===\n");
|
||||
|
||||
// Create a valid master key from a known seed
|
||||
const uint8_t seed[16] = {
|
||||
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
|
||||
0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F
|
||||
};
|
||||
auto [master, ok] = bip32_master_key(seed, 16);
|
||||
CHECK(ok, "BIP-32 master key from 16-byte seed");
|
||||
|
||||
// Derive a valid child (index 0)
|
||||
auto [child0, ok0] = master.derive_child(0);
|
||||
CHECK(ok0, "BIP-32 child derivation index=0 succeeds");
|
||||
CHECK(child0.depth == 1, "BIP-32 child depth = 1");
|
||||
|
||||
// Derive multiple children to exercise the loop
|
||||
bool all_ok = true;
|
||||
for (uint32_t i = 0; i < 10; ++i) {
|
||||
auto [child, cok] = master.derive_child(i);
|
||||
if (!cok) { all_ok = false; break; }
|
||||
}
|
||||
CHECK(all_ok, "BIP-32 derive 10 children all succeed");
|
||||
|
||||
// Hardened derivation
|
||||
auto [hchild, hok] = master.derive_hardened(0);
|
||||
CHECK(hok, "BIP-32 hardened child index=0 succeeds");
|
||||
|
||||
// Seed too short (< 16 bytes)
|
||||
auto [bad_master, bad_ok] = bip32_master_key(seed, 15);
|
||||
CHECK(!bad_ok, "BIP-32 rejects seed < 16 bytes");
|
||||
|
||||
// Seed too long (> 64 bytes)
|
||||
uint8_t long_seed[65] = {};
|
||||
auto [bad_master2, bad_ok2] = bip32_master_key(long_seed, 65);
|
||||
CHECK(!bad_ok2, "BIP-32 rejects seed > 64 bytes");
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// 7. Precompute cache corruption recovery
|
||||
// ============================================================================
|
||||
// load_precompute_cache() must return false for a missing file, for a file
// holding arbitrary text, and for an 8-byte file of zeros.
// NOTE(review): the paths are hard-coded under /tmp, which is POSIX-specific.
// If a fixture cannot be created, the matching check still passes via the
// "missing file" path -- confirm this is acceptable on non-POSIX targets.
static void test_precompute_cache_corrupt() {
    std::printf("\n=== Precompute cache corruption recovery ===\n");

    const char* const missing_path = "/tmp/nonexistent_secp256k1_cache_xyz.bin";
    const char* const corrupt_path = "/tmp/secp256k1_test_corrupt_cache.bin";
    const char* const short_path = "/tmp/secp256k1_test_trunc_cache.bin";

    // Writes `count` raw bytes to `path`; the stream closes on scope exit.
    const auto dump_bytes = [](const char* path, const void* bytes, std::streamsize count) {
        std::ofstream sink(path, std::ios::binary);
        sink.write(static_cast<const char*>(bytes), count);
    };

    // Case 1: nothing exists at the path.
    const bool missing_loaded = load_precompute_cache(missing_path);
    CHECK(!missing_loaded, "load_precompute_cache rejects nonexistent file");

    // Case 2: readable text that is not a cache header (written with its NUL).
    const char garbage[] = "not a valid cache header";
    dump_bytes(corrupt_path, garbage, sizeof(garbage));
    const bool corrupt_loaded = load_precompute_cache(corrupt_path);
    CHECK(!corrupt_loaded, "load_precompute_cache rejects corrupt file");

    // Case 3: only 8 zero bytes -- too short, and presumably not a valid
    // magic/version either.
    const uint64_t zero_header = 0;
    dump_bytes(short_path, &zero_header, 8);
    const bool short_loaded = load_precompute_cache(short_path);
    CHECK(!short_loaded, "load_precompute_cache rejects truncated file");

    // Remove the fixtures.
    std::remove(corrupt_path);
    std::remove(short_path);
}
|
||||
|
||||
// ============================================================================
|
||||
// 8. Scalar arithmetic edge cases
|
||||
// ============================================================================
|
||||
static void test_scalar_arithmetic_edges() {
|
||||
std::printf("\n=== Scalar arithmetic edges ===\n");
|
||||
|
||||
const Scalar zero = Scalar::zero();
|
||||
const Scalar one = Scalar::from_uint64(1);
|
||||
const Scalar two = Scalar::from_uint64(2);
|
||||
const Scalar nm1 = Scalar::from_bytes(ORDER_N_MINUS_1);
|
||||
|
||||
// 0 + 1 = 1
|
||||
const Scalar sum01 = zero + one;
|
||||
CHECK(sum01.to_bytes() == one.to_bytes(), "0 + 1 = 1");
|
||||
|
||||
// n-1 + 1 = 0 (mod n)
|
||||
const Scalar wrap = nm1 + one;
|
||||
CHECK(wrap.is_zero(), "(n-1) + 1 = 0 mod n");
|
||||
|
||||
// n-1 + 2 = 1 (mod n)
|
||||
const Scalar wrap2 = nm1 + two;
|
||||
CHECK(wrap2.to_bytes() == one.to_bytes(), "(n-1) + 2 = 1 mod n");
|
||||
|
||||
// 1 * 0 = 0
|
||||
const Scalar prod0 = one * zero;
|
||||
CHECK(prod0.is_zero(), "1 * 0 = 0");
|
||||
|
||||
// 1 * 1 = 1
|
||||
const Scalar prod1 = one * one;
|
||||
CHECK(prod1.to_bytes() == one.to_bytes(), "1 * 1 = 1");
|
||||
|
||||
// negate(0) = 0
|
||||
const Scalar neg0 = zero.negate();
|
||||
CHECK(neg0.is_zero(), "negate(0) = 0");
|
||||
|
||||
// negate(1) = n-1
|
||||
const Scalar neg1 = one.negate();
|
||||
CHECK(neg1.to_bytes() == nm1.to_bytes(), "negate(1) = n-1");
|
||||
|
||||
// negate(n-1) = 1
|
||||
const Scalar neg_nm1 = nm1.negate();
|
||||
CHECK(neg_nm1.to_bytes() == one.to_bytes(), "negate(n-1) = 1");
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// 9. Field element edge cases
|
||||
// ============================================================================
|
||||
static void test_field_edge_cases() {
|
||||
std::printf("\n=== Field element edge cases ===\n");
|
||||
|
||||
const auto zero = FieldElement::zero();
|
||||
const auto one = FieldElement::one();
|
||||
|
||||
// 0 * 0 = 0
|
||||
const auto prod00 = zero * zero;
|
||||
CHECK(prod00 == zero, "FE: 0 * 0 = 0");
|
||||
|
||||
// 1 * 1 = 1
|
||||
const auto prod11 = one * one;
|
||||
CHECK(prod11 == one, "FE: 1 * 1 = 1");
|
||||
|
||||
// 0 * 1 = 0
|
||||
const auto prod01 = zero * one;
|
||||
CHECK(prod01 == zero, "FE: 0 * 1 = 0");
|
||||
|
||||
// 0 + 0 = 0
|
||||
const auto sum00 = zero + zero;
|
||||
CHECK(sum00 == zero, "FE: 0 + 0 = 0");
|
||||
|
||||
// 1 + 0 = 1
|
||||
const auto sum10 = one + zero;
|
||||
CHECK(sum10 == one, "FE: 1 + 0 = 1");
|
||||
|
||||
// a - a = 0
|
||||
const auto sub_aa = one - one;
|
||||
CHECK(sub_aa == zero, "FE: 1 - 1 = 0");
|
||||
|
||||
// negate(0) = 0
|
||||
const auto neg0 = zero.negate();
|
||||
CHECK(neg0 == zero, "FE: negate(0) = 0");
|
||||
|
||||
// square(0) = 0
|
||||
const auto sq0 = zero.square();
|
||||
CHECK(sq0 == zero, "FE: square(0) = 0");
|
||||
|
||||
// square(1) = 1
|
||||
const auto sq1 = one.square();
|
||||
CHECK(sq1 == one, "FE: square(1) = 1");
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// 10. ECDSASignature parse_compact_strict boundaries
|
||||
// ============================================================================
|
||||
static void test_ecdsa_sig_parse_boundaries() {
|
||||
std::printf("\n=== ECDSA signature parse boundaries ===\n");
|
||||
|
||||
// Zero signature must be rejected (r=0)
|
||||
std::array<uint8_t, 64> zero_sig{};
|
||||
ECDSASignature out{};
|
||||
bool ok = ECDSASignature::parse_compact_strict(zero_sig, out);
|
||||
CHECK(!ok, "parse_compact_strict rejects zero sig (r=0,s=0)");
|
||||
|
||||
// r=1, s=0 must be rejected
|
||||
std::array<uint8_t, 64> r1s0{};
|
||||
r1s0[31] = 0x01;
|
||||
ok = ECDSASignature::parse_compact_strict(r1s0, out);
|
||||
CHECK(!ok, "parse_compact_strict rejects r=1,s=0");
|
||||
|
||||
// r=0, s=1 must be rejected
|
||||
std::array<uint8_t, 64> r0s1{};
|
||||
r0s1[63] = 0x01;
|
||||
ok = ECDSASignature::parse_compact_strict(r0s1, out);
|
||||
CHECK(!ok, "parse_compact_strict rejects r=0,s=1");
|
||||
|
||||
// r=1, s=1 must be accepted
|
||||
std::array<uint8_t, 64> r1s1{};
|
||||
r1s1[31] = 0x01;
|
||||
r1s1[63] = 0x01;
|
||||
ok = ECDSASignature::parse_compact_strict(r1s1, out);
|
||||
CHECK(ok, "parse_compact_strict accepts r=1,s=1");
|
||||
|
||||
// r=n, s=1 must be rejected
|
||||
std::array<uint8_t, 64> rns1{};
|
||||
std::memcpy(rns1.data(), ORDER_N.data(), 32);
|
||||
rns1[63] = 0x01;
|
||||
ok = ECDSASignature::parse_compact_strict(rns1, out);
|
||||
CHECK(!ok, "parse_compact_strict rejects r=n");
|
||||
|
||||
// r=1, s=n must be rejected
|
||||
std::array<uint8_t, 64> r1sn{};
|
||||
r1sn[31] = 0x01;
|
||||
std::memcpy(r1sn.data() + 32, ORDER_N.data(), 32);
|
||||
ok = ECDSASignature::parse_compact_strict(r1sn, out);
|
||||
CHECK(!ok, "parse_compact_strict rejects s=n");
|
||||
|
||||
// r=n-1, s=n-1 must be accepted
|
||||
std::array<uint8_t, 64> rnm1snm1{};
|
||||
std::memcpy(rnm1snm1.data(), ORDER_N_MINUS_1.data(), 32);
|
||||
std::memcpy(rnm1snm1.data() + 32, ORDER_N_MINUS_1.data(), 32);
|
||||
ok = ECDSASignature::parse_compact_strict(rnm1snm1, out);
|
||||
CHECK(ok, "parse_compact_strict accepts r=n-1,s=n-1");
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Entry point (matches test runner pattern)
|
||||
// ============================================================================
|
||||
|
||||
// Entry point used by the test runner. Resets the counters, runs every
// edge-case suite in a fixed order, prints a summary line, and returns 0 when
// all checks passed or 1 otherwise.
int test_edge_cases_run() {
    g_tests_run = 0;
    g_tests_passed = 0;

    using SuiteFn = void (*)();
    const SuiteFn suites[] = {
        test_scalar_zero_rejection,
        test_scalar_parse_boundaries,
        test_infinity_arithmetic,
        test_ecdsa_zero_key,
        test_schnorr_boundary,
        test_bip32_il_geq_n,
        test_precompute_cache_corrupt,
        test_scalar_arithmetic_edges,
        test_field_edge_cases,
        test_ecdsa_sig_parse_boundaries,
    };
    for (const SuiteFn suite : suites) {
        suite();
    }

    std::printf("\n--- Edge case summary: %d/%d passed ---\n",
                g_tests_passed, g_tests_run);

    if (g_tests_passed == g_tests_run) {
        return 0;
    }
    return 1;
}
|
||||
|
||||
#ifdef STANDALONE_TEST
// Standalone build: the process exit code is the edge-case suite's result.
int main() {
    const int exit_code = test_edge_cases_run();
    return exit_code;
}
#endif
|
||||
@ -1,220 +0,0 @@
|
||||
// ============================================================================
|
||||
// Test: SIMD Field Operations + Batch Inverse
|
||||
// ============================================================================
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "secp256k1/field_simd.hpp"
|
||||
#include "secp256k1/field.hpp"
|
||||
|
||||
using namespace secp256k1;
|
||||
using fast::FieldElement;
|
||||
|
||||
// Pass/fail tallies for the SIMD batch tests.
static int g_pass = 0;
static int g_fail = 0;

// Records one test result. Passing checks are silent; a failing check is
// counted and reported on stdout with its name.
static void check(bool cond, const char* name) {
    if (!cond) {
        ++g_fail;
        std::printf(" FAIL: %s\n", name);
        return;
    }
    ++g_pass;
}
|
||||
|
||||
// Calls the runtime SIMD detection helpers and prints what they report.
// The only assertion is that none of the calls crashed.
static void test_simd_detection() {
    std::printf("[SIMD] Runtime detection...\n");

    const auto detected = simd::detect_simd_tier();
    std::printf(" Detected: %s\n", simd::simd_tier_name(detected));

    // Queried in a fixed order before printing; must be safe on any platform.
    const bool has_avx2 = simd::avx2_available();
    const bool has_avx512 = simd::avx512_available();
    const char* const avx2_text = has_avx2 ? "yes" : "no";
    const char* const avx512_text = has_avx512 ? "yes" : "no";
    std::printf(" AVX2: %s, AVX-512: %s\n", avx2_text, avx512_text);

    check(true, "SIMD detection: no crash");
}
|
||||
|
||||
static void test_batch_add() {
|
||||
std::printf("[SIMD] Batch field add...\n");
|
||||
|
||||
constexpr int N = 16;
|
||||
FieldElement a[N], b[N], out[N], expected[N];
|
||||
|
||||
for (int i = 0; i < N; ++i) {
|
||||
a[i] = FieldElement::from_uint64(static_cast<uint64_t>(i) + 100);
|
||||
b[i] = FieldElement::from_uint64(static_cast<uint64_t>(i) + 200);
|
||||
expected[i] = a[i] + b[i];
|
||||
}
|
||||
|
||||
simd::batch_field_add(out, a, b, N);
|
||||
|
||||
bool all_ok = true;
|
||||
for (int i = 0; i < N; ++i) {
|
||||
if (out[i].to_bytes() != expected[i].to_bytes()) {
|
||||
all_ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
check(all_ok, "Batch add: matches scalar results");
|
||||
}
|
||||
|
||||
static void test_batch_sub() {
|
||||
std::printf("[SIMD] Batch field sub...\n");
|
||||
|
||||
constexpr int N = 16;
|
||||
FieldElement a[N], b[N], out[N], expected[N];
|
||||
|
||||
for (int i = 0; i < N; ++i) {
|
||||
a[i] = FieldElement::from_uint64(static_cast<uint64_t>(i) + 1000);
|
||||
b[i] = FieldElement::from_uint64(static_cast<uint64_t>(i) + 500);
|
||||
expected[i] = a[i] - b[i];
|
||||
}
|
||||
|
||||
simd::batch_field_sub(out, a, b, N);
|
||||
|
||||
bool all_ok = true;
|
||||
for (int i = 0; i < N; ++i) {
|
||||
if (out[i].to_bytes() != expected[i].to_bytes()) {
|
||||
all_ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
check(all_ok, "Batch sub: matches scalar results");
|
||||
}
|
||||
|
||||
static void test_batch_mul() {
|
||||
std::printf("[SIMD] Batch field mul...\n");
|
||||
|
||||
constexpr int N = 8;
|
||||
FieldElement a[N], b[N], out[N], expected[N];
|
||||
|
||||
for (int i = 0; i < N; ++i) {
|
||||
a[i] = FieldElement::from_uint64(static_cast<uint64_t>(i) * 3 + 7);
|
||||
b[i] = FieldElement::from_uint64(static_cast<uint64_t>(i) * 5 + 11);
|
||||
expected[i] = a[i] * b[i];
|
||||
}
|
||||
|
||||
simd::batch_field_mul(out, a, b, N);
|
||||
|
||||
bool all_ok = true;
|
||||
for (int i = 0; i < N; ++i) {
|
||||
if (out[i].to_bytes() != expected[i].to_bytes()) {
|
||||
all_ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
check(all_ok, "Batch mul: matches scalar results");
|
||||
}
|
||||
|
||||
static void test_batch_sqr() {
|
||||
std::printf("[SIMD] Batch field square...\n");
|
||||
|
||||
constexpr int N = 8;
|
||||
FieldElement a[N], out[N], expected[N];
|
||||
|
||||
for (int i = 0; i < N; ++i) {
|
||||
a[i] = FieldElement::from_uint64(static_cast<uint64_t>(i) * 7 + 13);
|
||||
expected[i] = a[i].square();
|
||||
}
|
||||
|
||||
simd::batch_field_sqr(out, a, N);
|
||||
|
||||
bool all_ok = true;
|
||||
for (int i = 0; i < N; ++i) {
|
||||
if (out[i].to_bytes() != expected[i].to_bytes()) {
|
||||
all_ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
check(all_ok, "Batch sqr: matches scalar results");
|
||||
}
|
||||
|
||||
static void test_batch_inv() {
|
||||
std::printf("[SIMD] Batch field inverse (Montgomery's trick)...\n");
|
||||
|
||||
constexpr int N = 16;
|
||||
FieldElement a[N], out[N];
|
||||
|
||||
for (int i = 0; i < N; ++i) {
|
||||
a[i] = FieldElement::from_uint64(static_cast<uint64_t>(i) * 2 + 3);
|
||||
}
|
||||
|
||||
simd::batch_field_inv(out, a, N);
|
||||
|
||||
// Verify: a[i] * out[i] == 1
|
||||
bool all_ok = true;
|
||||
auto one = FieldElement::one();
|
||||
for (int i = 0; i < N; ++i) {
|
||||
auto product = a[i] * out[i];
|
||||
if (product.to_bytes() != one.to_bytes()) {
|
||||
all_ok = false;
|
||||
std::printf(" Failed at index %d\n", i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
check(all_ok, "Batch inv: a[i] * inv(a[i]) == 1 for all i");
|
||||
}
|
||||
|
||||
static void test_batch_inv_single() {
|
||||
std::printf("[SIMD] Batch inverse: single element...\n");
|
||||
|
||||
FieldElement const a = FieldElement::from_uint64(42);
|
||||
FieldElement out;
|
||||
|
||||
simd::batch_field_inv(&out, &a, 1);
|
||||
|
||||
auto product = a * out;
|
||||
check(product.to_bytes() == FieldElement::one().to_bytes(),
|
||||
"Batch inv single: a * inv(a) == 1");
|
||||
}
|
||||
|
||||
static void test_batch_inv_with_scratch() {
|
||||
std::printf("[SIMD] Batch inverse with explicit scratch...\n");
|
||||
|
||||
constexpr int N = 8;
|
||||
FieldElement a[N], out[N], scratch[N];
|
||||
|
||||
for (int i = 0; i < N; ++i) {
|
||||
a[i] = FieldElement::from_uint64(static_cast<uint64_t>(i) * 11 + 17);
|
||||
}
|
||||
|
||||
simd::batch_field_inv(out, a, N, scratch);
|
||||
|
||||
bool all_ok = true;
|
||||
auto one = FieldElement::one();
|
||||
for (int i = 0; i < N; ++i) {
|
||||
auto product = a[i] * out[i];
|
||||
if (product.to_bytes() != one.to_bytes()) {
|
||||
all_ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
check(all_ok, "Batch inv with scratch: verified");
|
||||
}
|
||||
|
||||
// Entry point for the SIMD/batch field tests. Prints a banner, runs every
// test in a fixed order, prints the pass/fail totals, and returns 1 if any
// check failed or 0 otherwise.
int test_simd_batch_run() {
    std::printf("===============================================================\n");
    std::printf(" UltrafastSecp256k1 -- SIMD + Batch Field Tests\n");
    std::printf("===============================================================\n\n");

    using TestFn = void (*)();
    const TestFn tests[] = {
        test_simd_detection,
        test_batch_add,
        test_batch_sub,
        test_batch_mul,
        test_batch_sqr,
        test_batch_inv,
        test_batch_inv_single,
        test_batch_inv_with_scratch,
    };
    for (const TestFn test : tests) {
        test();
    }

    std::printf("\n===============================================================\n");
    std::printf(" Results: %d passed, %d failed (total %d)\n",
                g_pass, g_fail, g_pass + g_fail);
    std::printf("===============================================================\n");

    if (g_fail > 0) {
        return 1;
    }
    return 0;
}
|
||||
@ -17,39 +17,39 @@
|
||||
},
|
||||
"summary": {
|
||||
"total": 27,
|
||||
"passed": 19,
|
||||
"failed": 8,
|
||||
"passed": 27,
|
||||
"failed": 0,
|
||||
"skipped": 0,
|
||||
"total_seconds": 0.195707,
|
||||
"verdict": "ISSUES-FOUND"
|
||||
"total_seconds": 0.727543,
|
||||
"verdict": "AUDIT-READY"
|
||||
},
|
||||
"modules": [
|
||||
{ "id": "selftest_core", "name": "OpenCL Selftest (23+ kernel tests)", "section": "math_invariants", "result": "PASS", "time_ms": 157.458784, "error_code": 0 },
|
||||
{ "id": "field_add_sub", "name": "Field add/sub roundtrip", "section": "math_invariants", "result": "PASS", "time_ms": 0.344194, "error_code": 0 },
|
||||
{ "id": "field_mul_comm", "name": "Field mul commutativity", "section": "math_invariants", "result": "PASS", "time_ms": 0.161442, "error_code": 0 },
|
||||
{ "id": "field_inv", "name": "Field inverse roundtrip (a * a^-1 = 1)", "section": "math_invariants", "result": "PASS", "time_ms": 0.277172, "error_code": 0 },
|
||||
{ "id": "field_sqr", "name": "Field square == mul(a,a)", "section": "math_invariants", "result": "PASS", "time_ms": 0.133867, "error_code": 0 },
|
||||
{ "id": "field_negate", "name": "Field negate roundtrip (a + (-a) = 0)", "section": "math_invariants", "result": "PASS", "time_ms": 0.132015, "error_code": 0 },
|
||||
{ "id": "gen_mul_vec", "name": "Generator mul known vectors", "section": "math_invariants", "result": "PASS", "time_ms": 0.074126, "error_code": 0 },
|
||||
{ "id": "scalar_roundtrip", "name": "Scalar/Point consistency", "section": "math_invariants", "result": "PASS", "time_ms": 0.201873, "error_code": 0 },
|
||||
{ "id": "add_dbl_consist", "name": "Point add vs double consistency", "section": "math_invariants", "result": "PASS", "time_ms": 0.243917, "error_code": 0 },
|
||||
{ "id": "scalar_mul_lin", "name": "Scalar mul linearity (a+b)*G = aG+bG", "section": "math_invariants", "result": "PASS", "time_ms": 0.353603, "error_code": 0 },
|
||||
{ "id": "group_order", "name": "Group order basic checks", "section": "math_invariants", "result": "PASS", "time_ms": 0.234223, "error_code": 0 },
|
||||
{ "id": "batch_inv", "name": "Batch inversion (Montgomery trick)", "section": "math_invariants", "result": "PASS", "time_ms": 0.694958, "error_code": 0 },
|
||||
{ "id": "ecdsa_roundtrip", "name": "ECDSA sign + verify roundtrip", "section": "signatures", "result": "FAIL", "time_ms": 5.387598, "error_code": 2 },
|
||||
{ "id": "schnorr_roundtrip", "name": "Schnorr/BIP-340 sign + verify roundtrip", "section": "signatures", "result": "FAIL", "time_ms": 2.694820, "error_code": 2 },
|
||||
{ "id": "ecdsa_wrong_key", "name": "ECDSA verify rejects wrong pubkey", "section": "signatures", "result": "PASS", "time_ms": 4.869086, "error_code": 0 },
|
||||
{ "id": "batch_smul", "name": "Batch scalar mul generator", "section": "batch_advanced", "result": "PASS", "time_ms": 0.454583, "error_code": 0 },
|
||||
{ "id": "batch_j2a", "name": "Batch Jacobian to Affine", "section": "batch_advanced", "result": "FAIL", "time_ms": 0.375855, "error_code": 1 },
|
||||
{ "id": "diff_smul", "name": "OpenCL-host differential scalar mul", "section": "differential", "result": "PASS", "time_ms": 0.074472, "error_code": 0 },
|
||||
{ "id": "rfc6979_determ", "name": "RFC-6979 ECDSA deterministic nonce", "section": "standard_vectors", "result": "PASS", "time_ms": 5.097664, "error_code": 0 },
|
||||
{ "id": "bip340_vectors", "name": "BIP-340 Schnorr known-key roundtrip", "section": "standard_vectors", "result": "FAIL", "time_ms": 1.981557, "error_code": 2 },
|
||||
{ "id": "ecdsa_multi_key", "name": "ECDSA multi-key (10 keys) sign+verify", "section": "protocol_security", "result": "FAIL", "time_ms": 4.830620, "error_code": 20 },
|
||||
{ "id": "schnorr_multi_key", "name": "Schnorr multi-key (10 keys) sign+verify", "section": "protocol_security", "result": "FAIL", "time_ms": 1.892115, "error_code": 20 },
|
||||
{ "id": "fuzz_edge_scalar", "name": "Edge-case scalars (0*G, 1*G, G+G=2G)", "section": "fuzzing", "result": "PASS", "time_ms": 0.244023, "error_code": 0 },
|
||||
{ "id": "fuzz_ecdsa_zero", "name": "ECDSA rejects zero private key", "section": "fuzzing", "result": "PASS", "time_ms": 0.082785, "error_code": 0 },
|
||||
{ "id": "fuzz_schnorr_zero", "name": "Schnorr rejects zero private key", "section": "fuzzing", "result": "PASS", "time_ms": 0.088844, "error_code": 0 },
|
||||
{ "id": "perf_ecdsa_50", "name": "ECDSA 50-iteration stress", "section": "performance", "result": "FAIL", "time_ms": 4.934114, "error_code": 2 },
|
||||
{ "id": "perf_schnorr_25", "name": "Schnorr 25-iteration stress", "section": "performance", "result": "FAIL", "time_ms": 2.247930, "error_code": 2 }
|
||||
{ "id": "selftest_core", "name": "OpenCL Selftest (23+ kernel tests)", "section": "math_invariants", "result": "PASS", "time_ms": 152.799583, "error_code": 0 },
|
||||
{ "id": "field_add_sub", "name": "Field add/sub roundtrip", "section": "math_invariants", "result": "PASS", "time_ms": 0.307649, "error_code": 0 },
|
||||
{ "id": "field_mul_comm", "name": "Field mul commutativity", "section": "math_invariants", "result": "PASS", "time_ms": 0.140150, "error_code": 0 },
|
||||
{ "id": "field_inv", "name": "Field inverse roundtrip (a * a^-1 = 1)", "section": "math_invariants", "result": "PASS", "time_ms": 0.266819, "error_code": 0 },
|
||||
{ "id": "field_sqr", "name": "Field square == mul(a,a)", "section": "math_invariants", "result": "PASS", "time_ms": 0.120384, "error_code": 0 },
|
||||
{ "id": "field_negate", "name": "Field negate roundtrip (a + (-a) = 0)", "section": "math_invariants", "result": "PASS", "time_ms": 0.118151, "error_code": 0 },
|
||||
{ "id": "gen_mul_vec", "name": "Generator mul known vectors", "section": "math_invariants", "result": "PASS", "time_ms": 0.069495, "error_code": 0 },
|
||||
{ "id": "scalar_roundtrip", "name": "Scalar/Point consistency", "section": "math_invariants", "result": "PASS", "time_ms": 0.196458, "error_code": 0 },
|
||||
{ "id": "add_dbl_consist", "name": "Point add vs double consistency", "section": "math_invariants", "result": "PASS", "time_ms": 0.226176, "error_code": 0 },
|
||||
{ "id": "scalar_mul_lin", "name": "Scalar mul linearity (a+b)*G = aG+bG", "section": "math_invariants", "result": "PASS", "time_ms": 0.334149, "error_code": 0 },
|
||||
{ "id": "group_order", "name": "Group order basic checks", "section": "math_invariants", "result": "PASS", "time_ms": 0.224126, "error_code": 0 },
|
||||
{ "id": "batch_inv", "name": "Batch inversion (Montgomery trick)", "section": "math_invariants", "result": "PASS", "time_ms": 0.639383, "error_code": 0 },
|
||||
{ "id": "ecdsa_roundtrip", "name": "ECDSA sign + verify roundtrip", "section": "signatures", "result": "PASS", "time_ms": 7.346328, "error_code": 0 },
|
||||
{ "id": "schnorr_roundtrip", "name": "Schnorr/BIP-340 sign + verify roundtrip", "section": "signatures", "result": "PASS", "time_ms": 6.647268, "error_code": 0 },
|
||||
{ "id": "ecdsa_wrong_key", "name": "ECDSA verify rejects wrong pubkey", "section": "signatures", "result": "PASS", "time_ms": 6.214200, "error_code": 0 },
|
||||
{ "id": "batch_smul", "name": "Batch scalar mul generator", "section": "batch_advanced", "result": "PASS", "time_ms": 0.435053, "error_code": 0 },
|
||||
{ "id": "batch_j2a", "name": "Batch Jacobian to Affine", "section": "batch_advanced", "result": "PASS", "time_ms": 0.518009, "error_code": 0 },
|
||||
{ "id": "diff_smul", "name": "OpenCL-host differential scalar mul", "section": "differential", "result": "PASS", "time_ms": 0.070056, "error_code": 0 },
|
||||
{ "id": "rfc6979_determ", "name": "RFC-6979 ECDSA deterministic nonce", "section": "standard_vectors", "result": "PASS", "time_ms": 6.422079, "error_code": 0 },
|
||||
{ "id": "bip340_vectors", "name": "BIP-340 Schnorr known-key roundtrip", "section": "standard_vectors", "result": "PASS", "time_ms": 8.872533, "error_code": 0 },
|
||||
{ "id": "ecdsa_multi_key", "name": "ECDSA multi-key (10 keys) sign+verify", "section": "protocol_security", "result": "PASS", "time_ms": 60.908449, "error_code": 0 },
|
||||
{ "id": "schnorr_multi_key", "name": "Schnorr multi-key (10 keys) sign+verify", "section": "protocol_security", "result": "PASS", "time_ms": 55.286184, "error_code": 0 },
|
||||
{ "id": "fuzz_edge_scalar", "name": "Edge-case scalars (0*G, 1*G, G+G=2G)", "section": "fuzzing", "result": "PASS", "time_ms": 0.229781, "error_code": 0 },
|
||||
{ "id": "fuzz_ecdsa_zero", "name": "ECDSA rejects zero private key", "section": "fuzzing", "result": "PASS", "time_ms": 0.077824, "error_code": 0 },
|
||||
{ "id": "fuzz_schnorr_zero", "name": "Schnorr rejects zero private key", "section": "fuzzing", "result": "PASS", "time_ms": 0.082019, "error_code": 0 },
|
||||
{ "id": "perf_ecdsa_50", "name": "ECDSA 50-iteration stress", "section": "performance", "result": "PASS", "time_ms": 287.677880, "error_code": 0 },
|
||||
{ "id": "perf_schnorr_25", "name": "Schnorr 25-iteration stress", "section": "performance", "result": "PASS", "time_ms": 131.178937, "error_code": 0 }
|
||||
]
|
||||
}
|
||||
|
||||
@ -8,56 +8,56 @@
|
||||
|
||||
Section: math_invariants
|
||||
--------------------------------------------------
|
||||
[PASS] OpenCL Selftest (23+ kernel tests) (157.459 ms)
|
||||
[PASS] Field add/sub roundtrip (0.344194 ms)
|
||||
[PASS] Field mul commutativity (0.161442 ms)
|
||||
[PASS] Field inverse roundtrip (a * a^-1 = 1) (0.277172 ms)
|
||||
[PASS] Field square == mul(a,a) (0.133867 ms)
|
||||
[PASS] Field negate roundtrip (a + (-a) = 0) (0.132015 ms)
|
||||
[PASS] Generator mul known vectors (0.074126 ms)
|
||||
[PASS] Scalar/Point consistency (0.201873 ms)
|
||||
[PASS] Point add vs double consistency (0.243917 ms)
|
||||
[PASS] Scalar mul linearity (a+b)*G = aG+bG (0.353603 ms)
|
||||
[PASS] Group order basic checks (0.234223 ms)
|
||||
[PASS] Batch inversion (Montgomery trick) (0.694958 ms)
|
||||
[PASS] OpenCL Selftest (23+ kernel tests) (152.8 ms)
|
||||
[PASS] Field add/sub roundtrip (0.307649 ms)
|
||||
[PASS] Field mul commutativity (0.14015 ms)
|
||||
[PASS] Field inverse roundtrip (a * a^-1 = 1) (0.266819 ms)
|
||||
[PASS] Field square == mul(a,a) (0.120384 ms)
|
||||
[PASS] Field negate roundtrip (a + (-a) = 0) (0.118151 ms)
|
||||
[PASS] Generator mul known vectors (0.069495 ms)
|
||||
[PASS] Scalar/Point consistency (0.196458 ms)
|
||||
[PASS] Point add vs double consistency (0.226176 ms)
|
||||
[PASS] Scalar mul linearity (a+b)*G = aG+bG (0.334149 ms)
|
||||
[PASS] Group order basic checks (0.224126 ms)
|
||||
[PASS] Batch inversion (Montgomery trick) (0.639383 ms)
|
||||
|
||||
Section: signatures
|
||||
--------------------------------------------------
|
||||
[FAIL] ECDSA sign + verify roundtrip (5.3876 ms)
|
||||
[FAIL] Schnorr/BIP-340 sign + verify roundtrip (2.69482 ms)
|
||||
[PASS] ECDSA verify rejects wrong pubkey (4.86909 ms)
|
||||
[PASS] ECDSA sign + verify roundtrip (7.34633 ms)
|
||||
[PASS] Schnorr/BIP-340 sign + verify roundtrip (6.64727 ms)
|
||||
[PASS] ECDSA verify rejects wrong pubkey (6.2142 ms)
|
||||
|
||||
Section: batch_advanced
|
||||
--------------------------------------------------
|
||||
[PASS] Batch scalar mul generator (0.454583 ms)
|
||||
[FAIL] Batch Jacobian to Affine (0.375855 ms)
|
||||
[PASS] Batch scalar mul generator (0.435053 ms)
|
||||
[PASS] Batch Jacobian to Affine (0.518009 ms)
|
||||
|
||||
Section: differential
|
||||
--------------------------------------------------
|
||||
[PASS] OpenCL-host differential scalar mul (0.074472 ms)
|
||||
[PASS] OpenCL-host differential scalar mul (0.070056 ms)
|
||||
|
||||
Section: standard_vectors
|
||||
--------------------------------------------------
|
||||
[PASS] RFC-6979 ECDSA deterministic nonce (5.09766 ms)
|
||||
[FAIL] BIP-340 Schnorr known-key roundtrip (1.98156 ms)
|
||||
[PASS] RFC-6979 ECDSA deterministic nonce (6.42208 ms)
|
||||
[PASS] BIP-340 Schnorr known-key roundtrip (8.87253 ms)
|
||||
|
||||
Section: protocol_security
|
||||
--------------------------------------------------
|
||||
[FAIL] ECDSA multi-key (10 keys) sign+verify (4.83062 ms)
|
||||
[FAIL] Schnorr multi-key (10 keys) sign+verify (1.89211 ms)
|
||||
[PASS] ECDSA multi-key (10 keys) sign+verify (60.9084 ms)
|
||||
[PASS] Schnorr multi-key (10 keys) sign+verify (55.2862 ms)
|
||||
|
||||
Section: fuzzing
|
||||
--------------------------------------------------
|
||||
[PASS] Edge-case scalars (0*G, 1*G, G+G=2G) (0.244023 ms)
|
||||
[PASS] ECDSA rejects zero private key (0.082785 ms)
|
||||
[PASS] Schnorr rejects zero private key (0.088844 ms)
|
||||
[PASS] Edge-case scalars (0*G, 1*G, G+G=2G) (0.229781 ms)
|
||||
[PASS] ECDSA rejects zero private key (0.077824 ms)
|
||||
[PASS] Schnorr rejects zero private key (0.082019 ms)
|
||||
|
||||
Section: performance
|
||||
--------------------------------------------------
|
||||
[FAIL] ECDSA 50-iteration stress (4.93411 ms)
|
||||
[FAIL] Schnorr 25-iteration stress (2.24793 ms)
|
||||
[PASS] ECDSA 50-iteration stress (287.678 ms)
|
||||
[PASS] Schnorr 25-iteration stress (131.179 ms)
|
||||
|
||||
================================================================
|
||||
VERDICT: ISSUES-FOUND
|
||||
TOTAL: 19/27 passed, 8 FAILED (0.2 s)
|
||||
VERDICT: AUDIT-READY
|
||||
TOTAL: 27/27 passed (0.7 s)
|
||||
================================================================
|
||||
|
||||
@ -99,13 +99,10 @@ inline void scalar_from_bytes_impl(const uchar bytes[32], Scalar* out) {
|
||||
out->limbs[i] = limb;
|
||||
}
|
||||
// Branchless reduction: if scalar >= n, subtract n
|
||||
ulong borrow = 0, tmp[4], diff;
|
||||
ulong borrow = 0, tmp[4];
|
||||
ulong n[4] = { ORDER_N0, ORDER_N1, ORDER_N2, ORDER_N3 };
|
||||
for (int i = 0; i < 4; i++) {
|
||||
diff = out->limbs[i] - n[i] - borrow;
|
||||
borrow = (out->limbs[i] < n[i] + borrow) ? 1UL : 0UL;
|
||||
tmp[i] = diff;
|
||||
}
|
||||
for (int i = 0; i < 4; i++)
|
||||
tmp[i] = sub_with_borrow(out->limbs[i], n[i], borrow, &borrow);
|
||||
ulong mask = -(ulong)(borrow == 0); // if no borrow, scalar >= n
|
||||
for (int i = 0; i < 4; i++)
|
||||
out->limbs[i] = (tmp[i] & mask) | (out->limbs[i] & ~mask);
|
||||
@ -241,123 +238,138 @@ inline void scalar_negate_impl(const Scalar* a, Scalar* r) {
|
||||
int is_zero_flag = scalar_is_zero(a);
|
||||
|
||||
ulong borrow = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
ulong diff = n[i] - a->limbs[i] - borrow;
|
||||
borrow = (n[i] < a->limbs[i] + borrow) ? 1UL : 0UL;
|
||||
r->limbs[i] = diff;
|
||||
}
|
||||
for (int i = 0; i < 4; i++)
|
||||
r->limbs[i] = sub_with_borrow(n[i], a->limbs[i], borrow, &borrow);
|
||||
// If a was zero, result should be zero too
|
||||
ulong mask = -(ulong)(!is_zero_flag);
|
||||
for (int i = 0; i < 4; i++) r->limbs[i] &= mask;
|
||||
}
|
||||
|
||||
// Helper: branchless conditional subtract n (r -= n if r >= n)
|
||||
inline void scalar_cond_sub_n(Scalar* r) {
|
||||
ulong n[4] = { ORDER_N0, ORDER_N1, ORDER_N2, ORDER_N3 };
|
||||
ulong borrow = 0;
|
||||
ulong tmp[4];
|
||||
for (int i = 0; i < 4; i++)
|
||||
tmp[i] = sub_with_borrow(r->limbs[i], n[i], borrow, &borrow);
|
||||
// borrow==0 means r >= n, use subtracted result
|
||||
ulong mask = -(ulong)(borrow == 0);
|
||||
for (int i = 0; i < 4; i++)
|
||||
r->limbs[i] = (tmp[i] & mask) | (r->limbs[i] & ~mask);
|
||||
}
|
||||
|
||||
// Scalar add mod n: r = (a + b) mod n
|
||||
inline void scalar_add_mod_n_impl(const Scalar* a, const Scalar* b, Scalar* r) {
|
||||
ulong carry = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
ulong sum = a->limbs[i] + b->limbs[i] + carry;
|
||||
carry = (sum < a->limbs[i] || (carry && sum == a->limbs[i])) ? 1UL : 0UL;
|
||||
r->limbs[i] = sum;
|
||||
}
|
||||
// Reduce: if r >= n, subtract n
|
||||
ulong n[4] = { ORDER_N0, ORDER_N1, ORDER_N2, ORDER_N3 };
|
||||
ulong borrow = 0, tmp[4];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
ulong diff = r->limbs[i] - n[i] - borrow;
|
||||
borrow = (r->limbs[i] < n[i] + borrow) ? 1UL : 0UL;
|
||||
tmp[i] = diff;
|
||||
}
|
||||
ulong mask = -(ulong)(borrow == 0 || carry);
|
||||
for (int i = 0; i < 4; i++)
|
||||
r->limbs[i] = (tmp[i] & mask) | (r->limbs[i] & ~mask);
|
||||
r->limbs[i] = add_with_carry(a->limbs[i], b->limbs[i], carry, &carry);
|
||||
// If carry, definitely >= n; otherwise check and conditionally subtract
|
||||
if (carry) {
|
||||
// r + 2^256 - n: since carry=1, effectively subtract (n - 2^256) = subtract n, add 2^256
|
||||
// which is same as: result = r - n (the carry absorbed the 2^256)
|
||||
ulong n[4] = { ORDER_N0, ORDER_N1, ORDER_N2, ORDER_N3 };
|
||||
ulong borrow = 0;
|
||||
for (int i = 0; i < 4; i++)
|
||||
r->limbs[i] = sub_with_borrow(r->limbs[i], n[i], borrow, &borrow);
|
||||
} else {
|
||||
scalar_cond_sub_n(r);
|
||||
}
|
||||
}
|
||||
|
||||
// Scalar sub mod n: r = (a - b) mod n
|
||||
inline void scalar_sub_mod_n_impl(const Scalar* a, const Scalar* b, Scalar* r) {
|
||||
ulong borrow = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
ulong diff = a->limbs[i] - b->limbs[i] - borrow;
|
||||
borrow = (a->limbs[i] < b->limbs[i] + borrow) ? 1UL : 0UL;
|
||||
r->limbs[i] = diff;
|
||||
}
|
||||
for (int i = 0; i < 4; i++)
|
||||
r->limbs[i] = sub_with_borrow(a->limbs[i], b->limbs[i], borrow, &borrow);
|
||||
// If borrow, add n back
|
||||
if (borrow) {
|
||||
ulong n[4] = { ORDER_N0, ORDER_N1, ORDER_N2, ORDER_N3 };
|
||||
ulong carry2 = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
ulong sum = r->limbs[i] + n[i] + carry2;
|
||||
carry2 = (sum < r->limbs[i] || (carry2 && sum == r->limbs[i])) ? 1UL : 0UL;
|
||||
r->limbs[i] = sum;
|
||||
}
|
||||
for (int i = 0; i < 4; i++)
|
||||
r->limbs[i] = add_with_carry(r->limbs[i], n[i], carry2, &carry2);
|
||||
}
|
||||
}
|
||||
|
||||
// Scalar multiply mod n (256×256→512 with Barrett reduction)
|
||||
// Scalar multiply mod n: r = (a * b) mod n
|
||||
// Uses 2^256 ≡ NC (mod n) reduction where NC = 2^256 - n
|
||||
inline void scalar_mul_mod_n_impl(const Scalar* a, const Scalar* b, Scalar* r) {
|
||||
// Full 512-bit product
|
||||
// NC = 2^256 - n = {0x402DA1732FC9BEBF, 0x4551231950B75FC4, 1, 0}
|
||||
ulong NC[3] = { 0x402DA1732FC9BEBFUL, 0x4551231950B75FC4UL, 0x1UL };
|
||||
|
||||
// Step 1: Full 512-bit schoolbook multiplication
|
||||
ulong prod[8] = {0,0,0,0,0,0,0,0};
|
||||
for (int i = 0; i < 4; i++) {
|
||||
ulong carry = 0;
|
||||
for (int j = 0; j < 4; j++) {
|
||||
ulong2 full = mul64_full(a->limbs[i], b->limbs[j]);
|
||||
ulong lo = full.x + prod[i+j] + carry;
|
||||
carry = full.y + ((lo < prod[i+j]) ? 1UL : 0UL);
|
||||
prod[i+j] = lo;
|
||||
ulong c1, c2;
|
||||
ulong s = add_with_carry(full.x, prod[i+j], 0, &c1);
|
||||
s = add_with_carry(s, carry, 0, &c2);
|
||||
prod[i+j] = s;
|
||||
carry = full.y + c1 + c2;
|
||||
}
|
||||
prod[i+4] = carry;
|
||||
}
|
||||
|
||||
// Barrett reduction: q = floor(prod * mu / 2^512), then prod - q*n
|
||||
ulong mu[5] = { BARRETT_MU0, BARRETT_MU1, BARRETT_MU2, BARRETT_MU3, BARRETT_MU4 };
|
||||
ulong n_arr[4] = { ORDER_N0, ORDER_N1, ORDER_N2, ORDER_N3 };
|
||||
|
||||
// Approximate quotient q ≈ prod[4..7] (top 256 bits)
|
||||
// For Barrett, we compute q1 = prod >> 252 (approx), q2 = q1 * mu >> 260
|
||||
// Simplified: use top 4 limbs and mu to get candidate quotient
|
||||
ulong q[4];
|
||||
{
|
||||
// q = (prod[4..7] * mu4) + ...
|
||||
// Simplified Barrett: q = prod[4..7] since mu ≈ 2^256 + small
|
||||
// Then subtract n at most twice
|
||||
q[0] = prod[4]; q[1] = prod[5]; q[2] = prod[6]; q[3] = prod[7];
|
||||
}
|
||||
|
||||
// r = prod mod 2^256
|
||||
r->limbs[0] = prod[0]; r->limbs[1] = prod[1];
|
||||
r->limbs[2] = prod[2]; r->limbs[3] = prod[3];
|
||||
|
||||
// Subtract q*n from r
|
||||
ulong qn[4] = {0,0,0,0};
|
||||
// Step 2: Reduce high 256 bits. acc = prod[0..3] + prod[4..7] * NC
|
||||
// prod[4..7] * NC has at most 256+129 = 385 bits
|
||||
ulong acc[7] = {prod[0], prod[1], prod[2], prod[3], 0, 0, 0};
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (prod[4+i] == 0) continue;
|
||||
ulong carry = 0;
|
||||
for (int j = 0; j < 4 && (i+j) < 4; j++) {
|
||||
ulong2 full = mul64_full(q[i], n_arr[j]);
|
||||
ulong lo = full.x + qn[i+j] + carry;
|
||||
carry = full.y + ((lo < qn[i+j]) ? 1UL : 0UL);
|
||||
qn[i+j] = lo;
|
||||
for (int j = 0; j < 3; j++) {
|
||||
ulong2 full = mul64_full(prod[4+i], NC[j]);
|
||||
ulong c1, c2;
|
||||
ulong s = add_with_carry(full.x, acc[i+j], 0, &c1);
|
||||
s = add_with_carry(s, carry, 0, &c2);
|
||||
acc[i+j] = s;
|
||||
carry = full.y + c1 + c2;
|
||||
}
|
||||
// Propagate remaining carry
|
||||
for (int k = i+3; k < 7 && carry; k++) {
|
||||
acc[k] = add_with_carry(acc[k], carry, 0, &carry);
|
||||
}
|
||||
}
|
||||
|
||||
ulong borrow = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
ulong diff = r->limbs[i] - qn[i] - borrow;
|
||||
borrow = (r->limbs[i] < qn[i] + borrow) ? 1UL : 0UL;
|
||||
r->limbs[i] = diff;
|
||||
// Step 3: Reduce again. res = acc[0..3] + acc[4..6] * NC
|
||||
ulong res[5] = {acc[0], acc[1], acc[2], acc[3], 0};
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (acc[4+i] == 0) continue;
|
||||
ulong carry = 0;
|
||||
for (int j = 0; j < 3; j++) {
|
||||
if (i+j >= 5) break;
|
||||
ulong2 full = mul64_full(acc[4+i], NC[j]);
|
||||
ulong c1, c2;
|
||||
ulong s = add_with_carry(full.x, res[i+j], 0, &c1);
|
||||
s = add_with_carry(s, carry, 0, &c2);
|
||||
res[i+j] = s;
|
||||
carry = full.y + c1 + c2;
|
||||
}
|
||||
for (int k = i+3; k < 5 && carry; k++) {
|
||||
res[k] = add_with_carry(res[k], carry, 0, &carry);
|
||||
}
|
||||
}
|
||||
|
||||
// Conditional subtract n (at most twice)
|
||||
for (int pass = 0; pass < 2; pass++) {
|
||||
borrow = 0;
|
||||
ulong tmp[4];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
ulong diff = r->limbs[i] - n_arr[i] - borrow;
|
||||
borrow = (r->limbs[i] < n_arr[i] + borrow) ? 1UL : 0UL;
|
||||
tmp[i] = diff;
|
||||
// Step 4: Handle res[4] overflow
|
||||
r->limbs[0] = res[0]; r->limbs[1] = res[1];
|
||||
r->limbs[2] = res[2]; r->limbs[3] = res[3];
|
||||
if (res[4] != 0) {
|
||||
ulong carry = 0;
|
||||
for (int j = 0; j < 3; j++) {
|
||||
ulong2 full = mul64_full(res[4], NC[j]);
|
||||
ulong c1, c2;
|
||||
ulong s = add_with_carry(full.x, r->limbs[j], 0, &c1);
|
||||
s = add_with_carry(s, carry, 0, &c2);
|
||||
r->limbs[j] = s;
|
||||
carry = full.y + c1 + c2;
|
||||
}
|
||||
ulong mask = -(ulong)(borrow == 0);
|
||||
for (int i = 0; i < 4; i++)
|
||||
r->limbs[i] = (tmp[i] & mask) | (r->limbs[i] & ~mask);
|
||||
r->limbs[3] += carry;
|
||||
}
|
||||
|
||||
// Step 5: Conditional subtract n (at most 3 times to ensure < n)
|
||||
scalar_cond_sub_n(r);
|
||||
scalar_cond_sub_n(r);
|
||||
scalar_cond_sub_n(r);
|
||||
}
|
||||
|
||||
// Scalar inverse mod n via binary exponentiation: a^(n-2) mod n
|
||||
|
||||
@ -213,96 +213,65 @@ inline void field_sub_impl(FieldElement* r, const FieldElement* a, const FieldEl
|
||||
// Field Multiplication: r = (a * b) mod p
|
||||
// =============================================================================
|
||||
|
||||
// Helper: add 128-bit product (hi:lo) into 3-register accumulator (c2:c1:c0)
|
||||
inline void muladd(ulong lo, ulong hi, ulong* c0, ulong* c1, ulong* c2) {
|
||||
ulong carry;
|
||||
*c0 = add_with_carry(*c0, lo, 0, &carry);
|
||||
*c1 = add_with_carry(*c1, hi, carry, &carry);
|
||||
*c2 += carry;
|
||||
}
|
||||
|
||||
// Helper: add 128-bit product (hi:lo) doubled into accumulator
|
||||
inline void muladd2(ulong lo, ulong hi, ulong* c0, ulong* c1, ulong* c2) {
|
||||
muladd(lo, hi, c0, c1, c2);
|
||||
muladd(lo, hi, c0, c1, c2);
|
||||
}
|
||||
|
||||
// r = (a * b) mod p.
//
// r    : receives the reduced product; may alias a or b because both operands
//        are copied into locals before anything is written.
// a, b : operands (four 64-bit limbs each, least-significant limb first)
//
// The 512-bit product is built column by column: every partial product
// a[i]*b[j] with i + j == k is added into a three-word accumulator (c2:c1:c0)
// via muladd, the low word becomes product[k], and the accumulator is shifted
// down one word for the next column. The result is then handed to field_reduce.
inline void field_mul_impl(FieldElement* r, const FieldElement* a, const FieldElement* b) {
    ulong a0 = a->limbs[0], a1 = a->limbs[1], a2 = a->limbs[2], a3 = a->limbs[3];
    ulong b0 = b->limbs[0], b1 = b->limbs[1], b2 = b->limbs[2], b3 = b->limbs[3];
    ulong product[8];
    ulong c0, c1, c2;
    ulong2 m;

    // Column 0: a0*b0
    c0 = 0; c1 = 0; c2 = 0;
    m = mul64_full(a0, b0); muladd(m.x, m.y, &c0, &c1, &c2);
    product[0] = c0; c0 = c1; c1 = c2; c2 = 0;

    // Column 1: a0*b1 + a1*b0
    m = mul64_full(a0, b1); muladd(m.x, m.y, &c0, &c1, &c2);
    m = mul64_full(a1, b0); muladd(m.x, m.y, &c0, &c1, &c2);
    product[1] = c0; c0 = c1; c1 = c2; c2 = 0;

    // Column 2: a0*b2 + a1*b1 + a2*b0
    m = mul64_full(a0, b2); muladd(m.x, m.y, &c0, &c1, &c2);
    m = mul64_full(a1, b1); muladd(m.x, m.y, &c0, &c1, &c2);
    m = mul64_full(a2, b0); muladd(m.x, m.y, &c0, &c1, &c2);
    product[2] = c0; c0 = c1; c1 = c2; c2 = 0;

    // Column 3: a0*b3 + a1*b2 + a2*b1 + a3*b0
    m = mul64_full(a0, b3); muladd(m.x, m.y, &c0, &c1, &c2);
    m = mul64_full(a1, b2); muladd(m.x, m.y, &c0, &c1, &c2);
    m = mul64_full(a2, b1); muladd(m.x, m.y, &c0, &c1, &c2);
    m = mul64_full(a3, b0); muladd(m.x, m.y, &c0, &c1, &c2);
    product[3] = c0; c0 = c1; c1 = c2; c2 = 0;

    // Column 4: a1*b3 + a2*b2 + a3*b1
    m = mul64_full(a1, b3); muladd(m.x, m.y, &c0, &c1, &c2);
    m = mul64_full(a2, b2); muladd(m.x, m.y, &c0, &c1, &c2);
    m = mul64_full(a3, b1); muladd(m.x, m.y, &c0, &c1, &c2);
    product[4] = c0; c0 = c1; c1 = c2; c2 = 0;

    // Column 5: a2*b3 + a3*b2
    m = mul64_full(a2, b3); muladd(m.x, m.y, &c0, &c1, &c2);
    m = mul64_full(a3, b2); muladd(m.x, m.y, &c0, &c1, &c2);
    product[5] = c0; c0 = c1; c1 = c2; c2 = 0;

    // Column 6: a3*b3 -- the last column; its two low words finish the product.
    m = mul64_full(a3, b3); muladd(m.x, m.y, &c0, &c1, &c2);
    product[6] = c0;
    product[7] = c1;

    field_reduce(r, product);
}
|
||||
@ -324,98 +293,43 @@ inline void field_sqr_n_impl(FieldElement* r, int n) {
|
||||
}
|
||||
|
||||
// r = (a * a) mod p.
//
// r : receives the reduced square; may alias a because the operand is copied
//     into locals before anything is written.
// a : operand (four 64-bit limbs, least-significant limb first)
//
// Same column-wise accumulation as field_mul_impl, exploiting symmetry: each
// cross term a[i]*a[j] (i != j) is computed once and added twice via muladd2,
// while the diagonal terms a[i]*a[i] are added once via muladd. The 512-bit
// result is then handed to field_reduce.
inline void field_sqr_impl(FieldElement* r, const FieldElement* a) {
    ulong a0 = a->limbs[0], a1 = a->limbs[1], a2 = a->limbs[2], a3 = a->limbs[3];
    ulong product[8];
    ulong c0, c1, c2;
    ulong2 m;

    // Column 0: a0*a0
    c0 = 0; c1 = 0; c2 = 0;
    m = mul64_full(a0, a0); muladd(m.x, m.y, &c0, &c1, &c2);
    product[0] = c0; c0 = c1; c1 = c2; c2 = 0;

    // Column 1: 2*a0*a1
    m = mul64_full(a0, a1); muladd2(m.x, m.y, &c0, &c1, &c2);
    product[1] = c0; c0 = c1; c1 = c2; c2 = 0;

    // Column 2: 2*a0*a2 + a1*a1
    m = mul64_full(a0, a2); muladd2(m.x, m.y, &c0, &c1, &c2);
    m = mul64_full(a1, a1); muladd(m.x, m.y, &c0, &c1, &c2);
    product[2] = c0; c0 = c1; c1 = c2; c2 = 0;

    // Column 3: 2*a0*a3 + 2*a1*a2
    m = mul64_full(a0, a3); muladd2(m.x, m.y, &c0, &c1, &c2);
    m = mul64_full(a1, a2); muladd2(m.x, m.y, &c0, &c1, &c2);
    product[3] = c0; c0 = c1; c1 = c2; c2 = 0;

    // Column 4: 2*a1*a3 + a2*a2
    m = mul64_full(a1, a3); muladd2(m.x, m.y, &c0, &c1, &c2);
    m = mul64_full(a2, a2); muladd(m.x, m.y, &c0, &c1, &c2);
    product[4] = c0; c0 = c1; c1 = c2; c2 = 0;

    // Column 5: 2*a2*a3
    m = mul64_full(a2, a3); muladd2(m.x, m.y, &c0, &c1, &c2);
    product[5] = c0; c0 = c1; c1 = c2; c2 = 0;

    // Column 6: a3*a3 -- the last column; its two low words finish the product.
    m = mul64_full(a3, a3); muladd(m.x, m.y, &c0, &c1, &c2);
    product[6] = c0;
    product[7] = c1;

    field_reduce(r, product);
}
|
||||
|
||||
@ -452,9 +452,36 @@ static bool ocl_schnorr_verify(const uint8_t pubkey_x[32], const uint8_t msg[32]
|
||||
return result != 0;
|
||||
}
|
||||
|
||||
// Helper: compute pubkey via extended kernel (generator_mul_windowed)
|
||||
// This ensures field arithmetic consistency: pubkey and verify use the same
|
||||
// cl_program (secp256k1_extended.cl) with identical field_mul_impl.
|
||||
static JacobianPoint ext_generator_mul(const Scalar& priv) {
|
||||
cl_int err;
|
||||
cl_mem d_scalar = clCreateBuffer(g_ext.context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
sizeof(Scalar), (void*)&priv, &err);
|
||||
JacobianPoint result{};
|
||||
cl_mem d_result = clCreateBuffer(g_ext.context, CL_MEM_WRITE_ONLY,
|
||||
sizeof(JacobianPoint), nullptr, &err);
|
||||
cl_uint count = 1;
|
||||
clSetKernelArg(g_ext.k_gen_mul_win, 0, sizeof(cl_mem), &d_scalar);
|
||||
clSetKernelArg(g_ext.k_gen_mul_win, 1, sizeof(cl_mem), &d_result);
|
||||
clSetKernelArg(g_ext.k_gen_mul_win, 2, sizeof(cl_uint), &count);
|
||||
|
||||
size_t global = 1;
|
||||
clEnqueueNDRangeKernel(g_ext.queue, g_ext.k_gen_mul_win, 1, nullptr,
|
||||
&global, nullptr, 0, nullptr, nullptr);
|
||||
clFinish(g_ext.queue);
|
||||
clEnqueueReadBuffer(g_ext.queue, d_result, CL_TRUE, 0,
|
||||
sizeof(JacobianPoint), &result, 0, nullptr, nullptr);
|
||||
clReleaseMemObject(d_scalar);
|
||||
clReleaseMemObject(d_result);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Helper: get pubkey X bytes from scalar (for Schnorr)
|
||||
// Uses ext_generator_mul to ensure consistency with schnorr_verify kernel.
|
||||
static void get_schnorr_pubkey_x(const Scalar& priv, uint8_t out[32]) {
|
||||
auto P = g_ctx->scalar_mul_generator(priv);
|
||||
auto P = ext_generator_mul(priv);
|
||||
auto aff = jacobian_to_affine(P);
|
||||
// Big-endian serialize field element
|
||||
for (int i = 0; i < 4; i++) {
|
||||
@ -467,7 +494,7 @@ static void get_schnorr_pubkey_x(const Scalar& priv, uint8_t out[32]) {
|
||||
|
||||
// ECDSA roundtrip: sign + verify
|
||||
static int audit_ecdsa_roundtrip() {
|
||||
if (!g_ext.valid) return -1; // skip
|
||||
if (!g_ext.valid) return -1;
|
||||
auto priv = sc_from_u64(42);
|
||||
uint8_t msg[32] = {};
|
||||
msg[0] = 0xAA; msg[31] = 0xBB;
|
||||
@ -475,7 +502,8 @@ static int audit_ecdsa_roundtrip() {
|
||||
ExtendedCL::ECDSASig sig;
|
||||
if (!ocl_ecdsa_sign(priv, msg, sig)) return 1;
|
||||
|
||||
auto pub = g_ctx->scalar_mul_generator(priv);
|
||||
// Use pubkey from extended kernel (same field arithmetic as sign/verify)
|
||||
auto pub = ext_generator_mul(priv);
|
||||
if (!ocl_ecdsa_verify(pub, msg, sig)) return 2;
|
||||
return 0;
|
||||
}
|
||||
@ -507,7 +535,7 @@ static int audit_ecdsa_wrong_key() {
|
||||
ExtendedCL::ECDSASig sig;
|
||||
if (!ocl_ecdsa_sign(priv1, msg, sig)) return 1;
|
||||
|
||||
auto pub2 = g_ctx->scalar_mul_generator(priv2);
|
||||
auto pub2 = ext_generator_mul(priv2);
|
||||
// Verify with wrong key must FAIL
|
||||
if (ocl_ecdsa_verify(pub2, msg, sig)) return 2;
|
||||
return 0;
|
||||
@ -630,7 +658,7 @@ static int audit_ecdsa_multi_key() {
|
||||
auto priv = sc_from_u64(keys[ki]);
|
||||
ExtendedCL::ECDSASig sig;
|
||||
if (!ocl_ecdsa_sign(priv, msg, sig)) return 10 + ki;
|
||||
auto pub = g_ctx->scalar_mul_generator(priv);
|
||||
auto pub = ext_generator_mul(priv);
|
||||
if (!ocl_ecdsa_verify(pub, msg, sig)) return 20 + ki;
|
||||
}
|
||||
return 0;
|
||||
@ -711,7 +739,7 @@ static int audit_fuzz_schnorr_zero_key() {
|
||||
static int audit_perf_ecdsa_stress() {
|
||||
if (!g_ext.valid) return -1;
|
||||
auto priv = sc_from_u64(0xDEADCAFE);
|
||||
auto pub = g_ctx->scalar_mul_generator(priv);
|
||||
auto pub = ext_generator_mul(priv);
|
||||
uint8_t msg[32] = {};
|
||||
|
||||
for (int i = 0; i < 50; i++) {
|
||||
|
||||
@ -1165,7 +1165,7 @@ bool Context::Impl::create_kernels() {
|
||||
kernel_scalar_mul = clCreateKernel(program, "scalar_mul", &err);
|
||||
if (err != CL_SUCCESS) { last_error = "Failed to create scalar_mul kernel"; return false; }
|
||||
|
||||
kernel_batch_jacobian_to_affine = clCreateKernel(program, "batch_jacobian_to_affine", &err);
|
||||
kernel_batch_jacobian_to_affine = clCreateKernel(program, "batch_jacobian_to_affine_kernel", &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
// Non-fatal -- kernel may not exist in older builds
|
||||
kernel_batch_jacobian_to_affine = nullptr;
|
||||
|
||||
@ -149,4 +149,10 @@ if [ "$build_first" -eq 1 ]; then
|
||||
(cd "$repo_root" && $compose_cmd -f docker-compose.ci.yml build ci-base)
|
||||
fi
|
||||
|
||||
(cd "$repo_root" && $compose_cmd -f docker-compose.ci.yml run --rm "$target")
|
||||
# Pass -T when stdin is not a terminal (e.g. git pre-push hook)
|
||||
tty_flag=""
|
||||
if [ ! -t 0 ]; then
|
||||
tty_flag="-T"
|
||||
fi
|
||||
|
||||
(cd "$repo_root" && $compose_cmd -f docker-compose.ci.yml run --rm $tty_flag "$target")
|
||||
|
||||
@ -16,7 +16,7 @@ idf_component_register(
|
||||
"${CPU_SRC}/field_26.cpp"
|
||||
"${CPU_SRC}/field_52.cpp"
|
||||
"${CPU_SRC}/field_asm.cpp"
|
||||
"${CPU_SRC}/field_simd.cpp"
|
||||
|
||||
"${CPU_SRC}/scalar.cpp"
|
||||
"${CPU_SRC}/point.cpp"
|
||||
"${CPU_SRC}/glv.cpp"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user