1830 lines
65 KiB
Go
1830 lines
65 KiB
Go
// Copyright (c) 2021-2023 The Decred developers
|
|
// Use of this source code is governed by an ISC
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Package uint256 implements highly optimized fixed precision unsigned 256-bit
|
|
// integer arithmetic.
|
|
package uint256
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"math/big"
|
|
"math/bits"
|
|
)
|
|
|
|
// References:
|
|
// [TAOCP2]: The Art of Computer Programming, Volume 2.
|
|
// Seminumerical Algorithms (Knuth, Donald E.)
|
|
|
|
var (
|
|
// zero32 is an array of 32 bytes used for the purposes of zeroing and is
|
|
// defined here to avoid extra allocations.
|
|
zero32 = [32]byte{}
|
|
|
|
// bigUint256Mask is the value 2^256 - 1 (aka max uint256) as a stdlib big
|
|
// integer. It is defined here to save allocations in the conversion code.
|
|
bigTwo256 = new(big.Int).Lsh(big.NewInt(1), 256)
|
|
bigUint256Mask = new(big.Int).Sub(bigTwo256, big.NewInt(1))
|
|
)
|
|
|
|
// Uint256 implements high-performance, zero-allocation, unsigned 256-bit
|
|
// fixed-precision arithmetic. All operations are performed modulo 2^256, so
|
|
// callers may rely on "wrap around" semantics.
|
|
//
|
|
// It implements the primary arithmetic operations (addition, subtraction,
|
|
// multiplication, squaring, division, negation), bitwise operations (lsh, rsh,
|
|
// not, or, and, xor), comparison operations (equals, less, greater, cmp),
|
|
// interpreting and producing big and little endian bytes, and other convenience
|
|
// methods such as determining the minimum number of bits required to represent
|
|
// the current value, whether or not the value can be represented as a uint64
|
|
// without loss of precision, and text formatting with base conversion.
|
|
//
|
|
// Should it be absolutely necessary, conversion to the standard library
|
|
// math/big.Int can be accomplished by using the ToBig or PutBig methods.
|
|
// However, that should typically be avoided when possible as conversion to
|
|
// big.Ints requires allocations and is slower for every operation, often to a
|
|
// significant degree.
|
|
type Uint256 struct {
|
|
// The uint256 is represented as 4 unsigned 64-bit integers in base 2^64.
|
|
//
|
|
// The following depicts the internal representation:
|
|
//
|
|
// --------------------------------------------------------------------
|
|
// | n[3] | n[2] | n[1] | n[0] |
|
|
// | 64 bits | 64 bits | 64 bits | 64 bits |
|
|
// | Mult: 2^(64*3) | Mult: 2^(64*2) | Mult: 2^(64*1) | Mult: 2^(64*0) |
|
|
// -------------------------------------------------------------------
|
|
//
|
|
// For example, consider the number:
|
|
// 0x0000000000000000080000000000000000000000000001000000000000000001 =
|
|
// 2^187 + 2^72 + 1
|
|
//
|
|
// It would be represented as:
|
|
// n[0] = 1
|
|
// n[1] = 2^8
|
|
// n[2] = 2^59
|
|
// n[3] = 0
|
|
//
|
|
// The full 256-bit value is then calculated by looping i from 3..0 and
|
|
// doing sum(n[i] * 2^(64i)) as follows:
|
|
// n[3] * 2^(64*3) = 0 * 2^192 = 0
|
|
// n[2] * 2^(64*2) = 2^59 * 2^128 = 2^187
|
|
// n[1] * 2^(64*1) = 2^8 * 2^64 = 2^72
|
|
// n[0] * 2^(64*0) = 1 * 2^0 = 1
|
|
// Sum: 0 + 2^187 + 2^72 + 1 = 2^187 + 2^72 + 1
|
|
n [4]uint64
|
|
}
|
|
|
|
// Set sets the uint256 equal to the same value as the passed one.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n := new(Uint256).Set(n2).AddUint64(1) so that n = n2 + 1 where n2 is not
|
|
// modified.
|
|
func (n *Uint256) Set(n2 *Uint256) *Uint256 {
|
|
*n = *n2
|
|
return n
|
|
}
|
|
|
|
// SetUint64 sets the uint256 to the passed unsigned 64-bit integer. This is a
|
|
// convenience function since it is fairly common to perform arithmetic with
|
|
// small native integers.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n := new(Uint256).SetUint64(2).Mul(n2) so that n = 2 * n2.
|
|
func (n *Uint256) SetUint64(n2 uint64) *Uint256 {
|
|
n.n[0] = n2
|
|
n.n[1] = 0
|
|
n.n[2] = 0
|
|
n.n[3] = 0
|
|
return n
|
|
}
|
|
|
|
// SetBytes interprets the provided array as a 256-bit big-endian unsigned
|
|
// integer and sets the uint256 to the result.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n := new(Uint256).SetBytes(n2Bytes).AddUint64(1) so that n = n2 + 1.
|
|
func (n *Uint256) SetBytes(b *[32]byte) *Uint256 {
|
|
// Pack the 256 total bits across the 4 uint64 words. This could be done
|
|
// with a for loop, but benchmarks show this unrolled version is about 2
|
|
// times faster than the variant that uses a loop.
|
|
n.n[0] = uint64(b[31]) | uint64(b[30])<<8 | uint64(b[29])<<16 |
|
|
uint64(b[28])<<24 | uint64(b[27])<<32 | uint64(b[26])<<40 |
|
|
uint64(b[25])<<48 | uint64(b[24])<<56
|
|
n.n[1] = uint64(b[23]) | uint64(b[22])<<8 | uint64(b[21])<<16 |
|
|
uint64(b[20])<<24 | uint64(b[19])<<32 | uint64(b[18])<<40 |
|
|
uint64(b[17])<<48 | uint64(b[16])<<56
|
|
n.n[2] = uint64(b[15]) | uint64(b[14])<<8 | uint64(b[13])<<16 |
|
|
uint64(b[12])<<24 | uint64(b[11])<<32 | uint64(b[10])<<40 |
|
|
uint64(b[9])<<48 | uint64(b[8])<<56
|
|
n.n[3] = uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 |
|
|
uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 |
|
|
uint64(b[1])<<48 | uint64(b[0])<<56
|
|
return n
|
|
}
|
|
|
|
// SetBytesLE interprets the provided array as a 256-bit little-endian unsigned
|
|
// integer and sets the uint256 to the result.
|
|
func (n *Uint256) SetBytesLE(b *[32]byte) *Uint256 {
|
|
// Pack the 256 total bits across the 4 uint64 words. This could be done
|
|
// with a for loop, but benchmarks show this unrolled version is about 2
|
|
// times faster than the variant that uses a loop.
|
|
n.n[0] = uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 |
|
|
uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 |
|
|
uint64(b[6])<<48 | uint64(b[7])<<56
|
|
n.n[1] = uint64(b[8]) | uint64(b[9])<<8 | uint64(b[10])<<16 |
|
|
uint64(b[11])<<24 | uint64(b[12])<<32 | uint64(b[13])<<40 |
|
|
uint64(b[14])<<48 | uint64(b[15])<<56
|
|
n.n[2] = uint64(b[16]) | uint64(b[17])<<8 | uint64(b[18])<<16 |
|
|
uint64(b[19])<<24 | uint64(b[20])<<32 | uint64(b[21])<<40 |
|
|
uint64(b[22])<<48 | uint64(b[23])<<56
|
|
n.n[3] = uint64(b[24]) | uint64(b[25])<<8 | uint64(b[26])<<16 |
|
|
uint64(b[27])<<24 | uint64(b[28])<<32 | uint64(b[29])<<40 |
|
|
uint64(b[30])<<48 | uint64(b[31])<<56
|
|
return n
|
|
}
|
|
|
|
// zeroArray32 zeroes the provided 32-byte buffer.
|
|
func zeroArray32(b *[32]byte) {
|
|
copy(b[:], zero32[:])
|
|
}
|
|
|
|
// minInt is a helper function to return the minimum of two ints.
|
|
// This avoids a math import and the need to cast to floats.
|
|
func minInt(a, b int) int {
|
|
if a < b {
|
|
return a
|
|
}
|
|
return b
|
|
}
|
|
|
|
// SetByteSlice interprets the provided slice as a 256-bit big-endian unsigned
|
|
// integer (meaning it is truncated to the final 32 bytes so that it is modulo
|
|
// 2^256), and sets the uint256 to the result.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n := new(Uint256).SetByteSlice(n2Slice).AddUint64(1) so that n = n2 + 1.
|
|
func (n *Uint256) SetByteSlice(b []byte) *Uint256 {
|
|
var b32 [32]byte
|
|
b = b[len(b)-minInt(len(b), 32):]
|
|
copy(b32[32-len(b):], b)
|
|
n.SetBytes(&b32)
|
|
zeroArray32(&b32)
|
|
return n
|
|
}
|
|
|
|
// SetByteSliceLE interprets the provided slice as a 256-bit little-endian
|
|
// unsigned integer (meaning it is truncated to the first 32 bytes so that it is
|
|
// modulo 2^256), and sets the uint256 to the result.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n := new(Uint256).SetByteSliceLE(n2Slice).AddUint64(1) so that n = n2 + 1.
|
|
func (n *Uint256) SetByteSliceLE(b []byte) *Uint256 {
|
|
var b32 [32]byte
|
|
b = b[:minInt(len(b), 32)]
|
|
copy(b32[:], b)
|
|
n.SetBytesLE(&b32)
|
|
zeroArray32(&b32)
|
|
return n
|
|
}
|
|
|
|
// PutBytesUnchecked unpacks the uint256 to a 32-byte big-endian value directly
|
|
// into the passed byte slice. The target slice must have at least 32 bytes
|
|
// available or it will panic.
|
|
//
|
|
// There is a similar function, PutBytes, which unpacks the uint256 into a
|
|
// 32-byte array directly. This version is provided since it can be useful to
|
|
// write directly into part of a larger buffer without needing a separate
|
|
// allocation.
|
|
func (n *Uint256) PutBytesUnchecked(b []byte) {
|
|
// Unpack the 256 total bits from the 4 uint64 words. This could be done
|
|
// with a for loop, but benchmarks show this unrolled version is about 2
|
|
// times faster than the variant which uses a loop.
|
|
b[31] = byte(n.n[0])
|
|
b[30] = byte(n.n[0] >> 8)
|
|
b[29] = byte(n.n[0] >> 16)
|
|
b[28] = byte(n.n[0] >> 24)
|
|
b[27] = byte(n.n[0] >> 32)
|
|
b[26] = byte(n.n[0] >> 40)
|
|
b[25] = byte(n.n[0] >> 48)
|
|
b[24] = byte(n.n[0] >> 56)
|
|
b[23] = byte(n.n[1])
|
|
b[22] = byte(n.n[1] >> 8)
|
|
b[21] = byte(n.n[1] >> 16)
|
|
b[20] = byte(n.n[1] >> 24)
|
|
b[19] = byte(n.n[1] >> 32)
|
|
b[18] = byte(n.n[1] >> 40)
|
|
b[17] = byte(n.n[1] >> 48)
|
|
b[16] = byte(n.n[1] >> 56)
|
|
b[15] = byte(n.n[2])
|
|
b[14] = byte(n.n[2] >> 8)
|
|
b[13] = byte(n.n[2] >> 16)
|
|
b[12] = byte(n.n[2] >> 24)
|
|
b[11] = byte(n.n[2] >> 32)
|
|
b[10] = byte(n.n[2] >> 40)
|
|
b[9] = byte(n.n[2] >> 48)
|
|
b[8] = byte(n.n[2] >> 56)
|
|
b[7] = byte(n.n[3])
|
|
b[6] = byte(n.n[3] >> 8)
|
|
b[5] = byte(n.n[3] >> 16)
|
|
b[4] = byte(n.n[3] >> 24)
|
|
b[3] = byte(n.n[3] >> 32)
|
|
b[2] = byte(n.n[3] >> 40)
|
|
b[1] = byte(n.n[3] >> 48)
|
|
b[0] = byte(n.n[3] >> 56)
|
|
}
|
|
|
|
// PutBytesUncheckedLE unpacks the uint256 to a 32-byte little-endian value
|
|
// directly into the passed byte slice. The target slice must have at least 32
|
|
// bytes available or it will panic.
|
|
//
|
|
// There is a similar function, PutBytesLE, which unpacks the uint256 into a
|
|
// 32-byte array directly. This version is provided since it can be useful to
|
|
// write directly into part of a larger buffer without needing a separate
|
|
// allocation.
|
|
func (n *Uint256) PutBytesUncheckedLE(b []byte) {
|
|
// Unpack the 256 total bits from the 4 uint64 words. This could be done
|
|
// with a for loop, but benchmarks show this unrolled version is about 2
|
|
// times faster than the variant which uses a loop.
|
|
b[31] = byte(n.n[3] >> 56)
|
|
b[30] = byte(n.n[3] >> 48)
|
|
b[29] = byte(n.n[3] >> 40)
|
|
b[28] = byte(n.n[3] >> 32)
|
|
b[27] = byte(n.n[3] >> 24)
|
|
b[26] = byte(n.n[3] >> 16)
|
|
b[25] = byte(n.n[3] >> 8)
|
|
b[24] = byte(n.n[3])
|
|
b[23] = byte(n.n[2] >> 56)
|
|
b[22] = byte(n.n[2] >> 48)
|
|
b[21] = byte(n.n[2] >> 40)
|
|
b[20] = byte(n.n[2] >> 32)
|
|
b[19] = byte(n.n[2] >> 24)
|
|
b[18] = byte(n.n[2] >> 16)
|
|
b[17] = byte(n.n[2] >> 8)
|
|
b[16] = byte(n.n[2])
|
|
b[15] = byte(n.n[1] >> 56)
|
|
b[14] = byte(n.n[1] >> 48)
|
|
b[13] = byte(n.n[1] >> 40)
|
|
b[12] = byte(n.n[1] >> 32)
|
|
b[11] = byte(n.n[1] >> 24)
|
|
b[10] = byte(n.n[1] >> 16)
|
|
b[9] = byte(n.n[1] >> 8)
|
|
b[8] = byte(n.n[1])
|
|
b[7] = byte(n.n[0] >> 56)
|
|
b[6] = byte(n.n[0] >> 48)
|
|
b[5] = byte(n.n[0] >> 40)
|
|
b[4] = byte(n.n[0] >> 32)
|
|
b[3] = byte(n.n[0] >> 24)
|
|
b[2] = byte(n.n[0] >> 16)
|
|
b[1] = byte(n.n[0] >> 8)
|
|
b[0] = byte(n.n[0])
|
|
}
|
|
|
|
// PutBytes unpacks the uint256 to a 32-byte big-endian value using the passed
|
|
// byte array.
|
|
//
|
|
// There is a similar function, PutBytesUnchecked, which unpacks the uint256
|
|
// into a slice that must have at least 32 bytes available. This version is
|
|
// provided since it can be useful to write directly into an array that is type
|
|
// checked.
|
|
//
|
|
// Alternatively, there is also Bytes, which unpacks the uint256 into a new
|
|
// array and returns that which can sometimes be more ergonomic in applications
|
|
// that aren't concerned about an additional copy.
|
|
func (n *Uint256) PutBytes(b *[32]byte) {
|
|
n.PutBytesUnchecked(b[:])
|
|
}
|
|
|
|
// PutBytesLE unpacks the uint256 to a 32-byte little-endian value using the
|
|
// passed byte array.
|
|
//
|
|
// There is a similar function, PutBytesUncheckedLE, which unpacks the uint256
|
|
// into a slice that must have at least 32 bytes available. This version is
|
|
// provided since it can be useful to write directly into an array that is type
|
|
// checked.
|
|
//
|
|
// Alternatively, there is also BytesLE, which unpacks the uint256 into a new
|
|
// array and returns that which can sometimes be more ergonomic in applications
|
|
// that aren't concerned about an additional copy.
|
|
func (n *Uint256) PutBytesLE(b *[32]byte) {
|
|
n.PutBytesUncheckedLE(b[:])
|
|
}
|
|
|
|
// Bytes unpacks the uint256 to a 32-byte big-endian array.
|
|
//
|
|
// See PutBytes and PutBytesUnchecked for variants that allow an array or slice
|
|
// to be passed which can be useful to cut down on the number of allocations
|
|
// by allowing the caller to reuse a buffer or write directly into part of a
|
|
// larger buffer.
|
|
func (n *Uint256) Bytes() [32]byte {
|
|
var b [32]byte
|
|
n.PutBytesUnchecked(b[:])
|
|
return b
|
|
}
|
|
|
|
// BytesLE unpacks the uint256 to a 32-byte little-endian array.
|
|
//
|
|
// See PutBytesLE and PutBytesUncheckedLE for variants that allow an array or
|
|
// slice to be passed which can be useful to cut down on the number of
|
|
// allocations by allowing the caller to reuse a buffer or write directly into
|
|
// part of a larger buffer.
|
|
func (n *Uint256) BytesLE() [32]byte {
|
|
var b [32]byte
|
|
n.PutBytesUncheckedLE(b[:])
|
|
return b
|
|
}
|
|
|
|
// Zero sets the uint256 to zero. A newly created uint256 is already set to
|
|
// zero. This function can be useful to clear an existing uint256 for reuse.
|
|
func (n *Uint256) Zero() {
|
|
n.n[0], n.n[1], n.n[2], n.n[3] = 0, 0, 0, 0
|
|
}
|
|
|
|
// IsZero returns whether or not the uint256 is equal to zero.
|
|
func (n *Uint256) IsZero() bool {
|
|
return n.n[0] == 0 && n.n[1] == 0 && n.n[2] == 0 && n.n[3] == 0
|
|
}
|
|
|
|
// IsOdd returns whether or not the uint256 is odd.
|
|
func (n *Uint256) IsOdd() bool {
|
|
// Only odd numbers have the bottom bit set.
|
|
return n.n[0]&1 == 1
|
|
}
|
|
|
|
// IsUint32 returns whether or not the uint256 can be converted to a uint32
|
|
// without any loss of precision. In other words, 0 <= n < 2^32.
|
|
func (n *Uint256) IsUint32() bool {
|
|
return (n.n[0]>>32 | n.n[1] | n.n[2] | n.n[3]) == 0
|
|
}
|
|
|
|
// Uint32 returns the uint32 representation of the value. In other words, it
|
|
// returns the low-order 32 bits of the value as a uint32 which is equivalent to
|
|
// the value modulo 2^32. The caller can determine if this method can be used
|
|
// without truncation with the IsUint32 method.
|
|
func (n *Uint256) Uint32() uint32 {
|
|
return uint32(n.n[0])
|
|
}
|
|
|
|
// IsUint64 returns whether or not the uint256 can be converted to a uint64
|
|
// without any loss of precision. In other words, 0 <= n < 2^64.
|
|
func (n *Uint256) IsUint64() bool {
|
|
return (n.n[1] | n.n[2] | n.n[3]) == 0
|
|
}
|
|
|
|
// Uint64 returns the uint64 representation of the value. In other words, it
|
|
// returns the low-order 64 bits of the value as a uint64 which is equivalent to
|
|
// the value modulo 2^64. The caller can determine if this method can be used
|
|
// without truncation with the IsUint64 method.
|
|
func (n *Uint256) Uint64() uint64 {
|
|
return n.n[0]
|
|
}
|
|
|
|
// Eq returns whether or not the two uint256s represent the same value.
|
|
func (n *Uint256) Eq(n2 *Uint256) bool {
|
|
return n.n[0] == n2.n[0] && n.n[1] == n2.n[1] && n.n[2] == n2.n[2] &&
|
|
n.n[3] == n2.n[3]
|
|
}
|
|
|
|
// EqUint64 returns whether or not the uint256 represents the same value as the
|
|
// given uint64.
|
|
func (n *Uint256) EqUint64(n2 uint64) bool {
|
|
return n.n[0] == n2 && (n.n[1]|n.n[2]|n.n[3]) == 0
|
|
}
|
|
|
|
// Lt returns whether or not the uint256 is less than the given one. That is,
|
|
// it returns true when n < n2.
|
|
func (n *Uint256) Lt(n2 *Uint256) bool {
|
|
var borrow uint64
|
|
_, borrow = bits.Sub64(n.n[0], n2.n[0], borrow)
|
|
_, borrow = bits.Sub64(n.n[1], n2.n[1], borrow)
|
|
_, borrow = bits.Sub64(n.n[2], n2.n[2], borrow)
|
|
_, borrow = bits.Sub64(n.n[3], n2.n[3], borrow)
|
|
return borrow != 0
|
|
}
|
|
|
|
// LtUint64 returns whether or not the uint256 is less than the given uint64.
|
|
// That is, it returns true when n < n2.
|
|
func (n *Uint256) LtUint64(n2 uint64) bool {
|
|
return n.n[0] < n2 && (n.n[1]|n.n[2]|n.n[3]) == 0
|
|
}
|
|
|
|
// LtEq returns whether or not the uint256 is less than or equal to the given
|
|
// one. That is, it returns true when n <= n2.
|
|
func (n *Uint256) LtEq(n2 *Uint256) bool {
|
|
return !n2.Lt(n)
|
|
}
|
|
|
|
// LtEqUint64 returns whether or not the uint256 is less than or equal to the
|
|
// given uint64. That is, it returns true when n <= n2.
|
|
func (n *Uint256) LtEqUint64(n2 uint64) bool {
|
|
return n.n[0] <= n2 && (n.n[1]|n.n[2]|n.n[3]) == 0
|
|
}
|
|
|
|
// Gt returns whether or not the uint256 is greater than the given one. That
|
|
// is, it returns true when n > n2.
|
|
func (n *Uint256) Gt(n2 *Uint256) bool {
|
|
return n2.Lt(n)
|
|
}
|
|
|
|
// GtUint64 returns whether or not the uint256 is greater than the given uint64.
|
|
// That is, it returns true when n > n2.
|
|
func (n *Uint256) GtUint64(n2 uint64) bool {
|
|
return n.n[0] > n2 || (n.n[1]|n.n[2]|n.n[3]) != 0
|
|
}
|
|
|
|
// GtEq returns whether or not the uint256 is greater than or equal to the given
|
|
// one. That is, it returns true when n >= n2.
|
|
func (n *Uint256) GtEq(n2 *Uint256) bool {
|
|
return !n.Lt(n2)
|
|
}
|
|
|
|
// GtEqUint64 returns whether or not the uint256 is greater than or equal to the
|
|
// given uint64. That is, it returns true when n >= n2.
|
|
func (n *Uint256) GtEqUint64(n2 uint64) bool {
|
|
return !n.LtUint64(n2)
|
|
}
|
|
|
|
// Cmp compares the two uint256s and returns -1, 0, or 1 depending on whether
|
|
// the uint256 is less than, equal to, or greater than the given one.
|
|
//
|
|
// That is, it returns:
|
|
//
|
|
// -1 when n < n2
|
|
// 0 when n == n2
|
|
// +1 when n > n2
|
|
func (n *Uint256) Cmp(n2 *Uint256) int {
|
|
var borrow uint64
|
|
r0, borrow := bits.Sub64(n.n[0], n2.n[0], borrow)
|
|
r1, borrow := bits.Sub64(n.n[1], n2.n[1], borrow)
|
|
r2, borrow := bits.Sub64(n.n[2], n2.n[2], borrow)
|
|
r3, borrow := bits.Sub64(n.n[3], n2.n[3], borrow)
|
|
if borrow != 0 {
|
|
return -1
|
|
}
|
|
if r0|r1|r2|r3 == 0 {
|
|
return 0
|
|
}
|
|
return 1
|
|
}
|
|
|
|
// CmpUint64 compares the uint256 with the given uint64 and returns -1, 0, or 1
|
|
// depending on whether the uint256 is less than, equal to, or greater than the
|
|
// uint64.
|
|
//
|
|
// That is, it returns:
|
|
//
|
|
// -1 when n < n2
|
|
// 0 when n == n2
|
|
// +1 when n > n2
|
|
func (n *Uint256) CmpUint64(n2 uint64) int {
|
|
if n.LtUint64(n2) {
|
|
return -1
|
|
}
|
|
if n.GtUint64(n2) {
|
|
return 1
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// Add2 adds the passed two uint256s together modulo 2^256 and stores the result
|
|
// in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.Add2(n1, n2).AddUint64(1) so that n = n1 + n2 + 1.
|
|
func (n *Uint256) Add2(n1, n2 *Uint256) *Uint256 {
|
|
var c uint64
|
|
n.n[0], c = bits.Add64(n1.n[0], n2.n[0], c)
|
|
n.n[1], c = bits.Add64(n1.n[1], n2.n[1], c)
|
|
n.n[2], c = bits.Add64(n1.n[2], n2.n[2], c)
|
|
n.n[3], _ = bits.Add64(n1.n[3], n2.n[3], c)
|
|
return n
|
|
}
|
|
|
|
// Add adds the passed uint256 to the existing one modulo 2^256 and stores the
|
|
// result in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.Add(n2).AddUin64(1) so that n = n + n2 + 1.
|
|
func (n *Uint256) Add(n2 *Uint256) *Uint256 {
|
|
return n.Add2(n, n2)
|
|
}
|
|
|
|
// AddUint64 adds the passed uint64 to the existing uint256 modulo 2^256 and
|
|
// stores the result in n.
|
|
//
|
|
// The scalar is returned to support chaining. This enables syntax like:
|
|
// n.AddUint64(2).MulUint64(2) so that n = (n + 2) * 2.
|
|
func (n *Uint256) AddUint64(n2 uint64) *Uint256 {
|
|
var c uint64
|
|
n.n[0], c = bits.Add64(n.n[0], n2, c)
|
|
n.n[1], c = bits.Add64(n.n[1], 0, c)
|
|
n.n[2], c = bits.Add64(n.n[2], 0, c)
|
|
n.n[3], _ = bits.Add64(n.n[3], 0, c)
|
|
return n
|
|
}
|
|
|
|
// Sub2 subtracts the second given uint256 from the first modulo 2^256 and
|
|
// stores the result in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.Sub2(n1, n2).AddUint64(1) so that n = n1 - n2 + 1.
|
|
func (n *Uint256) Sub2(n1, n2 *Uint256) *Uint256 {
|
|
var borrow uint64
|
|
n.n[0], borrow = bits.Sub64(n1.n[0], n2.n[0], borrow)
|
|
n.n[1], borrow = bits.Sub64(n1.n[1], n2.n[1], borrow)
|
|
n.n[2], borrow = bits.Sub64(n1.n[2], n2.n[2], borrow)
|
|
n.n[3], _ = bits.Sub64(n1.n[3], n2.n[3], borrow)
|
|
return n
|
|
}
|
|
|
|
// Sub subtracts the given uint256 from the existing one modulo 2^256 and stores
|
|
// the result in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.Sub(n2).AddUint64(1) so that n = n - n2 + 1.
|
|
func (n *Uint256) Sub(n2 *Uint256) *Uint256 {
|
|
var borrow uint64
|
|
n.n[0], borrow = bits.Sub64(n.n[0], n2.n[0], borrow)
|
|
n.n[1], borrow = bits.Sub64(n.n[1], n2.n[1], borrow)
|
|
n.n[2], borrow = bits.Sub64(n.n[2], n2.n[2], borrow)
|
|
n.n[3], _ = bits.Sub64(n.n[3], n2.n[3], borrow)
|
|
return n
|
|
}
|
|
|
|
// SubUint64 subtracts the given uint64 from the uint256 modulo 2^256 and stores
|
|
// the result in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.SubUint64(1).MulUint64(3) so that n = (n - 1) * 3.
|
|
func (n *Uint256) SubUint64(n2 uint64) *Uint256 {
|
|
var borrow uint64
|
|
n.n[0], borrow = bits.Sub64(n.n[0], n2, borrow)
|
|
n.n[1], borrow = bits.Sub64(n.n[1], 0, borrow)
|
|
n.n[2], borrow = bits.Sub64(n.n[2], 0, borrow)
|
|
n.n[3], _ = bits.Sub64(n.n[3], 0, borrow)
|
|
return n
|
|
}
|
|
|
|
// mulAdd64 multiplies the two passed base 2^64 digits together, adds the given
|
|
// value to the result, and returns the 128-bit result via a (hi, lo) tuple
|
|
// where the upper half of the bits are returned in hi and the lower half in lo.
|
|
func mulAdd64(digit1, digit2, m uint64) (hi, lo uint64) {
|
|
// Note the carry on the final add is safe to discard because the maximum
|
|
// possible value is:
|
|
// (2^64 - 1)(2^64 - 1) + (2^64 - 1) = 2^128 - 2^64
|
|
// and:
|
|
// 2^128 - 2^64 < 2^128.
|
|
var c uint64
|
|
hi, lo = bits.Mul64(digit1, digit2)
|
|
lo, c = bits.Add64(lo, m, 0)
|
|
hi, _ = bits.Add64(hi, 0, c)
|
|
return hi, lo
|
|
}
|
|
|
|
// mulAdd64Carry multiplies the two passed base 2^64 digits together, adds both
|
|
// the given value and carry to the result, and returns the 128-bit result via a
|
|
// (hi, lo) tuple where the upper half of the bits are returned in hi and the
|
|
// lower half in lo.
|
|
func mulAdd64Carry(digit1, digit2, m, c uint64) (hi, lo uint64) {
|
|
// Note the carries on the high order adds are safe to discard because the
|
|
// maximum possible value is:
|
|
// (2^64 - 1)(2^64 - 1) + 2*(2^64 - 1) = 2^128 - 1
|
|
// and:
|
|
// 2^128 - 1 < 2^128.
|
|
var c2 uint64
|
|
hi, lo = bits.Mul64(digit1, digit2)
|
|
lo, c2 = bits.Add64(lo, m, 0)
|
|
hi, _ = bits.Add64(hi, 0, c2)
|
|
lo, c2 = bits.Add64(lo, c, 0)
|
|
hi, _ = bits.Add64(hi, 0, c2)
|
|
return hi, lo
|
|
}
|
|
|
|
// Mul2 multiplies the passed uint256s together modulo 2^256 and stores the
|
|
// result in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.Mul2(n1, n2).AddUint64(1) so that n = (n1 * n2) + 1.
|
|
func (n *Uint256) Mul2(n1, n2 *Uint256) *Uint256 {
|
|
// The general strategy employed here is:
|
|
// 1) Calculate the 512-bit product of the two uint256s using standard
|
|
// schoolbook multiplication.
|
|
// 2) Reduce the result modulo 2^256.
|
|
//
|
|
// However, some optimizations are used versus naively calculating all
|
|
// intermediate terms:
|
|
// 1) Reuse the high 64 bits from the intermediate 128-bit products directly
|
|
// since that is equivalent to shifting the result right by 64 bits.
|
|
// 2) Ignore all of the products between individual digits that would
|
|
// ordinarily be needed for the full 512-bit product when they are
|
|
// guaranteed to result in values that fall in the half open interval
|
|
// [2^256, 2^512) since they are all ≡ 0 (mod 2^256) given they are
|
|
// necessarily multiples of it.
|
|
// 3) Use native uint64s for the calculations involving the final digit of
|
|
// the result (r3) because all overflow carries to bits >= 256 which, as
|
|
// above, are all ≡ 0 (mod 2^256) and thus safe to discard.
|
|
var r0, r1, r2, r3, c uint64
|
|
|
|
// Terms resulting from the product of the first digit of the second number
|
|
// by all digits of the first number.
|
|
c, r0 = bits.Mul64(n2.n[0], n1.n[0])
|
|
c, r1 = mulAdd64(n2.n[0], n1.n[1], c)
|
|
c, r2 = mulAdd64(n2.n[0], n1.n[2], c)
|
|
r3 = n2.n[0]*n1.n[3] + c
|
|
|
|
// Terms resulting from the product of the second digit of the second number
|
|
// by all digits of the first number except those that are guaranteed to be
|
|
// ≡ 0 (mod 2^256).
|
|
c, r1 = mulAdd64(n2.n[1], n1.n[0], r1)
|
|
c, r2 = mulAdd64Carry(n2.n[1], n1.n[1], r2, c)
|
|
r3 += n2.n[1]*n1.n[2] + c
|
|
|
|
// Terms resulting from the product of the third digit of the second number
|
|
// by all digits of the first number except those that are guaranteed to be
|
|
// ≡ 0 (mod 2^256).
|
|
c, r2 = mulAdd64(n2.n[2], n1.n[0], r2)
|
|
r3 += n2.n[2]*n1.n[1] + c
|
|
|
|
// Terms resulting from the product of the fourth digit of the second number
|
|
// by all digits of the first number except those that are guaranteed to be
|
|
// ≡ 0 (mod 2^256).
|
|
r3 += n2.n[3] * n1.n[0]
|
|
|
|
n.n[0], n.n[1], n.n[2], n.n[3] = r0, r1, r2, r3
|
|
return n
|
|
}
|
|
|
|
// Mul multiplies the passed uint256 by the existing value in n modulo 2^256
|
|
// and stores the result in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.Mul(n2).AddUint64(1) so that n = (n * n2) + 1.
|
|
func (n *Uint256) Mul(n2 *Uint256) *Uint256 {
|
|
return n.Mul2(n, n2)
|
|
}
|
|
|
|
// MulUint64 multiplies the uint256 by the passed uint64 and stores the result
|
|
// in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.MulUint64(2).Add(n2) so that n = 2 * n + n2.
|
|
func (n *Uint256) MulUint64(n2 uint64) *Uint256 {
|
|
// The general strategy employed here is:
|
|
// 1) Calculate the 320-bit product of the uint256 and uint64 using standard
|
|
// schoolbook multiplication.
|
|
// 2) Reduce the result modulo 2^256.
|
|
//
|
|
// However, some optimizations are used versus naively calculating all
|
|
// intermediate terms:
|
|
// 1) Reuse the high 64 bits from the intermediate 128-bit products directly
|
|
// since that is equivalent to shifting the result right by 64 bits.
|
|
// 2) Use native uint64s for the calculations involving the final digit of
|
|
// the result because all overflow carries to bits >= 256 which is ≡ 0
|
|
// (mod 2^256) given they are necessarily multiples of it.
|
|
var c uint64
|
|
c, n.n[0] = bits.Mul64(n.n[0], n2)
|
|
c, n.n[1] = mulAdd64(n.n[1], n2, c)
|
|
c, n.n[2] = mulAdd64(n.n[2], n2, c)
|
|
n.n[3] = n.n[3]*n2 + c
|
|
return n
|
|
}
|
|
|
|
// SquareVal squares the passed uint256 modulo 2^256 and stores the result in
|
|
// n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.SquareVal(n2).Mul(n2) so that n = n2^2 * n2 = n2^3.
|
|
func (n *Uint256) SquareVal(n2 *Uint256) *Uint256 {
|
|
// Similar to multiplication, the general strategy employed here is:
|
|
// 1) Calculate the 512-bit product of the two uint256s using standard
|
|
// schoolbook multiplication.
|
|
// 2) Reduce the result modulo 2^256.
|
|
//
|
|
// However, some optimizations are used versus naively calculating all
|
|
// intermediate terms:
|
|
// 1) Reuse the high 64 bits from the intermediate 128-bit products directly
|
|
// since that is equivalent to shifting the result right by 64 bits.
|
|
// 2) Ignore all of the products between individual digits that would
|
|
// ordinarily be needed for the full 512-bit product when they are
|
|
// guaranteed to result in values that fall in the half open interval
|
|
// [2^256, 2^512) since they are all ≡ 0 (mod 2^256) given they are
|
|
// necessarily multiples of it.
|
|
// 3) Use native uint64s for the calculations involving the final digit of
|
|
// the result (r3) because all overflow carries to bits >= 256 which, as
|
|
// above, are all ≡ 0 (mod 2^256) and thus safe to discard.
|
|
// 4) Use the fact the number is being multiplied by itself to take
|
|
// advantage of symmetry to double the result of some individual products
|
|
// versus calculating them again.
|
|
var r0, r1, r2, r3, c uint64
|
|
|
|
// Terms resulting from the product of the first digit of the second number
|
|
// by all digits of the first number except those that will be accounted for
|
|
// later via symmetry.
|
|
c, r0 = bits.Mul64(n2.n[0], n2.n[0])
|
|
c, r1 = mulAdd64(n2.n[0], n2.n[1], c)
|
|
c, r2 = mulAdd64(n2.n[0], n2.n[2], c)
|
|
r3 = c
|
|
|
|
// Terms resulting from the product of the second digit of the second number
|
|
// by all digits of the first number except those that will be accounted for
|
|
// later via symmetry and those that are guaranteed to be ≡ 0 (mod 2^256).
|
|
c, r1 = mulAdd64(n2.n[1], n2.n[0], r1)
|
|
c, r2 = mulAdd64Carry(n2.n[1], n2.n[1], r2, c)
|
|
r3 += c
|
|
|
|
// Terms resulting from the product of the third digit of the second number
|
|
// by all digits of the first number except those that will be accounted for
|
|
// later via symmetry and those that are guaranteed to be ≡ 0 (mod 2^256).
|
|
c, r2 = mulAdd64(n2.n[2], n2.n[0], r2)
|
|
r3 += c
|
|
|
|
// Terms resulting from the product of the fourth digit of the second number
|
|
// by all digits of the first number except those that are guaranteed to be
|
|
// ≡ 0 (mod 2^256) and doubling those that were skipped earlier.
|
|
r3 += 2 * (n2.n[0]*n2.n[3] + n2.n[1]*n2.n[2])
|
|
|
|
n.n[0], n.n[1], n.n[2], n.n[3] = r0, r1, r2, r3
|
|
return n
|
|
}
|
|
|
|
// Square squares the uint256 modulo 2^256 and stores the result in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.Square().Mul(n2) so that n = n^2 * n2.
|
|
func (n *Uint256) Square() *Uint256 {
|
|
return n.SquareVal(n)
|
|
}
|
|
|
|
// numDigits returns the number of base 2^64 digits required to represent the
|
|
// uint256. The result is 0 when the value is 0.
|
|
func (n *Uint256) numDigits() int {
|
|
for i := 3; i >= 0; i-- {
|
|
if n.n[i] != 0 {
|
|
return i + 1
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// prefixLt returns whether or not the first argument is less than the prefix of
|
|
// the same length from the second when treating both as little-endian encoded
|
|
// integers. That is, it returns true when a < b[0:len(a)].
|
|
func prefixLt(a, b []uint64) bool {
|
|
var borrow uint64
|
|
for i := 0; i < len(a); i++ {
|
|
_, borrow = bits.Sub64(a[i], b[i], borrow)
|
|
}
|
|
return borrow != 0
|
|
}
|
|
|
|
// Div2 divides the passed uint256 dividend by the passed uint256 divisor modulo
|
|
// 2^256 and stores the result in n. It will panic if the divisor is 0.
|
|
//
|
|
// This implements truncated division like native Go integers and it is safe to
|
|
// alias the arguments.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.Div2(n1, n2).AddUint64(1) so that n = (n1 / n2) + 1.
|
|
func (n *Uint256) Div2(dividend, divisor *Uint256) *Uint256 {
|
|
if divisor.IsZero() {
|
|
panic("division by zero")
|
|
}
|
|
|
|
// Fast path for a couple of obvious cases. The result is zero when the
|
|
// divisor is larger than the dividend and one when they are equal. The
|
|
// remaining code also has preconditions on these cases already being
|
|
// handled.
|
|
if divisor.Gt(dividend) {
|
|
n.Zero()
|
|
return n
|
|
}
|
|
if dividend.Eq(divisor) {
|
|
return n.SetUint64(1)
|
|
}
|
|
|
|
// When the dividend can be fully represented by a uint64, perform the
|
|
// division using native uint64s since the divisor is also guaranteed to be
|
|
// fully representable as a uint64 given it was already confirmed to be
|
|
// smaller than the dividend above.
|
|
if dividend.IsUint64() {
|
|
return n.SetUint64(dividend.n[0] / divisor.n[0])
|
|
}
|
|
|
|
// When the divisor can be fully represented by a uint64, the divisor only
|
|
// consists of a single base 2^64 digit, so use that fact to avoid extra
|
|
// work. It is important to note that the algorithm below also requires the
|
|
// divisor to be at least two digits, so this is not solely a performance
|
|
// optimization.
|
|
if divisor.IsUint64() {
|
|
// The range here satisfies the following inequality:
|
|
// 1 ≤ divisor < 2^64 ≤ dividend < 2^256
|
|
var quotient Uint256
|
|
var r uint64
|
|
for d := dividend.numDigits() - 1; d >= 0; d-- {
|
|
quotient.n[d], r = bits.Div64(r, dividend.n[d], divisor.n[0])
|
|
}
|
|
return n.Set("ient)
|
|
}
|
|
|
|
// The range at this point satisfies the following inequality:
|
|
// 2^64 ≤ divisor < dividend < 2^256
|
|
|
|
// -------------------------------------------------------------------------
|
|
// The following is loosely based on Algorithm 4.3.1D of [TAOCP2] and is
|
|
// therefore based on long (aka schoolbook) division. It has been modified
|
|
// as compared to said algorithm in the following ways for performance
|
|
// reasons:
|
|
//
|
|
// 1. It uses full words instead of splitting each one into half words
|
|
// 2. It does not perform the test which effectively expands the estimate to
|
|
// be based on 3 digits of the dividend/remainder divided by 2 digits
|
|
// of the divisor instead of 2 digits by 1 (Step D3)
|
|
// 3. It conditionally subtracts the partial product from the partial
|
|
// remainder in the case the latter is less than the former instead of
|
|
// always subtracting it and then conditionally adding back (Steps D4,
|
|
// D5, and D6)
|
|
// 4. It only computes the active part of the remainder and throws the rest
|
|
// away since it is not needed here (parts of Steps D3, D4, D5, and D6)
|
|
// 5. It skips undoing the normalization for the remainder since it is not
|
|
// needed here (Step D8)
|
|
//
|
|
// The primary rationale for these changes follows:
|
|
//
|
|
// In regards to (1), almost all modern hardware provides native 128-bit by
|
|
// 64-bit division operations as well as 64-bit by 64-bit multiplication
|
|
// operations which the Go compiler uses in place of the respective
|
|
// `bits.Div64/Mul64` and, as a further benefit, using full words cuts the
|
|
// number of iterations in half.
|
|
//
|
|
// The test referred to by (2) serves to pin the potential error in the
|
|
// estimated quotient to be a max of 1 too high instead of a max of 2 as
|
|
// well as reduce the probability that single correction is needed at all.
|
|
// However, in order to achieve that, given full 64-bit words are used here,
|
|
// the Knuth method would involve an unconditional test which relies on
|
|
// comparing the result of a software-implemented 128-bit product against
|
|
// another 128-bit product as well as a 128-bit addition with carry. That
|
|
// is typically a sensible tradeoff when the number of digits involved is
|
|
// large and when working with half words since an additional potential
|
|
// correction would typically cost more than the test under those
|
|
// circumstances.
|
|
//
|
|
// However, in this implementation, it ends being faster to perform the
|
|
// extra correction than it is to perform the aforementioned test. The
|
|
// primary reasons for this are:
|
|
//
|
|
// - The number of digits is ≤ 4 which means very few operations are needed
|
|
// for the correction
|
|
// - Both the comparison of the product against the remainder and the
|
|
// conditional subtraction of the product from the remainder are able to
|
|
// make use of hardware acceleration that almost all modern hardware
|
|
// provides
|
|
//
|
|
// Moreover, the probability of the initial estimate being correct
|
|
// significantly increases as the size of the base grows. Given this
|
|
// implementation uses a base of 2^64, the probability of needing a
|
|
// correction is extremely low.
|
|
//
|
|
// Concretely, for quotient digits that are uniformly distributed, the
|
|
// probability of guessing the initial estimate correctly with normalization
|
|
// for a base of 2^64 is: 1 - 2/2^64 ~= 99.99999999999999999%. In other
|
|
// words, on average, roughly only 1 out of every 10 quintillion (10^19)
|
|
// will require an adjustment.
|
|
// -------------------------------------------------------------------------
|
|
|
|
// Start by normalizing the arguments.
|
|
//
|
|
// For reference, normalization is the process of scaling the operands such
|
|
// that the leading digit of the divisor is greater than or equal to half of
|
|
// the base (also often called the radix). Since this implementation uses a
|
|
// base of 2^64, half of that equates to 2^63. Mathematically:
|
|
// 2^63 ≤ leading digit of divisor < 2^64
|
|
//
|
|
// Note that the number of digits in each operand at this point satisfies
|
|
// the following inequality:
|
|
// 2 ≤ num divisor digits ≤ num dividend digits ≤ 4
|
|
//
|
|
// Further, notice that being ≥ 2^63 is equivalent to a digit having its
|
|
// most significant bit set. This means the scaling factor can very quickly
|
|
// be determined by counting the number of leading zeros and applied to the
|
|
// operands via bit shifts.
|
|
//
|
|
// This process significantly reduces the probability of requiring an
|
|
// adjustment to the initial estimate for quotient digits later as per the
|
|
// previous comments as well as bounds the possible number of error
|
|
// adjustments.
|
|
numDivisorDigits := divisor.numDigits()
|
|
numDividendDigits := dividend.numDigits()
|
|
sf := uint8(bits.LeadingZeros64(divisor.n[numDivisorDigits-1]))
|
|
var divisorN [4]uint64
|
|
var dividendN [5]uint64
|
|
if sf > 0 {
|
|
// Normalize the divisor by scaling it per the scaling factor.
|
|
for i := numDivisorDigits - 1; i > 0; i-- {
|
|
divisorN[i] = divisor.n[i]<<sf | divisor.n[i-1]>>(64-sf)
|
|
}
|
|
divisorN[0] = divisor.n[0] << sf
|
|
|
|
// Scale the dividend by the same scaling factor too.
|
|
dividendN[numDividendDigits] = dividend.n[numDividendDigits-1] >> (64 - sf)
|
|
for i := numDividendDigits - 1; i > 0; i-- {
|
|
dividendN[i] = dividend.n[i]<<sf | dividend.n[i-1]>>(64-sf)
|
|
}
|
|
dividendN[0] = dividend.n[0] << sf
|
|
} else {
|
|
copy(divisorN[:], divisor.n[:])
|
|
copy(dividendN[:], dividend.n[:])
|
|
}
|
|
|
|
// NOTE: The number of digits in the non-normalized dividend and divisor
|
|
// satisfies the following inequality at this point:
|
|
// 2 ≤ numDivisorDigits ≤ numDividendDigits ≤ 4
|
|
//
|
|
// Therefore the maximum value of d in the loop is 2.
|
|
var p [5]uint64
|
|
var qhat, c, borrow uint64
|
|
n.Zero()
|
|
for d := numDividendDigits - numDivisorDigits; d >= 0; d-- {
|
|
// Estimate the quotient digit by dividing the 2 leading digits of the
|
|
// active part of the remainder by the leading digit of the normalized
|
|
// divisor.
|
|
//
|
|
// Per Theorems A and B in section 4.3.1 of [TAOCP2], when the leading
|
|
// digit of the divisor is normalized as described above, the estimated
|
|
// quotient digit (qhat) produced by this is guaranteed to be ≥ the
|
|
// correct one (q) and at most 2 more. Mathematically:
|
|
// qhat - 2 ≤ q ≤ qhat
|
|
//
|
|
// Further, the algorithm implemented here guarantees that the leading
|
|
// digit of the active part of the remainder will be ≤ the leading digit
|
|
// of the normalized divisor. When it is less, the estimated quotient
|
|
// digit will fit into a single word and everything is normal. However,
|
|
// in the equals case, a second word would be required meaning the
|
|
// calculation would overflow.
|
|
//
|
|
// Combining that with the theorem above, the estimate in the case of
|
|
// overflow must either be one or two more than the maximum possible
|
|
// value for a single digit. In either case, assuming a type capable of
|
|
// representing values larger than a single digit without overflow were
|
|
// used instead, the quotient digit would have to be adjusted downwards
|
|
// until it was correct, and given the correct quotient digit must fit
|
|
// into a single word, it will necessarily either be the maximum
|
|
// possible value allowed for a single digit or one less. Therefore,
|
|
// the equality check here bypasses the overflow by setting the estimate
|
|
// to the max possible value and allowing the loop below to handle the
|
|
// potential remaining (extremely rare) overestimate.
|
|
//
|
|
// Also note that this behavior is not merely an optimization to skip
|
|
// the 128-bit by 64-bit division as both the hardware instruction and
|
|
// the bits.Div64 implementation would otherwise lead to a panic due to
|
|
// overflow as well.
|
|
//
|
|
// For some intuition, consider the base 10 case with a divisor of 5
|
|
// (recall the divisor is normalized, so it must be at least half the
|
|
// base). Then the maximum 2-digit dividend with the leading digit less
|
|
// than 5 is 49 and the minimum with the leading digit equal to 5 is 50.
|
|
// Observe that floor(49/5) = 9 is a single digit result, but
|
|
// floor(50/5) = 10 is not. The worst overestimate case for these
|
|
// conditions would occur at floor(59/5) = floor(11.8) = 11 which is
|
|
// indeed 2 more than the maximum possible single digit 9.
|
|
if dividendN[d+numDivisorDigits] == divisorN[numDivisorDigits-1] {
|
|
qhat = ^uint64(0)
|
|
} else {
|
|
qhat, _ = bits.Div64(dividendN[d+numDivisorDigits],
|
|
dividendN[d+numDivisorDigits-1], divisorN[numDivisorDigits-1])
|
|
}
|
|
|
|
// Calculate the product of the estimated quotient digit and divisor.
|
|
//
|
|
// This is semantically equivalent to the following if uint320 were
|
|
// supported:
|
|
//
|
|
// p = uint320(qhat) * uint320(divisorN)
|
|
//
|
|
// Note that this will compute extra upper terms when the divisor is
|
|
// fewer digits than the max possible even though it is not really
|
|
// needed since they will all be zero. However, because it is only
|
|
// potentially a max of two extra digits, it ends up being faster on
|
|
// average to avoid the additional conditional branches due to
|
|
// pipelining.
|
|
c, p[0] = bits.Mul64(qhat, divisorN[0])
|
|
c, p[1] = mulAdd64(qhat, divisorN[1], c)
|
|
c, p[2] = mulAdd64(qhat, divisorN[2], c)
|
|
p[4], p[3] = mulAdd64(qhat, divisorN[3], c)
|
|
|
|
// Adjust the estimate (and associated product) downwards when they are
|
|
// too high for the active part of the partial remainder. As described
|
|
// above, qhat is guaranteed to be at most two more than the correct
|
|
// quotient digit, so this loop will run at most twice.
|
|
//
|
|
// Moreover, the probability of a single correction is extremely rare
|
|
// and that of two corrections is roughly an additional two orders of
|
|
// magnitude less than that. In other words, in practice, the two
|
|
// corrections case almost never happens.
|
|
for prefixLt(dividendN[d:d+numDivisorDigits+1], p[:]) {
|
|
qhat--
|
|
|
|
// Subtract the divisor from the product to adjust it for the
|
|
// reduced quotient.
|
|
//
|
|
// This is semantically equivalent to the following if uint320 were
|
|
// supported (and with p as a uint320 per above):
|
|
//
|
|
// p -= uint320(divisorN)
|
|
//
|
|
// Note that as with the original calculation of the product above,
|
|
// this will compute extra upper terms when the divisor is fewer
|
|
// digits than the max possible even though it is not really needed
|
|
// for performance reasons as previously described.
|
|
p[0], borrow = bits.Sub64(p[0], divisorN[0], 0)
|
|
p[1], borrow = bits.Sub64(p[1], divisorN[1], borrow)
|
|
p[2], borrow = bits.Sub64(p[2], divisorN[2], borrow)
|
|
p[3], borrow = bits.Sub64(p[3], divisorN[3], borrow)
|
|
p[4] -= borrow
|
|
}
|
|
|
|
// Set the quotient digit in the result.
|
|
n.n[d] = qhat
|
|
|
|
// Update the dividend by subtracting the resulting product from it so
|
|
// that it becomes the new remainder to use for calculating the next
|
|
// quotient digit.
|
|
//
|
|
// Note that this is only calculating the _active_ part of the remainder
|
|
// since the final remainder is not needed and the next iteration slides
|
|
// over one digit to the right meaning none of the upper digits are used
|
|
// and are therefore safe to ignore despite them no longer being
|
|
// accurate.
|
|
//
|
|
// Also, since 'd' is a maximum of 2 due to previous constraints, there
|
|
// is no chance of overflowing the array.
|
|
//
|
|
// This is semantically equivalent to the following if uint192 and the
|
|
// syntax to set base 2^64 digits in little endian directly were
|
|
// supported:
|
|
//
|
|
// dividendN[d:d+3] = uint192(dividendN[d:d+3]) - uint192(p[0:3])
|
|
dividendN[d], borrow = bits.Sub64(dividendN[d], p[0], 0)
|
|
dividendN[d+1], borrow = bits.Sub64(dividendN[d+1], p[1], borrow)
|
|
dividendN[d+2], _ = bits.Sub64(dividendN[d+2], p[2], borrow)
|
|
}
|
|
|
|
return n
|
|
}
|
|
|
|
// Div divides the existing value in n by the passed uint256 divisor modulo
|
|
// 2^256 and stores the result in n. It will panic if the divisor is 0.
|
|
//
|
|
// This implements truncated division like native Go integers.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.Div(n2).AddUint64(1) so that n = (n / n2) + 1.
|
|
func (n *Uint256) Div(divisor *Uint256) *Uint256 {
|
|
return n.Div2(n, divisor)
|
|
}
|
|
|
|
// DivUint64 divides the existing value in n by the passed uint64 divisor modulo
|
|
// 2^256 and stores the result in n. It will panic if the divisor is 0.
|
|
//
|
|
// This implements truncated division like native Go integers.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.DivUint64(2).AddUint64(1) so that n = (n / 2) + 1.
|
|
func (n *Uint256) DivUint64(divisor uint64) *Uint256 {
|
|
if divisor == 0 {
|
|
panic("division by zero")
|
|
}
|
|
|
|
// Fast path for a couple of obvious cases. The result is zero when the
|
|
// dividend is smaller than the divisor and one when they are equal. The
|
|
// remaining code also has preconditions on these cases already being
|
|
// handled.
|
|
if n.LtUint64(divisor) {
|
|
n.Zero()
|
|
return n
|
|
}
|
|
if n.EqUint64(divisor) {
|
|
return n.SetUint64(1)
|
|
}
|
|
|
|
// The range here satisfies the following inequalities:
|
|
// 1 ≤ divisor < 2^64
|
|
// 1 ≤ divisor < dividend < 2^256
|
|
var quotient Uint256
|
|
var r uint64
|
|
for d := n.numDigits() - 1; d >= 0; d-- {
|
|
quotient.n[d], r = bits.Div64(r, n.n[d], divisor)
|
|
}
|
|
return n.Set("ient)
|
|
}
|
|
|
|
// NegateVal negates the passed uint256 modulo 2^256 and stores the result in
|
|
// n. In other words, n will be set to the two's complement of the passed
|
|
// uint256.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.NegateVal(n2).AddUint64(1) so that n = -n2 + 1.
|
|
func (n *Uint256) NegateVal(n2 *Uint256) *Uint256 {
|
|
// This uses math/bits to perform the negation via subtraction from 0 as the
|
|
// compiler will replace it with the relevant intrinsic on most
|
|
// architectures.
|
|
var borrow uint64
|
|
n.n[0], borrow = bits.Sub64(0, n2.n[0], borrow)
|
|
n.n[1], borrow = bits.Sub64(0, n2.n[1], borrow)
|
|
n.n[2], borrow = bits.Sub64(0, n2.n[2], borrow)
|
|
n.n[3], _ = bits.Sub64(0, n2.n[3], borrow)
|
|
return n
|
|
}
|
|
|
|
// Negate negates the uint256 modulo 2^256. In other words, n will be set to
|
|
// its two's complement.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.Negate().AddUint64(1) so that n = -n + 1.
|
|
func (n *Uint256) Negate() *Uint256 {
|
|
return n.NegateVal(n)
|
|
}
|
|
|
|
// LshVal shifts the passed uint256 to the left the given number of bits and
|
|
// stores the result in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.LshVal(n2, 2).AddUint64(1) so that n = (n2 << 2) + 1.
|
|
func (n *Uint256) LshVal(n2 *Uint256, bits uint32) *Uint256 {
|
|
// Fast path for large and zero shifts.
|
|
if bits > 255 {
|
|
n.Zero()
|
|
return n
|
|
}
|
|
if bits == 0 {
|
|
return n.Set(n2)
|
|
}
|
|
|
|
// Shift entire words when possible.
|
|
switch {
|
|
case bits >= 192:
|
|
// Left shift 192 bits.
|
|
n.n[3], n.n[2], n.n[1], n.n[0] = n2.n[0], 0, 0, 0
|
|
bits -= 192
|
|
if bits == 0 {
|
|
return n
|
|
}
|
|
n.n[3] <<= bits
|
|
return n
|
|
|
|
case bits >= 128:
|
|
// Left shift 128 bits.
|
|
n.n[3], n.n[2], n.n[1], n.n[0] = n2.n[1], n2.n[0], 0, 0
|
|
bits -= 128
|
|
if bits == 0 {
|
|
return n
|
|
}
|
|
n.n[3] = (n.n[3] << bits) | (n.n[2] >> (64 - bits))
|
|
n.n[2] <<= bits
|
|
return n
|
|
|
|
case bits >= 64:
|
|
// Left shift 64 bits.
|
|
n.n[3], n.n[2], n.n[1], n.n[0] = n2.n[2], n2.n[1], n2.n[0], 0
|
|
bits -= 64
|
|
if bits == 0 {
|
|
return n
|
|
}
|
|
n.n[3] = (n.n[3] << bits) | (n.n[2] >> (64 - bits))
|
|
n.n[2] = (n.n[2] << bits) | (n.n[1] >> (64 - bits))
|
|
n.n[1] <<= bits
|
|
return n
|
|
}
|
|
|
|
// At this point the shift must be less than 64 bits, so shift each word
|
|
// accordingly.
|
|
n.n[3] = (n2.n[3] << bits) | (n2.n[2] >> (64 - bits))
|
|
n.n[2] = (n2.n[2] << bits) | (n2.n[1] >> (64 - bits))
|
|
n.n[1] = (n2.n[1] << bits) | (n2.n[0] >> (64 - bits))
|
|
n.n[0] = n2.n[0] << bits
|
|
return n
|
|
}
|
|
|
|
// Lsh shifts the uint256 to the left the given number of bits and stores the
|
|
// result in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.Lsh(2).AddUint64(1) so that n = (n << 2) + 1.
|
|
func (n *Uint256) Lsh(bits uint32) *Uint256 {
|
|
if bits == 0 {
|
|
return n
|
|
}
|
|
return n.LshVal(n, bits)
|
|
}
|
|
|
|
// RshVal shifts the passed uint256 to the right the given number of bits and
|
|
// stores the result in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.RshVal(n2, 2).AddUint64(1) so that n = (n2 >> 2) + 1.
|
|
func (n *Uint256) RshVal(n2 *Uint256, bits uint32) *Uint256 {
|
|
// Fast path for large and zero shifts.
|
|
if bits > 255 {
|
|
n.Zero()
|
|
return n
|
|
}
|
|
if bits == 0 {
|
|
return n.Set(n2)
|
|
}
|
|
|
|
// Shift entire words when possible.
|
|
switch {
|
|
case bits >= 192:
|
|
// Right shift 192 bits.
|
|
n.n[3], n.n[2], n.n[1], n.n[0] = 0, 0, 0, n2.n[3]
|
|
bits -= 192
|
|
if bits == 0 {
|
|
return n
|
|
}
|
|
n.n[0] >>= bits
|
|
return n
|
|
|
|
case bits >= 128:
|
|
// Right shift 128 bits.
|
|
n.n[3], n.n[2], n.n[1], n.n[0] = 0, 0, n2.n[3], n2.n[2]
|
|
bits -= 128
|
|
if bits == 0 {
|
|
return n
|
|
}
|
|
n.n[0] = (n.n[0] >> bits) | (n.n[1] << (64 - bits))
|
|
n.n[1] >>= bits
|
|
return n
|
|
|
|
case bits >= 64:
|
|
// Right shift 64 bits.
|
|
n.n[3], n.n[2], n.n[1], n.n[0] = 0, n2.n[3], n2.n[2], n2.n[1]
|
|
bits -= 64
|
|
if bits == 0 {
|
|
return n
|
|
}
|
|
n.n[0] = (n.n[0] >> bits) | (n.n[1] << (64 - bits))
|
|
n.n[1] = (n.n[1] >> bits) | (n.n[2] << (64 - bits))
|
|
n.n[2] >>= bits
|
|
return n
|
|
}
|
|
|
|
// At this point the shift must be less than 64 bits, so shift each word
|
|
// accordingly.
|
|
n.n[0] = (n2.n[0] >> bits) | (n2.n[1] << (64 - bits))
|
|
n.n[1] = (n2.n[1] >> bits) | (n2.n[2] << (64 - bits))
|
|
n.n[2] = (n2.n[2] >> bits) | (n2.n[3] << (64 - bits))
|
|
n.n[3] = n2.n[3] >> bits
|
|
return n
|
|
}
|
|
|
|
// Rsh shifts the uint256 to the right the given number of bits and stores the
|
|
// result in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.Rsh(2).AddUint64(1) so that n = (n >> 2) + 1.
|
|
func (n *Uint256) Rsh(bits uint32) *Uint256 {
|
|
if bits == 0 {
|
|
return n
|
|
}
|
|
return n.RshVal(n, bits)
|
|
}
|
|
|
|
// Not computes the bitwise not of the uint256 and stores the result in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.Not().AddUint64(1) so that n = ^n + 1.
|
|
func (n *Uint256) Not() *Uint256 {
|
|
n.n[0] = ^n.n[0]
|
|
n.n[1] = ^n.n[1]
|
|
n.n[2] = ^n.n[2]
|
|
n.n[3] = ^n.n[3]
|
|
return n
|
|
}
|
|
|
|
// Or computes the bitwise or of the uint256 and the passed uint256 and stores
|
|
// the result in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.Or(n2).AddUint64(1) so that n = (n | n2) + 1.
|
|
func (n *Uint256) Or(n2 *Uint256) *Uint256 {
|
|
n.n[0] |= n2.n[0]
|
|
n.n[1] |= n2.n[1]
|
|
n.n[2] |= n2.n[2]
|
|
n.n[3] |= n2.n[3]
|
|
return n
|
|
}
|
|
|
|
// And computes the bitwise and of the uint256 and the passed uint256 and stores
|
|
// the result in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.And(n2).AddUint64(1) so that n = (n & n2) + 1.
|
|
func (n *Uint256) And(n2 *Uint256) *Uint256 {
|
|
n.n[0] &= n2.n[0]
|
|
n.n[1] &= n2.n[1]
|
|
n.n[2] &= n2.n[2]
|
|
n.n[3] &= n2.n[3]
|
|
return n
|
|
}
|
|
|
|
// Xor computes the bitwise exclusive or of the uint256 and the passed uint256
|
|
// and stores the result in n.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n.Xor(n2).AddUint64(1) so that n = (n ^ n2) + 1.
|
|
func (n *Uint256) Xor(n2 *Uint256) *Uint256 {
|
|
n.n[0] ^= n2.n[0]
|
|
n.n[1] ^= n2.n[1]
|
|
n.n[2] ^= n2.n[2]
|
|
n.n[3] ^= n2.n[3]
|
|
return n
|
|
}
|
|
|
|
// BitLen returns the minimum number of bits required to represent the uint256.
|
|
// The result is 0 when the value is 0.
|
|
func (n *Uint256) BitLen() uint16 {
|
|
if w := n.n[3]; w > 0 {
|
|
return uint16(bits.Len64(w)) + 192
|
|
}
|
|
if w := n.n[2]; w > 0 {
|
|
return uint16(bits.Len64(w)) + 128
|
|
}
|
|
if w := n.n[1]; w > 0 {
|
|
return uint16(bits.Len64(w)) + 64
|
|
}
|
|
return uint16(bits.Len64(n.n[0]))
|
|
}
|
|
|
|
// bitsPerInternalWord is the number of bits used for each internal word of the
|
|
// uint256.
|
|
const bitsPerInternalWord = 64
|
|
|
|
// toBin converts the uint256 to its string representation in base 2.
|
|
func (n *Uint256) toBin() []byte {
|
|
if n.IsZero() {
|
|
return []byte("0")
|
|
}
|
|
|
|
// Create space for the max possible number of output digits.
|
|
maxOutDigits := n.BitLen()
|
|
result := make([]byte, maxOutDigits)
|
|
|
|
// Convert each internal base 2^64 word to base 2 from least to most
|
|
// significant. Since the value is guaranteed to be non-zero per a previous
|
|
// check, there will always be a nonzero most-significant word. Also, note
|
|
// that no partial digit handling is needed in this case because the shift
|
|
// amount evenly divides the bits per internal word.
|
|
const shift = 1
|
|
const mask = 1<<shift - 1
|
|
const digitsPerInternalWord = bitsPerInternalWord
|
|
outputIdx := maxOutDigits - 1
|
|
numInputWords := n.numDigits()
|
|
inputWord := n.n[0]
|
|
for inputIdx := 1; inputIdx < numInputWords; inputIdx++ {
|
|
for i := 0; i < digitsPerInternalWord; i++ {
|
|
result[outputIdx] = '0' + byte(inputWord&mask)
|
|
inputWord >>= shift
|
|
outputIdx--
|
|
}
|
|
inputWord = n.n[inputIdx]
|
|
}
|
|
for inputWord != 0 {
|
|
result[outputIdx] = '0' + byte(inputWord&mask)
|
|
inputWord >>= shift
|
|
outputIdx--
|
|
}
|
|
|
|
return result[outputIdx+1:]
|
|
}
|
|
|
|
// toOctal converts the uint256 to its string representation in base 8.
|
|
func (n *Uint256) toOctal() []byte {
|
|
if n.IsZero() {
|
|
return []byte("0")
|
|
}
|
|
|
|
// Create space for the max possible number of output digits using the fact
|
|
// that 3 bits converts directly to a single octal digit.
|
|
maxOutDigits := (n.BitLen() + 2) / 3
|
|
result := make([]byte, maxOutDigits)
|
|
|
|
// Convert each internal base 2^64 word to base 8 from least to most
|
|
// significant. Since the value is guaranteed to be non-zero per a previous
|
|
// check, there will always be a nonzero most-significant word. Also, note
|
|
// that partial digit handling is needed in this case because the shift
|
|
// amount does not evenly divide the bits per internal word.
|
|
const shift = 3
|
|
const mask = 1<<shift - 1
|
|
unconvertedBits := bitsPerInternalWord
|
|
outputIdx := maxOutDigits - 1
|
|
numInputWords := n.numDigits()
|
|
inputWord := n.n[0]
|
|
for inputIdx := 1; inputIdx < numInputWords; inputIdx++ {
|
|
// Convert full digits.
|
|
for ; unconvertedBits >= shift; unconvertedBits -= shift {
|
|
result[outputIdx] = '0' + byte(inputWord&mask)
|
|
inputWord >>= shift
|
|
outputIdx--
|
|
}
|
|
|
|
// Move to the next input word when there are not any remaining
|
|
// unconverted bits that need to be handled.
|
|
if unconvertedBits == 0 {
|
|
inputWord = n.n[inputIdx]
|
|
unconvertedBits = bitsPerInternalWord
|
|
continue
|
|
}
|
|
|
|
// Account for the remaining unconverted bits from the current word and
|
|
// the bits needed from the next word to form a full digit for the next
|
|
// digit.
|
|
inputWord |= n.n[inputIdx] << unconvertedBits
|
|
result[outputIdx] = '0' + byte(inputWord&mask)
|
|
outputIdx--
|
|
|
|
// Move to the next input word while accounting for the bits already
|
|
// consumed above by shifting it and updating the unconverted bits
|
|
// accordingly.
|
|
inputWord = n.n[inputIdx] >> (shift - unconvertedBits)
|
|
unconvertedBits = bitsPerInternalWord - (shift - unconvertedBits)
|
|
}
|
|
for inputWord != 0 {
|
|
result[outputIdx] = '0' + byte(inputWord&mask)
|
|
inputWord >>= shift
|
|
outputIdx--
|
|
}
|
|
|
|
return result[outputIdx+1:]
|
|
}
|
|
|
|
// maxPow10ForInternalWord is the maximum power of 10 that will fit into an
|
|
// internal word. It is the value 10^floor(64 / log2(10)) and is used when
|
|
// converting to base 10 in order to significantly reduce the number of
|
|
// divisions needed.
|
|
var maxPow10ForInternalWord = new(Uint256).SetUint64(1e19)
|
|
|
|
// toDecimal converts the uint256 to its string representation in base 10.
|
|
func (n *Uint256) toDecimal() []byte {
|
|
if n.IsZero() {
|
|
return []byte("0")
|
|
}
|
|
|
|
// Create space for the max possible number of output digits.
|
|
//
|
|
// Note that the actual total number of output digits is usually calculated
|
|
// as:
|
|
// floor(log2(n) / log2(base)) + 1
|
|
//
|
|
// However, in order to avoid more expensive calculation of the full log2 of
|
|
// the value, the code below instead calculates a value that might overcount
|
|
// by a max of one digit and trims the result as needed via the following
|
|
// slightly modified version of the formula:
|
|
// floor(bitlen(n) / log2(base)) + 1
|
|
//
|
|
// The modified formula is guaranteed to be large enough because:
|
|
// (a) floor(log2(x)) ≤ log2(x) ≤ floor(log2(x)) + 1
|
|
// (b) bitlen(x) = floor(log2(x)) + 1
|
|
//
|
|
// Which implies:
|
|
// (c) floor(log2(n) / log2(base)) ≤ floor(floor(log2(n))+1) / log2(base))
|
|
// (d) floor(log2(n) / log2(base)) ≤ floor(bitlen(n)) / log2(base))
|
|
//
|
|
// Note that (c) holds since the left hand side of the inequality has a
|
|
// dividend that is ≤ the right hand side dividend due to (a) while the
|
|
// divisor is = the right hand side divisor, and then (d) is equal to (c)
|
|
// per (b). Adding 1 to both sides of (d) yields an inequality where the
|
|
// left hand side is the typical formula and the right hand side is the
|
|
// modified formula thereby proving it will never under count.
|
|
const log2Of10 = 3.321928094887362
|
|
maxOutDigits := uint8(float64(n.BitLen())/log2Of10) + 1
|
|
result := make([]byte, maxOutDigits)
|
|
|
|
// Convert each internal base 2^64 word to base 10 from least to most
|
|
// significant. Since the value is guaranteed to be non-zero per a previous
|
|
// check, there will always be a nonzero most-significant word. Also, note
|
|
// that partial digit handling is needed in this case because the shift
|
|
// amount does not evenly divide the bits per internal word.
|
|
const outputDigitsPerDiv = 19
|
|
var remainingDigitsPerDiv uint8
|
|
var quo, rem, t Uint256
|
|
var r uint64
|
|
outputIdx := maxOutDigits - 1
|
|
quo = *n
|
|
for !quo.IsZero() {
|
|
for i := uint8(0); i < remainingDigitsPerDiv; i++ {
|
|
result[outputIdx] = '0'
|
|
outputIdx--
|
|
}
|
|
remainingDigitsPerDiv = outputDigitsPerDiv
|
|
|
|
rem.Set(&quo)
|
|
quo.Div(maxPow10ForInternalWord)
|
|
t.Mul2(&quo, maxPow10ForInternalWord)
|
|
inputWord := rem.Sub(&t).Uint64()
|
|
for inputWord != 0 {
|
|
inputWord, r = inputWord/10, inputWord%10
|
|
result[outputIdx] = '0' + byte(r)
|
|
outputIdx--
|
|
remainingDigitsPerDiv--
|
|
}
|
|
}
|
|
|
|
return result[outputIdx+1:]
|
|
}
|
|
|
|
// toHex converts the uint256 to its string representation in lowercase base 16.
|
|
func (n *Uint256) toHex() []byte {
|
|
if n.IsZero() {
|
|
return []byte("0")
|
|
}
|
|
|
|
// Create space for the max possible number of output digits using the fact
|
|
// that a nibble converts directly to a single hex digit.
|
|
maxOutDigits := (n.BitLen() + 3) / 4
|
|
result := make([]byte, maxOutDigits)
|
|
|
|
// Convert each internal base 2^64 word to base 16 from least to most
|
|
// significant. Since the value is guaranteed to be non-zero per a
|
|
// previous check, there will always be a nonzero most-significant word.
|
|
// Also, note that no partial digit handling is needed in this case
|
|
// because the shift amount evenly divides the bits per internal word.
|
|
const alphabet = "0123456789abcdef"
|
|
const shift = 4
|
|
const mask = 1<<shift - 1
|
|
const digitsPerInternalWord = bitsPerInternalWord / shift
|
|
outputIdx := maxOutDigits - 1
|
|
numInputWords := n.numDigits()
|
|
inputWord := n.n[0]
|
|
for inputIdx := 1; inputIdx < numInputWords; inputIdx++ {
|
|
for i := 0; i < digitsPerInternalWord; i++ {
|
|
result[outputIdx] = alphabet[inputWord&mask]
|
|
inputWord >>= shift
|
|
outputIdx--
|
|
}
|
|
inputWord = n.n[inputIdx]
|
|
}
|
|
for inputWord != 0 {
|
|
result[outputIdx] = alphabet[inputWord&mask]
|
|
inputWord >>= shift
|
|
outputIdx--
|
|
}
|
|
|
|
return result[outputIdx+1:]
|
|
}
|
|
|
|
// OutputBase represents a specific base to use for the string representation of
|
|
// a number.
|
|
type OutputBase int
|
|
|
|
// These constants define the supported output bases.
|
|
const (
|
|
// OutputBaseBinary indicates a string representation of a uint256 in
|
|
// base 2.
|
|
OutputBaseBinary OutputBase = 2
|
|
|
|
// OutputBaseOctal indicates a string representation of a uint256 in base 8.
|
|
OutputBaseOctal OutputBase = 8
|
|
|
|
// OutputBaseDecimal indicates a string representation of a uint256 in base
|
|
// 10.
|
|
OutputBaseDecimal OutputBase = 10
|
|
|
|
// OutputBaseHex indicates a string representation of a uint256 in base 16.
|
|
OutputBaseHex OutputBase = 16
|
|
)
|
|
|
|
// Text returns the string representation of the uint256 in the given base which
|
|
// must be on of the supported bases as defined by the OutputBase type.
|
|
//
|
|
// It will return "<nil>" when the uint256 pointer is nil and a message that
|
|
// indicates the base is not supported along with the value in base 10 in the
|
|
// case the caller goes out of its way to call it with an invalid base.
|
|
func (n *Uint256) Text(base OutputBase) string {
|
|
if n == nil {
|
|
return "<nil>"
|
|
}
|
|
|
|
switch base {
|
|
case OutputBaseHex:
|
|
return string(n.toHex())
|
|
case OutputBaseDecimal:
|
|
return string(n.toDecimal())
|
|
case OutputBaseBinary:
|
|
return string(n.toBin())
|
|
case OutputBaseOctal:
|
|
return string(n.toOctal())
|
|
}
|
|
|
|
return fmt.Sprintf("base %d not supported (Uint256=%s)", int(base), n)
|
|
}
|
|
|
|
// String returns the scalar as a human-readable decimal string.
|
|
func (n Uint256) String() string {
|
|
return string(n.toDecimal())
|
|
}
|
|
|
|
// Format implements fmt.Formatter. It accepts the following format verbs:
|
|
//
|
|
// 'v' default format which is decimal
|
|
// 's' default string format which is decimal
|
|
// 'b' binary
|
|
// 'o' octal with 0 prefix when accompanied by #
|
|
// 'O' octal with 0o prefix
|
|
// 'd' decimal
|
|
// 'x' lowercase hexadecimal
|
|
// 'X' uppercase hexadecimal
|
|
//
|
|
// It also supports the full suite of the fmt package format flags for integral
|
|
// types:
|
|
//
|
|
// '#' output base prefix:
|
|
// binary: 0b (%#b)
|
|
// octal: 0 (%#o)
|
|
// hex: 0x (%#x) or 0X (%#X)
|
|
// '-' pad with spaces on the right (left-justify field)
|
|
// '0' pad with leading zeros rather than spaces
|
|
//
|
|
// Finally, it supports specification of the minimum number of digits
|
|
// (precision) and output field width. Examples:
|
|
//
|
|
// %#.64x default width, precision 64, lowercase hex with 0x prefix
|
|
// %256b width 256, default precision, binary with leading zeros
|
|
// %12.3O width 12, precision 3, octal with 0o prefix
|
|
func (n Uint256) Format(s fmt.State, ch rune) {
|
|
// Determine output digits for the output base.
|
|
var digits []byte
|
|
switch ch {
|
|
case 'b':
|
|
digits = n.toBin()
|
|
case 'o', 'O':
|
|
digits = n.toOctal()
|
|
case 'd', 's', 'v':
|
|
digits = n.toDecimal()
|
|
case 'x':
|
|
digits = n.toHex()
|
|
case 'X':
|
|
digits = n.toHex()
|
|
for i, d := range digits {
|
|
if d >= 'a' && d <= 'f' {
|
|
digits[i] = 'A' + (d - 'a')
|
|
}
|
|
}
|
|
|
|
default:
|
|
fmt.Fprintf(s, "%%!%c(Uint256=%s)", ch, n.String())
|
|
return
|
|
}
|
|
|
|
// Determine prefix characters for the output base as needed.
|
|
var prefix string
|
|
if s.Flag('#') {
|
|
switch ch {
|
|
case 'b':
|
|
prefix = "0b"
|
|
case 'o':
|
|
prefix = "0"
|
|
case 'x':
|
|
prefix = "0x"
|
|
case 'X':
|
|
prefix = "0X"
|
|
}
|
|
}
|
|
if ch == 'O' {
|
|
prefix = "0o"
|
|
}
|
|
|
|
// Determine how many zeros to pad with based on whether or not a minimum
|
|
// number of digits to output is specified.
|
|
//
|
|
// Also, do not output anything when the minimum number of digits to output
|
|
// is zero and the uint256 is zero.
|
|
//
|
|
// Note that the zero padding might also be set below when the zero pad
|
|
// ('0') flag is specified and neither a precision nor the right justify
|
|
// ('-') flag is specified.
|
|
var zeroPad int
|
|
minDigits, isPrecisionSet := s.Precision()
|
|
if isPrecisionSet {
|
|
switch {
|
|
case len(digits) < minDigits:
|
|
zeroPad = minDigits - len(digits)
|
|
case minDigits == 0 && n.IsZero():
|
|
return
|
|
}
|
|
}
|
|
|
|
// Determine the left or right padding depending on whether or not a minimum
|
|
// number of characters to output is specified as well as the flags.
|
|
//
|
|
// A '-' flag indicates the output should be right justified and takes
|
|
// precedence over the zero pad ('0') flag. Per the above, the zero pad
|
|
// flag is ignored when a minimum number of digits is specified.
|
|
var leftPad, rightPad int
|
|
digitsPlusPrefixLen := len(prefix) + zeroPad + len(digits)
|
|
width, isWidthSet := s.Width()
|
|
if isWidthSet && digitsPlusPrefixLen < width {
|
|
switch pad := width - digitsPlusPrefixLen; {
|
|
case s.Flag('-'):
|
|
rightPad = pad
|
|
case s.Flag('0') && !isPrecisionSet:
|
|
zeroPad = pad
|
|
default:
|
|
leftPad = pad
|
|
}
|
|
}
|
|
|
|
// Produce the following output:
|
|
//
|
|
// [left pad][prefix][zero pad][digits][right pad]
|
|
var buf bytes.Buffer
|
|
buf.Grow(leftPad + len(prefix) + zeroPad + len(digits) + rightPad)
|
|
for i := 0; i < leftPad; i++ {
|
|
buf.WriteRune(' ')
|
|
}
|
|
buf.WriteString(prefix)
|
|
for i := 0; i < zeroPad; i++ {
|
|
buf.WriteRune('0')
|
|
}
|
|
buf.Write(digits)
|
|
for i := 0; i < rightPad; i++ {
|
|
buf.WriteRune(' ')
|
|
}
|
|
s.Write(buf.Bytes())
|
|
}
|
|
|
|
// PutBig sets the passed existing stdlib big integer to the value the uint256
|
|
// currently represents.
|
|
//
|
|
// This can sometimes be useful to reduce the number of allocations due to
|
|
// conversion if reusing the same variable is an option. The reason is that
|
|
// stdlib big integers internally allocate space on the heap to perform their
|
|
// operations and attempt to reuse that internal buffer when possible.
|
|
//
|
|
// Do note however that even when reusing a big integer, it will naturally still
|
|
// require an allocation for the internal buffer unless it has already allocated
|
|
// one large enough to be reused. Moreover, they often require further
|
|
// allocations while performing arithmetic, notably multiplication and division.
|
|
//
|
|
// Applications that are performance sensitive should consider avoiding
|
|
// conversion to big integers altogether when possible.
|
|
//
|
|
// See ToBig for a variant that returns the uint256 as a new stdlib big integer
|
|
// instead which can sometimes be more ergonomic in contexts where additional
|
|
// allocations are not a concern.
|
|
func (n *Uint256) PutBig(out *big.Int) {
|
|
b := n.Bytes()
|
|
out.SetBytes(b[:])
|
|
}
|
|
|
|
// ToBig returns the uint256 as a stdlib big integer.
|
|
//
|
|
// Note that this is nearly guaranteed to cause two allocations. Applications
|
|
// that are performance sensitive should consider using PutBig instead or avoid
|
|
// conversion to big integers altogether when possible.
|
|
//
|
|
// See PutBig for a variant that allows an existing big integer to be reused
|
|
// which can be useful to cut down on the number of allocations by allowing the
|
|
// caller to reuse a big integer that already has an internal buffer allocated.
|
|
func (n *Uint256) ToBig() *big.Int {
|
|
var out big.Int
|
|
n.PutBig(&out)
|
|
return &out
|
|
}
|
|
|
|
// SetBig sets the uint256 to the passed standard library big integer modulo
|
|
// 2^256.
|
|
//
|
|
// The resulting uint256 will be set to the 2's complement of the provided value
|
|
// when it is negative.
|
|
//
|
|
// The uint256 is returned to support chaining. This enables syntax like:
|
|
// n := new(Uint256).SetBig(n2).AddUint64(1) so that n = n2 + 1 where n2 is not
|
|
// modified.
|
|
//
|
|
// PERFORMANCE NOTE: When the caller expects values to potentially be larger
|
|
// than a max uint256, it is _highly_ recommended to reduce the value mod 2^256
|
|
// prior to calling this method for better performance.
|
|
//
|
|
// The reason is that this method requires an allocation and copy when the
|
|
// provided big integer is larger than a max uint256 in order to reduce it
|
|
// without modifying the provided arg. The caller can avoid this allocation by
|
|
// performing the mod 2^256 prior to calling this method with the value.
|
|
//
|
|
// More concretely, it is around 3 to 4 times faster to perform the reduction
|
|
// caller side as well as avoiding the allocation.
|
|
func (n *Uint256) SetBig(n2 *big.Int) *Uint256 {
|
|
// Take the value mod 2^256 if needed.
|
|
tmp := n2
|
|
if n2.BitLen() > 256 {
|
|
tmp = new(big.Int).And(n2, bigUint256Mask)
|
|
}
|
|
|
|
var buf [32]byte
|
|
tmp.FillBytes(buf[:]) // Requires Go 1.15.
|
|
n.SetBytes(&buf)
|
|
if tmp.Sign() < 0 {
|
|
n.Negate()
|
|
}
|
|
return n
|
|
}
|