Compare commits
1 Commits
simon/noise
...
lib25519
| Author | SHA1 | Date | |
|---|---|---|---|
| e8d8bfc55f |
+1
-1
@@ -102,7 +102,7 @@ default-members = [
|
||||
"explorer-api",
|
||||
]
|
||||
|
||||
exclude = ["explorer", "contracts", "clients/webassembly", "nym-wallet", "nym-connect/mobile/src-tauri", "nym-connect/desktop", "cpu-cycles"]
|
||||
exclude = ["explorer", "contracts", "clients/webassembly", "nym-wallet", "nym-connect/mobile/src-tauri", "nym-connect/desktop", "cpu-cycles", "lib-25519"]
|
||||
|
||||
[workspace.package]
|
||||
authors = ["Nym Technologies SA"]
|
||||
|
||||
Generated
+23
@@ -0,0 +1,23 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "lib_25519"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.146"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b"
|
||||
@@ -0,0 +1,12 @@
|
||||
[package]
|
||||
name = "lib_25519"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
build = "build.rs"
|
||||
links = "25519"
|
||||
|
||||
[dependencies]
|
||||
libc = "0.2.140"
|
||||
|
||||
[build-dependencies]
|
||||
cfg-if = "1"
|
||||
@@ -0,0 +1,69 @@
|
||||
use std::{env, path::PathBuf, process::Command};
|
||||
|
||||
fn main() {
|
||||
let out_dir = env::var("OUT_DIR").unwrap();
|
||||
let out_path = PathBuf::from(&out_dir);
|
||||
let source_path = PathBuf::from("lib25519")
|
||||
.canonicalize()
|
||||
.expect("cannot canonicalize path");
|
||||
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "mips", target_arch = "powerpc", target_arch = "powerpc64", target_arch = "arm")))] {
|
||||
panic!("Unsupported architecture - {}!", env::var("CARGO_CFG_TARGET_ARCH").unwrap(), )
|
||||
}
|
||||
};
|
||||
|
||||
let mut compile_o_command = Command::new("./configure");
|
||||
let compile_o_command = compile_o_command
|
||||
.current_dir(&source_path)
|
||||
.arg(format!("--prefix={out_dir}"));
|
||||
|
||||
match compile_o_command.output() {
|
||||
Ok(output) => {
|
||||
if !output.status.success() {
|
||||
panic!("{:?}", unsafe {
|
||||
std::str::from_utf8_unchecked(&output.stderr)
|
||||
})
|
||||
}
|
||||
}
|
||||
Err(e) => panic!("{e}"),
|
||||
}
|
||||
|
||||
let mut compile_o_command = Command::new("make");
|
||||
let compile_o_command = compile_o_command
|
||||
.current_dir(&source_path)
|
||||
.arg("-j8")
|
||||
.arg("install");
|
||||
|
||||
match compile_o_command.output() {
|
||||
Ok(output) => {
|
||||
if !output.status.success() {
|
||||
panic!("{:?}", unsafe {
|
||||
std::str::from_utf8_unchecked(&output.stderr)
|
||||
})
|
||||
}
|
||||
}
|
||||
Err(e) => panic!("{e}"),
|
||||
}
|
||||
|
||||
println!(
|
||||
"cargo:rustc-link-search=native={}",
|
||||
out_path.join("lib").to_str().unwrap()
|
||||
);
|
||||
println!("cargo:rustc-link-lib=static=25519");
|
||||
println!("cargo:rustc-link-lib=static=randombytes_kernel");
|
||||
|
||||
let mut compile_o_command = Command::new("make");
|
||||
let compile_o_command = compile_o_command.current_dir(source_path).arg("clean");
|
||||
|
||||
match compile_o_command.output() {
|
||||
Ok(output) => {
|
||||
if !output.status.success() {
|
||||
panic!("{:?}", unsafe {
|
||||
std::str::from_utf8_unchecked(&output.stderr)
|
||||
})
|
||||
}
|
||||
}
|
||||
Err(e) => panic!("{e}"),
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
Files: *
|
||||
Copyright: 2022 Kaushik Nath
|
||||
Copyright: 2011-2022 Daniel J. Bernstein
|
||||
Copyright: 2015 Tung Chou
|
||||
Copyright: 2011-2014 Peter Schwabe
|
||||
Copyright: 2011 Niels Duif
|
||||
Copyright: 2011 Tanja Lange
|
||||
Copyright: 2011 Bo-Yin Yang
|
||||
License: CC0-1.0
|
||||
@@ -0,0 +1,297 @@
|
||||
lib25519 draws on many previous implementations listed below, plus new
|
||||
speedups from Kaushik Nath and new infrastructure work and factoring
|
||||
from Daniel J. Bernstein. All software is in the public domain. Since
|
||||
some organizations require licenses, lib25519 is also CC0-licensed.
|
||||
|
||||
Some code was originally copied from public-domain code in the SUPERCOP
|
||||
benchmarking framework. See https://bench.cr.yp.to/supercop.html for
|
||||
SUPERCOP releases. The following small changes from code available in
|
||||
SUPERCOP are made in lib25519 without further comment:
|
||||
|
||||
* Returning void rather than int for functions that never fail in
|
||||
lib25519.
|
||||
* Message lengths long long rather than unsigned long long.
|
||||
* Defining various constants in .c files (to simplify PIC handling)
|
||||
instead of .S files.
|
||||
* Moving some C files to shared-*.c (which in lib25519 means that
|
||||
these files are compiled by only one compiler).
|
||||
* Using CRYPTO_SHARED_NAMESPACE rather than CRYPTO_NAMESPACE for
|
||||
symbols defined in *.S and shared-*.c.
|
||||
* Adding various "linker define" and "linker use" lines.
|
||||
|
||||
Larger changes from code in SUPERCOP, such as new code divisions across
|
||||
lib25519 primitives, are indicated below.
|
||||
|
||||
Sources of Curve25519 software (this is not a comprehensive list, just
|
||||
the software that lib25519 is derived from):
|
||||
|
||||
* Daniel J. Bernstein. "Curve25519: new Diffie-Hellman speed
|
||||
records." Pages 207–228 in Public key cryptography—PKC 2006, 9th
|
||||
international conference on theory and practice in public-key
|
||||
cryptography, New York, NY, USA, April 24–26, 2006, proceedings,
|
||||
edited by Moti Yung, Yevgeniy Dodis, Aggelos Kiayias, Tal Malkin,
|
||||
Lecture Notes in Computer Science 3958, Springer, 2006, ISBN
|
||||
3-540-33851-9.
|
||||
|
||||
This is the source of the Curve25519 design, the X25519 design, and
|
||||
various speedups. Most of the software from that paper is specific
|
||||
to a variety of 32-bit platforms (radix 2^25.5 or radix 2^21.25),
|
||||
but the portable supercop/crypto_scalarmult/curve25519/ref10 (radix
|
||||
2^25.5) is derived from this.
|
||||
|
||||
lib25519/crypto_nP/montgomery25519/ref10 starts with
|
||||
supercop/crypto_scalarmult/curve25519/ref10, and tweaks the API to
|
||||
provide crypto_nP instead of crypto_scalarmult. Inversion is
|
||||
factored out, producing crypto_pow/inv25519/ref10. The trivial
|
||||
crypto_scalarmult_base wrapper is factored out, producing
|
||||
crypto_nG/montgomery25519/ref/base.c; lib25519 has faster nG
|
||||
functions, but intentionally provides ref for situations where
|
||||
speed is outweighed by simplicity, assurance, code size, etc.
|
||||
|
||||
* supercop/crypto_scalarmult/curve25519/donna_c64 (radix 2^51) from
|
||||
Adam Langley.
|
||||
|
||||
lib25519/crypto_nP/montgomery25519/donna_c64 starts from this and
|
||||
tweaks the API to provide crypto_nP instead of crypto_scalarmult
|
||||
(and removes crypto_scalarmult_base). crypto_pow/inv25519/donna_c64
|
||||
is factored out.
|
||||
|
||||
* Daniel J. Bernstein, Niels Duif, Tanja Lange, Peter Schwabe, Bo-Yin
|
||||
Yang. "High-speed high-security signatures." Pages 124–142 in
|
||||
Cryptographic hardware and embedded systems—CHES 2011, 13th
|
||||
international workshop, Nara, Japan, September 28–October 1, 2011,
|
||||
proceedings, edited by Bart Preneel, Tsuyoshi Takagi, Lecture Notes
|
||||
in Computer Science 6917, Springer, 2011, ISBN 978-3-642-23950-2.
|
||||
Journal version: Journal of Cryptographic Engineering 2 (2012),
|
||||
77–89.
|
||||
|
||||
This is the source of the Ed25519 design and various X25519/Ed25519
|
||||
speedups for 64-bit Intel/AMD platforms, in particular producing
|
||||
supercop/crypto_{scalarmult/curve,sign/ed}25519/amd64-{51,64}*
|
||||
(radix 2^51 and radix 2^64 respectively). Peter Schwabe led the
|
||||
implementation work.
|
||||
|
||||
lib25519/crypto_nP/montgomery25519/amd64-51 starts from
|
||||
supercop/crypto_scalarmult/curve25519/amd64-51 and tweaks the API
|
||||
to provide crypto_nP instead of crypto_scalarmult (and removes
|
||||
crypto_scalarmult_base). crypto_nG/merged25519/amd64-51 (for
|
||||
fixed-base-point multiplication), crypto_mGnP/ed25519/amd64-51 (for
|
||||
double-scalar multiplication), and crypto_sign/ed25519/amd64 (for
|
||||
the remaining signing components) factor
|
||||
supercop/crypto_sign/ed25519/amd64-51 into smaller pieces.
|
||||
crypto_pow/inv25519/amd64-51 is also factored out. "SMALLTABLES"
|
||||
support is removed. Support for batch verification is removed,
|
||||
although it could reappear in a subsequent lib25519 release.
|
||||
|
||||
Similar comments apply to amd64-64 and ref10. A compiler warning
|
||||
is eliminated (window size 64 in amd64-64-24k/sc25519.h).
|
||||
|
||||
* Tung Chou. "Sandy2x: New Curve25519 Speed Records." SAC 2015.
|
||||
|
||||
This is the source of various X25519 speedups using 256-bit vector
|
||||
instructions, specifically AVX vector instructions in Intel's Sandy
|
||||
Bridge, in particular producing
|
||||
supercop/crypto_scalarmult/curve25519/sandy2x (radix 2^25.5).
|
||||
|
||||
lib25519/crypto_{nP,nG}/montgomery25519/sandy2x start from
|
||||
supercop/crypto_scalarmult/curve25519/sandy2x, and tweak the API to
|
||||
provide crypto_nP and crypto_nG instead of crypto_scalarmult and
|
||||
crypto_scalarmult_base respectively. The top bit of the incoming
|
||||
point is cleared. crypto_pow/inv25519/sandy2x is factored out.
|
||||
|
||||
* Kaushik Nath and Palash Sarkar, "Efficient arithmetic in
|
||||
(pseudo-)Mersenne prime order fields", Advances in Mathematics of
|
||||
Communications 16 (2022), pages 303–348.
|
||||
|
||||
Original release:
|
||||
https://github.com/kn-cs/pmp-farith/tree/master/gf-p2-255-19/SL
|
||||
https://github.com/kn-cs/pmp-farith/tree/master/gf-p2-255-19/USL1
|
||||
|
||||
The "SL" software is the source of various speedups to the amd64-64
|
||||
software, producing the "maa4" versions of fe25519_mul.S,
|
||||
fe25519_square.S, and fe25519_nsquare.S. These .S files are used
|
||||
inside the following lib25519 directories:
|
||||
crypto_mGnP/ed25519/amd64-avx2-10l-maa4
|
||||
crypto_mGnP/ed25519/amd64-avx2-9l-maa4
|
||||
crypto_mGnP/ed25519/amd64-maa4
|
||||
crypto_nG/merged25519/amd64-avx2-10l-maa4
|
||||
crypto_nG/merged25519/amd64-avx2-9l-maa4
|
||||
crypto_nG/merged25519/amd64-maa4
|
||||
crypto_nP/montgomery25519/amd64-avx2-hey10l-maa4
|
||||
crypto_nP/montgomery25519/amd64-avx2-hey9l-maa4
|
||||
crypto_nP/montgomery25519/amd64-avx2-ns10l-maa4
|
||||
crypto_nP/montgomery25519/amd64-avx2-ns9l-maa4
|
||||
crypto_nP/montgomery25519/amd64-maa4
|
||||
crypto_pow/inv25519/amd64-maa4
|
||||
|
||||
The "USL" software is the source of various speedups to the
|
||||
amd64-51 software, producing the "maa5" versions of fe25519_mul.S
|
||||
and fe25519_nsquare.S. These .S files are used inside the following
|
||||
lib25519 directories:
|
||||
crypto_nP/montgomery25519/amd64-avx2-hey10l-maa5
|
||||
crypto_nP/montgomery25519/amd64-avx2-hey9l-maa5
|
||||
crypto_nP/montgomery25519/amd64-avx2-ns10l-maa5
|
||||
crypto_nP/montgomery25519/amd64-avx2-ns9l-maa5
|
||||
crypto_pow/inv25519/amd64-maa5
|
||||
|
||||
* Kaushik Nath and Palash Sarkar, "Security and efficiency trade-offs
|
||||
for elliptic curve Diffie-Hellman at the 128-bit and 224-bit
|
||||
security levels." J. Cryptogr. Eng. 12(1): 107-121 (2022).
|
||||
|
||||
Original release:
|
||||
https://github.com/kn-cs/x25519/tree/master/intel64-mxaa-4limb
|
||||
https://github.com/kn-cs/x25519
|
||||
|
||||
This "mxaa-4limb" software is the source of various speedups to
|
||||
"maa4" on CPUs supporting BMI2 instructions (e.g., Intel Haswell
|
||||
from 2013), in particular producing the "mxaa" versions of
|
||||
fe25519_mul.S and fe25519_nsquare.S. These .S files are used inside
|
||||
the following lib25519 directories:
|
||||
crypto_mGnP/ed25519/amd64-avx2-10l-mxaa
|
||||
crypto_mGnP/ed25519/amd64-avx2-9l-mxaa
|
||||
crypto_mGnP/ed25519/amd64-mxaa
|
||||
crypto_nG/merged25519/amd64-avx2-10l-mxaa
|
||||
crypto_nG/merged25519/amd64-avx2-9l-mxaa
|
||||
crypto_nG/merged25519/amd64-mxaa
|
||||
crypto_nP/montgomery25519/amd64-avx2-hey10l-mxaa
|
||||
crypto_nP/montgomery25519/amd64-avx2-hey9l-mxaa
|
||||
crypto_nP/montgomery25519/amd64-avx2-ns10l-mxaa
|
||||
crypto_nP/montgomery25519/amd64-avx2-ns9l-mxaa
|
||||
crypto_nP/montgomery25519/amd64-mxaa
|
||||
crypto_pow/inv25519/amd64-mxaa
|
||||
|
||||
This software is also the source of the following three different
|
||||
Montgomery-ladder functions, where the third also builds on the
|
||||
"maax" work listed below:
|
||||
crypto_nP/montgomery25519/amd64-maa4/mladder.S
|
||||
crypto_nP/montgomery25519/amd64-mxaa/mladder.S
|
||||
crypto_nP/montgomery25519/amd64-maax/mladder.S
|
||||
|
||||
* Kaushik Nath and Palash Sarkar, "Efficient arithmetic in
|
||||
(pseudo-)Mersenne prime order fields", Advances in Mathematics of
|
||||
Communications 16 (2022), pages 303–348. Original release:
|
||||
https://github.com/kn-cs/pmp-farith/tree/master/gf-p2-255-19/SLDCC
|
||||
|
||||
This is the source of various speedups to "mxaa" on CPUs that also
|
||||
support ADX instructions (e.g., Intel Broadwell from 2014), in
|
||||
particular producing the "maax" versions of fe25519_mul.S,
|
||||
fe25519_square.S, and fe25519_nsquare.S. These .S files are used
|
||||
inside the following lib25519 directories:
|
||||
crypto_mGnP/ed25519/amd64-avx2-10l-maax
|
||||
crypto_mGnP/ed25519/amd64-avx2-9l-maax
|
||||
crypto_mGnP/ed25519/amd64-avx512ifma-5l-maax
|
||||
crypto_mGnP/ed25519/amd64-maax
|
||||
crypto_nG/merged25519/amd64-avx2-10l-maax
|
||||
crypto_nG/merged25519/amd64-avx2-9l-maax
|
||||
crypto_nG/merged25519/amd64-avx512ifma-5l-maax
|
||||
crypto_nG/merged25519/amd64-maax
|
||||
crypto_nP/montgomery25519/amd64-avx2-hey10l-maax
|
||||
crypto_nP/montgomery25519/amd64-avx2-hey9l-maax
|
||||
crypto_nP/montgomery25519/amd64-avx2-ns10l-maax
|
||||
crypto_nP/montgomery25519/amd64-avx2-ns9l-maax
|
||||
crypto_nP/montgomery25519/amd64-avx512-hey10l-maax
|
||||
crypto_nP/montgomery25519/amd64-avx512-hey9l-maax
|
||||
crypto_nP/montgomery25519/amd64-avx512-ns10l-maax
|
||||
crypto_nP/montgomery25519/amd64-avx512-ns9l-maax
|
||||
crypto_nP/montgomery25519/amd64-avx512ifma-hey5l-maax
|
||||
crypto_nP/montgomery25519/amd64-avx512ifma-ns5l-maax
|
||||
crypto_nP/montgomery25519/amd64-maax
|
||||
crypto_pow/inv25519/amd64-maax
|
||||
|
||||
* Kaushik Nath and Palash Sarkar, "Efficient 4-Way Vectorizations of
|
||||
the Montgomery Ladder". IEEE Trans. Computers 71(3): 712-723
|
||||
(2022). Original release:
|
||||
https://github.com/kn-cs/vec-ladder/tree/master/Curve25519
|
||||
|
||||
This is the source of the "hey10l" (radix 2^25.5), "hey9l" (radix
|
||||
2^29), "ns10l" (radix 2^25.5), and "ns9l" (radix 2^29) versions of
|
||||
mladder.S for CPUs that also support 256-bit AVX2 instructions
|
||||
(e.g., Intel Haswell from 2013). In lib25519, these four .S files
|
||||
are used in 16 directories:
|
||||
crypto_nP/montgomery25519/amd64-avx2-hey10l-{maa4,maa5,maax,mxaa}
|
||||
crypto_nP/montgomery25519/amd64-avx2-hey9l-{maa4,maa5,maax,mxaa}
|
||||
crypto_nP/montgomery25519/amd64-avx2-ns10l-{maa4,maa5,maax,mxaa}
|
||||
crypto_nP/montgomery25519/amd64-avx2-ns9l-{maa4,maa5,maax,mxaa}
|
||||
|
||||
* Kaushik Nath, new Montgomery-ladder code new in lib25519 (no paper
|
||||
yet) for CPUs supporting AVX-512 instructions (e.g., Intel
|
||||
Skylake-X from 2017). These are six files in lib25519:
|
||||
crypto_nP/montgomery25519/amd64-avx512-hey10l-maax
|
||||
crypto_nP/montgomery25519/amd64-avx512-hey9l-maax
|
||||
crypto_nP/montgomery25519/amd64-avx512-ns10l-maax
|
||||
crypto_nP/montgomery25519/amd64-avx512-ns9l-maax
|
||||
crypto_nP/montgomery25519/amd64-avx512ifma-hey5l-maax
|
||||
crypto_nP/montgomery25519/amd64-avx512ifma-ns5l-maax
|
||||
|
||||
* Kaushik Nath, nine versions of fixed-base-point
|
||||
scalar-multiplication code new in lib25519 (no paper yet) for
|
||||
various platforms:
|
||||
crypto_nG/merged25519/amd64-avx2-10l-maa4/ge25519_base.S
|
||||
crypto_nG/merged25519/amd64-avx2-10l-maax/ge25519_base.S
|
||||
crypto_nG/merged25519/amd64-avx2-10l-mxaa/ge25519_base.S
|
||||
crypto_nG/merged25519/amd64-avx2-9l-maa4/ge25519_base.S
|
||||
crypto_nG/merged25519/amd64-avx2-9l-maax/ge25519_base.S
|
||||
crypto_nG/merged25519/amd64-avx2-9l-mxaa/ge25519_base.S
|
||||
crypto_nG/merged25519/amd64-avx512ifma-5l-maax/ge25519_base.S
|
||||
crypto_nG/merged25519/amd64-maa4/ge25519_base.S
|
||||
crypto_nG/merged25519/amd64-maax/ge25519_base.S
|
||||
crypto_nG/merged25519/amd64-mxaa/ge25519_base.S
|
||||
|
||||
* Kaushik Nath, ten versions of double-scalar-multiplication code new
|
||||
in lib25519 (no paper yet) for various platforms. Each version has
|
||||
precompute.S and process.S:
|
||||
|
||||
crypto_mGnP/ed25519/amd64-avx2-10l-maa4/ge25519_double_scalarmult_precompute.S
|
||||
crypto_mGnP/ed25519/amd64-avx2-10l-maax/ge25519_double_scalarmult_precompute.S
|
||||
crypto_mGnP/ed25519/amd64-avx2-10l-mxaa/ge25519_double_scalarmult_precompute.S
|
||||
crypto_mGnP/ed25519/amd64-avx2-9l-maa4/ge25519_double_scalarmult_precompute.S
|
||||
crypto_mGnP/ed25519/amd64-avx2-9l-maax/ge25519_double_scalarmult_precompute.S
|
||||
crypto_mGnP/ed25519/amd64-avx2-9l-mxaa/ge25519_double_scalarmult_precompute.S
|
||||
crypto_mGnP/ed25519/amd64-avx512ifma-5l-maax/ge25519_double_scalarmult_precompute.S
|
||||
crypto_mGnP/ed25519/amd64-maa4/ge25519_double_scalarmult_precompute.S
|
||||
crypto_mGnP/ed25519/amd64-maax/ge25519_double_scalarmult_precompute.S
|
||||
crypto_mGnP/ed25519/amd64-mxaa/ge25519_double_scalarmult_precompute.S
|
||||
|
||||
crypto_mGnP/ed25519/amd64-avx2-10l-maa4/ge25519_double_scalarmult_process.S
|
||||
crypto_mGnP/ed25519/amd64-avx2-10l-maax/ge25519_double_scalarmult_process.S
|
||||
crypto_mGnP/ed25519/amd64-avx2-10l-mxaa/ge25519_double_scalarmult_process.S
|
||||
crypto_mGnP/ed25519/amd64-avx2-9l-maa4/ge25519_double_scalarmult_process.S
|
||||
crypto_mGnP/ed25519/amd64-avx2-9l-maax/ge25519_double_scalarmult_process.S
|
||||
crypto_mGnP/ed25519/amd64-avx2-9l-mxaa/ge25519_double_scalarmult_process.S
|
||||
crypto_mGnP/ed25519/amd64-avx512ifma-5l-maax/ge25519_double_scalarmult_process.S
|
||||
crypto_mGnP/ed25519/amd64-maa4/ge25519_double_scalarmult_process.S
|
||||
crypto_mGnP/ed25519/amd64-maax/ge25519_double_scalarmult_process.S
|
||||
crypto_mGnP/ed25519/amd64-mxaa/ge25519_double_scalarmult_process.S
|
||||
|
||||
Almost all of the crypto_pow/inv25519 implementations use exponentiation,
|
||||
but there is also a different implementation from the following source:
|
||||
|
||||
* Daniel J. Bernstein, Bo-Yin Yang. "Fast constant-time gcd
|
||||
computation and modular inversion." IACR Transactions on
|
||||
Cryptographic Hardware and Embedded Systems 2019 issue 3 (2019),
|
||||
340–398.
|
||||
|
||||
This is the source of the "safegcd" algorithm and software. Further
|
||||
speedups (no paper yet; ideas from Peter Dettman, Gregory Maxwell,
|
||||
and Pieter Wuille) have produced the "inverse25519skylake" software
|
||||
available here: https://gcd.cr.yp.to/software.html
|
||||
|
||||
lib25519/crypto_pow/inv25519/amd64-safegcd is inverse25519skylake,
|
||||
tweaked to provide the crypto_pow API and to clear the top bit of
|
||||
the input.
|
||||
|
||||
For lower-layer SHA-512 functions:
|
||||
|
||||
* Daniel J. Bernstein, supercop/crypto_hash*/sha512/*. In lib25519,
|
||||
some unused variables are removed in crypto_hashblocks/sha512/avx
|
||||
to eliminate compiler warnings.
|
||||
|
||||
Most of the lib25519 infrastructure, such as the run-time implementation
|
||||
selector automatically guided by CPU type and previous benchmarks, is
|
||||
new in lib25519 from Daniel J. Bernstein. Portions of autogen-speed
|
||||
(generating lib25519-speed.c) and autogen-test (generating
|
||||
lib25519-test.c) are based on benchmarking software and test software in
|
||||
SUPERCOP by Daniel J. Bernstein. The symmetric-cryptography code for
|
||||
generating pseudorandom test inputs and hashing test outputs is adapted
|
||||
from TweetNaCl, a library by Daniel J. Bernstein, Wesley Janssen, Tanja
|
||||
Lange, and Peter Schwabe.
|
||||
@@ -0,0 +1,53 @@
|
||||
Prerequisites: python3; gcc and/or clang.
|
||||
|
||||
For sysadmins, to install in /usr/local/{include,lib,bin}:
|
||||
|
||||
./configure && make -j8 install
|
||||
|
||||
For users, to install in $HOME/{include,lib,bin}:
|
||||
|
||||
./configure --prefix=$HOME && make -j8 install
|
||||
|
||||
For distributors creating a package: Run
|
||||
|
||||
./configure --prefix=/usr && make -j8
|
||||
|
||||
and then follow your usual packaging procedures for the .h files in
|
||||
build/0/package/include, the libraries in build/0/package/lib, and the
|
||||
test programs in build/0/package/bin.
|
||||
|
||||
The long-term plan is to split off some components of lib25519 into
|
||||
their own packages, and distributors may already wish to package these
|
||||
components accordingly:
|
||||
|
||||
* -lcpucycles will be its own package.
|
||||
|
||||
* -lrandombytes_kernel will be its own package.
|
||||
|
||||
* -lrandombytes will be an indirection layer providing randombytes.h
|
||||
and supporting an ecosystem of randombytes() implementations (via,
|
||||
e.g., Debian's /etc/alternatives), such as -lrandombytes_kernel,
|
||||
-lrandombytes_per_process_rng, etc.
|
||||
|
||||
* -l25519 will be the main lib25519 package, using -lrandombytes and
|
||||
-lcpucycles.
|
||||
|
||||
lib25519-test already uses -l25519 without -lrandombytes: it substitutes
|
||||
its own knownrandombytes() to generate test vectors.
|
||||
|
||||
More options: You can run
|
||||
|
||||
./configure --host=amd64
|
||||
|
||||
to override ./configure's guess of the architecture that it should
|
||||
compile for. The architecture controls which implementations to try
|
||||
(see crypto_*/*/*/architectures) and which compilers to try (see
|
||||
compilers/*).
|
||||
|
||||
Inside the build directory, 0 is symlinked to amd64 for --host=amd64.
|
||||
Running "make clean" removes build/amd64. Re-running ./configure
|
||||
automatically starts with "make clean".
|
||||
|
||||
A subsequent ./configure --host=arm will create build/arm and symlink
|
||||
0 -> arm, without touching an existing build/amd64. However,
|
||||
cross-compilers aren't yet selected automatically.
|
||||
@@ -0,0 +1,405 @@
|
||||
This file explains the internal structure of lib25519, and explains how
|
||||
to add new instruction sets and new implementations.
|
||||
|
||||
## Primitives
|
||||
|
||||
The directories `crypto_*/*` inside lib25519 define the following
|
||||
primitives (see also `autogen-test` for Python versions of the
|
||||
mathematical primitives):
|
||||
|
||||
* `crypto_verify/32`: `crypto_verify_32(s,t)` returns 0 when the 32-byte
|
||||
arrays `s` and `t` are equal, otherwise `-1`. This function takes
|
||||
constant time.
|
||||
|
||||
* `crypto_hashblocks/sha512`: `crypto_hashblocks_sha512(h,x,xlen)`
|
||||
updates an intermediate SHA-512 hash `h` using all of the full
|
||||
128-byte blocks at the beginning of the `xlen`-byte array `x`, and
|
||||
returns the number of bytes left over, namely `xlen` mod 128. This
|
||||
function takes time that depends on `xlen` but not on the contents of
|
||||
`h` or `x`.
|
||||
|
||||
* `crypto_hash/sha512`: `crypto_hash_sha512(h,x,xlen)` computes the
|
||||
SHA-512 hash `h` of the `xlen`-byte array `x`. This function takes
|
||||
time that depends on `xlen` but not on the contents of `x`.
|
||||
|
||||
* `crypto_pow/inv25519`: `crypto_pow_inv25519(y,x)` computes the
|
||||
2^255^−21 power `y` of an integer `x` modulo 2^255^−19. This is the
|
||||
same as the inverse of `x` modulo 2^255^−19 if `x` is not divisible by
|
||||
2^255^−19. The integers `x` and `y` are represented as a 32-byte array
|
||||
in little-endian form. This function takes constant time.
|
||||
|
||||
This function guarantees that the output `y` is frozen modulo
|
||||
2^255^−19, i.e., completely reduced to the range 0,1,...,2^255^−20. The
|
||||
caller is expected to freeze `x` before calling this function. The
|
||||
function accepts `x` in the range {0,1,...,2^256^−1} while ignoring the
|
||||
top bit (the coefficient of 2^255^ in binary): i.e., the function
|
||||
reduces `x` modulo 2^255^ and then modulo 2^255^−19.
|
||||
|
||||
* `crypto_nP/montgomery25519`: `crypto_nP_montgomery25519(nP,n,P)`
|
||||
computes the X25519 function: in short, if a Curve25519 point has
|
||||
x-coordinate `P` then the `n`th multiple of the point has x-coordinate
|
||||
`nP`. The inputs and outputs are represented as 32-byte arrays in
|
||||
little-endian form. This function takes constant time.
|
||||
|
||||
X25519 is defined for `n` in the range 2^254^ + 8{0,1,2,3,...,2^251^−1}.
|
||||
`crypto_nP_montgomery25519` allows `n` in the wider range
|
||||
{0,1,...,2^256^−1}, and in all cases computes `m`th multiples where `m`
|
||||
is defined as follows: make a copy of `n`, clear the top bit, set the
|
||||
next bit, and clear the bottom three bits.
|
||||
|
||||
X25519 guarantees that the output `nP` is frozen. It does not require
|
||||
the input to be frozen; also, it allows the input to be on the twist,
|
||||
and to have small order.
|
||||
|
||||
`crypto_nP_montgomery25519` clears the top bit of `P` before applying
|
||||
the X25519 function. Callers that want the X25519 function on `P` with
|
||||
the top bit set have to reduce modulo 2^255^−19 for themselves.
|
||||
|
||||
* `crypto_nG/merged25519`: `crypto_nG_merged25519(nG,n)` reads an
|
||||
integer `n` in the range {0,1,...,2^256^−1} and outputs a frozen
|
||||
integer `nG` modulo 2^255^−19, possibly with the top bit set (i.e.,
|
||||
adding 2^255^) as described below. Both `n` and `nG` are represented
|
||||
as 32-byte arrays in little-endian form. This function takes constant
|
||||
time.
|
||||
|
||||
If the top bit of `n` is clear then `nG` is the Edwards y-coordinate
|
||||
of the `n`th multiple of G, and the top bit is set exactly when the
|
||||
Edwards x-coordinate is odd. Otherwise `nG` is the Montgomery
|
||||
x-coordinate of the (`n`−2^255^)th multiple of G, and the top bit is
|
||||
clear. Here G is the standard Curve25519 base point, which has
|
||||
Montgomery x-coordinate 9, Edwards y-coordinate 4/5, and even Edwards
|
||||
x-coordinate.
|
||||
|
||||
* `crypto_nG/montgomery25519`: `crypto_nG_montgomery25519(nG,n)` is
|
||||
the same as `crypto_nP_montgomery(nG,n,G)` where `G` is the array
|
||||
{9,0,0,...,0}. This function takes constant time.
|
||||
|
||||
The point of `crypto_nG` is to save time (using a small table
|
||||
precomputed from `G`) compared to the more general `crypto_nP`. This
|
||||
has the disadvantage of being more complicated, which is particularly
|
||||
important given that lib25519 has not yet been verified, and in any
|
||||
case increases code size noticeably for X25519. There is a `ref`
|
||||
implementation of `crypto_nG` that simply calls `crypto_nP`, and
|
||||
setting sticky bits on the other implementation directories
|
||||
(`chmod +t crypto_nG/montgomery25519/*; chmod -t crypto_nG/montgomery25519/ref`)
|
||||
will force lib25519 to use `ref`.
|
||||
|
||||
* `crypto_mGnP/ed25519`: `crypto_mGnP_ed25519(mGnP,m,n,P)` computes
|
||||
`(m mod L)G−(n mod L)P` in Edwards coordinates, where `L` is the prime
|
||||
number 2^252^+27742317777372353535851937790883648493 and `G` is the
|
||||
same standard base point. This function takes time that depends on the
|
||||
inputs.
|
||||
|
||||
The input `m` is an integer in the range {0,1,...,2^256^−1}
|
||||
represented as a 32-byte array in little-endian form. Any `m` outside
|
||||
the range {0,1,...,L−1} triggers a failure, which is reported as
|
||||
described below.
|
||||
|
||||
The input `n` is an integer in the range {0,1,...,2^512^−1}
|
||||
represented as a 64-byte array in little-endian form.
|
||||
|
||||
The input point `P` is represented as a 32-byte array as follows: the
|
||||
(frozen) Edwards y-coordinate of `P` in {0,1,...,2^255^−20} is stored
|
||||
in little-endian form, and then the top bit is set exactly when the
|
||||
(frozen) Edwards x-coordinate of `P` is odd. An input 32-byte array
|
||||
that does not have this form is instead interpreted as the point `P`
|
||||
with Edwards coordinates (...8,26), and triggers a failure, reported
|
||||
as described below.
|
||||
|
||||
The output is a 33-byte array. The first 32 bytes are the output point
|
||||
`(m mod L)G−(n mod L)P`, represented the same way as `P`. The last
|
||||
byte is 1 on success and 0 on failure.
|
||||
|
||||
* `crypto_dh/x25519`: `crypto_dh_x25519_keypair(pk,sk)` generates a
|
||||
32-byte X25519 public key `pk` and the corresponding 32-byte secret
|
||||
key `sk`. This function is the composition of `randombytes` to
|
||||
generate `sk` and `crypto_nG_montgomery25519` to generate `pk`.
|
||||
|
||||
`crypto_dh_x25519(k,pk,sk)` generates a 32-byte shared secret `k`
|
||||
given a public key `pk` and a secret key `sk`. This function is the
|
||||
same as `crypto_nP_montgomery25519`.
|
||||
|
||||
* `crypto_sign/ed25519`: `crypto_sign_ed25519_keypair(pk,sk)` generates
|
||||
a 32-byte Ed25519 public key `pk` and the corresponding 64-byte secret
|
||||
key `sk`. This function takes constant time.
|
||||
|
||||
`crypto_sign_ed25519(sm,&smlen,m,mlen,sk)` generates an `smlen`-byte
|
||||
signed message `sm` given an `mlen`-byte message `m` and a secret key
|
||||
`sk`. The caller is required to allocate `mlen+64` bytes for `sm`. The
|
||||
function always sets `smlen` to `mlen+64`. This function takes time
|
||||
that depends on `mlen` but not on the other inputs.
|
||||
|
||||
`crypto_sign_ed25519_open(m,&mlen,sm,smlen,pk)` generates an
|
||||
`mlen`-byte message `m` given an `smlen`-byte signed message `sm` and
|
||||
a public key `pk`, and returns 0. However, if `sm` is invalid, this
|
||||
function returns `-1`, sets `mlen` to `-1`, and clears `m`. The caller is
|
||||
required to allocate `smlen` (not just `smlen-64`) bytes for `m`, for
|
||||
example using the same array for `sm` and `m`. This function takes time
|
||||
that depends on its inputs.
|
||||
|
||||
lib25519 includes a command-line utility `lib25519-test` that runs some
|
||||
tests for each of these primitives, and another utility `lib25519-speed`
|
||||
that measures cycle counts for each of these primitives.
|
||||
|
||||
The stable lib25519 API functions are built from the above primitives:
|
||||
|
||||
* `lib25519_dh_keypair` is `crypto_dh_x25519_keypair`.
|
||||
* `lib25519_dh` is `crypto_dh_x25519`.
|
||||
* `lib25519_sign_keypair` is `crypto_sign_ed25519_keypair`.
|
||||
* `lib25519_sign` is `crypto_sign_ed25519`.
|
||||
* `lib25519_sign_open` is `crypto_sign_ed25519_open`.
|
||||
|
||||
Some changes are anticipated in the list of primitives, but these API
|
||||
functions will remain stable.
|
||||
|
||||
As in SUPERCOP and NaCl, message lengths intentionally use `long long`,
|
||||
not `size_t`. In lib25519, message lengths are signed.
|
||||
|
||||
## Implementations
|
||||
|
||||
A single primitive can, and usually does, have multiple implementations.
|
||||
Each implementation is in its own subdirectory. The implementations are
|
||||
required to have exactly the same input-output behavior, and to some
|
||||
extent this is tested, although it is not yet formally verified.
|
||||
|
||||
Different implementations typically offer different tradeoffs between
|
||||
portability, simplicity, and efficiency. For example,
|
||||
`crypto_nP/montgomery25519/ref10` is portable;
|
||||
`crypto_nP/montgomery25519/amd64-maax` is faster and less portable.
|
||||
|
||||
Each unportable implementation has an `architectures` file. Each line in
|
||||
this file identifies a CPU instruction set (and ABI) where the
|
||||
implementation works. For example,
|
||||
`crypto_nP/montgomery25519/amd64-maax/architectures` has one line
|
||||
`amd64 bmi2 adx`, meaning that the implementation works on CPUs that
|
||||
have the Intel/AMD 64-bit instruction set with the BMI2 and ADX
|
||||
instruction-set extensions. The top-level `compilers` directory shows
|
||||
(among other things) the allowed instruction-set names such as `bmi2`.
|
||||
|
||||
At run time, lib25519 checks the CPU where it is running, and selects
|
||||
an implementation where `architectures` is compatible with that CPU.
|
||||
Each primitive makes its own selection once per program startup, using
|
||||
the compiler's `ifunc` mechanism. This type of run-time selection means,
|
||||
for example, that an `amd64` CPU without AVX2 can share binaries with an
|
||||
`amd64` CPU with AVX2. However, correctness requires instruction sets to
|
||||
be preserved by migration across cores via the OS kernel, VM migration,
|
||||
etc.
|
||||
|
||||
The compiler has a `target` mechanism that makes an `ifunc` selection
|
||||
based on CPU architectures. Instead of using the `target` mechanism,
|
||||
lib25519 uses a more sophisticated mechanism that also accounts for
|
||||
benchmarks collected in advance of compilation.
|
||||
|
||||
## Compilers
|
||||
|
||||
lib25519 tries different C compilers for each implementation. For
|
||||
example, `compilers/default` lists the following compilers:
|
||||
|
||||
gcc -Wall -fPIC -fwrapv -O2
|
||||
clang -Wall -fPIC -fwrapv -Qunused-arguments -O2
|
||||
|
||||
Sometimes `gcc` produces better code, and sometimes `clang` produces
|
||||
better code.
|
||||
|
||||
As another example, `compilers/amd64+avx` lists the following compilers:
|
||||
|
||||
gcc -Wall -fPIC -fwrapv -O2 -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -mtune=sandybridge
|
||||
clang -Wall -fPIC -fwrapv -Qunused-arguments -O2 -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -mtune=sandybridge
|
||||
|
||||
The `-mavx` option tells these compilers that they are free to use the
|
||||
AVX instruction-set extension.
|
||||
|
||||
Code compiled using the compilers in `compilers/amd64+avx` will be
|
||||
considered at run time by the lib25519 selection mechanism if the
|
||||
`supports()` function in `compilers/amd64+avx.c` returns nonzero. This
|
||||
function checks whether the run-time CPU supports AVX (and SSE and so on,
|
||||
and OSXSAVE with XMM/YMM being saved;
|
||||
[https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85100](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85100)
|
||||
says that all versions of gcc until 2018 handled this incorrectly in
|
||||
`target`). Similar comments apply to other `compilers/*` files.
|
||||
|
||||
If some compilers fail (for example, clang is not installed, or the
|
||||
compiler version is too old to support the compiler options used in
|
||||
lib25519), the lib25519 compilation process will try its best to produce
|
||||
a working library using the remaining compilers, even if this means
|
||||
lower performance.
|
||||
|
||||
## Trimming
|
||||
|
||||
By default, to reduce size of the compiled library, the lib25519
|
||||
compilation process trims the library down to the implementations that
|
||||
are selected by lib25519's selection mechanism (across all CPUs; the
|
||||
library remains portable, not tied to the compilation CPU).
|
||||
|
||||
This trimming is handled at link time rather than compile time to
|
||||
increase the chance that, even if some implementations are broken by
|
||||
compiler "upgrades", the library will continue to build successfully.
|
||||
|
||||
To avoid this trimming, pass the `--notrim` option to `./configure`.
|
||||
All implementations that compile are then included in the library,
|
||||
tested by `lib2519-test`, and measured by `lib25519-speed`. You'll want
|
||||
to avoid trimming if you're adding new instruction sets or new
|
||||
implementations (see below), so that you can run tests and benchmarks of
|
||||
code that isn't selected yet.
|
||||
|
||||
## How to recompile after changes
|
||||
|
||||
If you make changes in the lib25519 source directory, you have to run
|
||||
`./configure` again to repopulate the build directory. Simply running
|
||||
`make` again doesn't suffice.
|
||||
|
||||
By default, `./configure` cleans the build directory first, so `make`
|
||||
will recompile everything. This can be on the scale of seconds if you
|
||||
have enough cores, but maybe you're developing on a slower machine. Two
|
||||
options are currently available to accelerate the edit-compile cycle:
|
||||
|
||||
* There is an experimental `--noclean` option to `./configure` that,
|
||||
for some simple types of changes, can produce a successful build
|
||||
without cleaning.
|
||||
|
||||
* You can disable the implementations you're not using by setting
|
||||
sticky bits on the source directories for those implementations:
|
||||
e.g., `chmod +t crypto_nG/*/*avx2*`.
|
||||
|
||||
Make sure to reenable all implementations and do a full clean build if
|
||||
you're collecting data to add to the source `benchmarks` directory.
|
||||
|
||||
## How to add new instruction sets
|
||||
|
||||
Adding another file `compilers/amd64+foo`, along with a `supports()`
|
||||
implementation in `compilers/amd64+foo.c`, will support a new
|
||||
instruction set. Do not assume that the new `foo` instruction set
|
||||
implies support for older instruction sets (the idea of "levels" of
|
||||
instruction sets); instead make sure to include the older instruction
|
||||
sets in `+` tags, as illustrated by
|
||||
`compilers/amd64+avx+bmi2+avx2+adx+avx512f+avx512vl+avx512ifma`.
|
||||
|
||||
In the compiler options, always make sure to include `-fPIC` to support
|
||||
shared libraries, and `-fwrapv` to switch to a slightly less dangerous
|
||||
version of C.
|
||||
|
||||
The `foo` tags don't have to be instruction sets. For example, if a CPU
|
||||
has the same instruction set but wants different optimizations because
|
||||
of differences in instruction timings, you can make a tag for those
|
||||
optimizations, using, e.g., CPU IDs or benchmarks in the corresponding
|
||||
`supports()` function to decide whether to enable those optimizations.
|
||||
Benchmarks tend to be more future-proof than a list of CPU IDs, but the
|
||||
time taken for benchmarks at program startup has to be weighed against
|
||||
the subsequent speedup from the resulting optimizations.
|
||||
|
||||
To see how well lib25519 performs with the new compilers, run
|
||||
`lib25519-speed` on the target machine and look for the `foo` lines in
|
||||
the output. If the new performance is better than the performance shown
|
||||
on the `selected` lines:
|
||||
|
||||
* Copy the `lib25519-speed` output into a file on the `benchmarks`
|
||||
directory, typically named after the hostname of the target
|
||||
machine.
|
||||
|
||||
* Run `./prioritize` in the top-level directory to create `priority`
|
||||
files. These files tell lib25519 which implementations to select
|
||||
for any given architecture.
|
||||
|
||||
* Reconfigure (again with `--notrim`), recompile, rerun
|
||||
`lib25519-test`, and rerun `lib25519-speed` to check that the
|
||||
`default` lines now use the `foo` compiler.
|
||||
|
||||
If the `foo` implementation is outperformed by other implementations,
|
||||
then these steps don't help except for documenting this fact. The same
|
||||
implementation might turn out to be useful for subsequent `foo` CPUs.
|
||||
|
||||
## How to add new implementations
|
||||
|
||||
Taking full advantage of the `foo` instruction set usually requires
|
||||
writing new implementations. Sometimes there are also ideas for taking
|
||||
better advantage of existing instruction sets.
|
||||
|
||||
Structurally, adding a new implementation of a primitive is a simple
|
||||
matter of adding a new subdirectory with the code for that
|
||||
implementation. Most of the work is optimizing the use of `foo`
|
||||
intrinsics in `.c` files or `foo` instructions in `.S` files. Make sure
|
||||
to include an `architectures` file saying, e.g., `amd64 avx2 foo`.
|
||||
|
||||
Names of implementation directories can use letters, digits, dashes, and
|
||||
underscores. Do not use two implementation names that are the same when
|
||||
dashes and underscores are removed.
|
||||
|
||||
All `.c` and `.S` files in the implementation directory are compiled and
|
||||
linked. There is no need to edit a separate list of these files. You can
|
||||
also use `.h` files via the C preprocessor.
|
||||
|
||||
If an implementation is actually more restrictive than indicated in
|
||||
`architectures` then the resulting compiled library will fail on some
|
||||
machines (although perhaps that implementation will not be used by
|
||||
default). Putting unnecessary restrictions into `architectures` will not
|
||||
create such failures, but can unnecessarily limit performance.
|
||||
|
||||
Some, but not all, mistakes in `architectures` will produce warnings
|
||||
from the `checkinsns` script that runs automatically when lib25519 is
|
||||
compiled. Running the `lib25519-test` program tries all implementations,
|
||||
but only on the CPU where `lib25519-test` is being run, and `lib25519-test`
|
||||
does not guarantee code coverage: for example, other message lengths
|
||||
being signed could involve other code paths.
|
||||
|
||||
`amd64` implies little-endian, and implies architectural support for
|
||||
unaligned loads and stores. Beware, however, that the Intel/AMD
|
||||
vectorized `load`/`store` intrinsics (and the underlying `movdqa`
|
||||
instruction) require alignment; if in doubt, use `loadu`/`storeu` (and
|
||||
`movdqu`). The `lib25519-test` program checks unaligned inputs and
|
||||
outputs, but can miss issues with unaligned stack variables.
|
||||
|
||||
To test your implementation, compile everything, check for compiler
|
||||
warnings and errors, run `lib25519-test` (or just `lib25519-test nG` to
|
||||
test a `crypto_nG` implementation), and check for a line saying `all
|
||||
tests succeeded`. To use AddressSanitizer (for catching, at run time,
|
||||
buffer overflows in C code), add `-fsanitize=address` to the `gcc` and
|
||||
`clang` lines in `compilers/*`.
|
||||
|
||||
To see the performance of your implementation, run `lib25519-speed`.
|
||||
If the new performance is better than the performance shown on the
|
||||
`default` lines, follow the same steps as for a new instruction set:
|
||||
copy the `lib25519-speed` output into a file on the `benchmarks`
|
||||
directory; run `./prioritize` in the top-level directory to create
|
||||
`priority` files; reconfigure (again with `--notrim`); recompile; rerun
|
||||
`lib25519-test`; rerun `lib25519-speed`; check that the `default` lines
|
||||
now use the new implementation.
|
||||
|
||||
## How to handle namespacing
|
||||
|
||||
As in SUPERCOP and NaCl, to call `crypto_hash_sha512()`, you have to
|
||||
include `crypto_hash_sha512.h`; but to write an implementation of
|
||||
`crypto_hash_sha512()`, you have to instead include `crypto_hash.h` and
|
||||
define `crypto_hash`. Similar comments apply to other primitives.
|
||||
|
||||
The function name that's actually linked might end up as, e.g.,
|
||||
`lib25519_hash_sha512_blocksplusavx_C2_hash` where `blocksplusavx`
|
||||
indicates the implementation and `C2` indicates the compiler. Don't try
|
||||
to build this name into your implementation.
|
||||
|
||||
If you have another global symbol `x` (for example, a non-`static`
|
||||
function in a `.c` file, or a non-`static` variable outside functions in
|
||||
a `.c` file), you have to replace it with `CRYPTO_NAMESPACE(x)`, for
|
||||
example with `#define x CRYPTO_NAMESPACE(x)`.
|
||||
|
||||
For global symbols in `.S` files and `shared-*.c` files, use
|
||||
`CRYPTO_SHARED_NAMESPACE` instead of `CRYPTO_NAMESPACE`. For `.S` files
|
||||
that define both `x` and `_x` to handle platforms where `x` in C is `_x`
|
||||
in assembly, use `CRYPTO_SHARED_NAMESPACE(x)` and
|
||||
`_CRYPTO_SHARED_NAMESPACE(x)`; `CRYPTO_SHARED_NAMESPACE(_x)` is not
|
||||
sufficient.
|
||||
|
||||
lib25519 includes a mechanism to recognize files that are copied across
|
||||
implementations (possibly of different primitives) and to unify those
|
||||
into a file compiled only once, reducing the overall size of the
|
||||
compiled library and possibly improving cache utilization. To request
|
||||
this mechanism, include a line `// linker define x` for any global
|
||||
symbol `x` defined in the file, and a line `// linker use x` for any
|
||||
global symbol `x` used in the file from the same implementation (not
|
||||
`crypto_*` subroutines that you're calling, `randombytes`, etc.). This
|
||||
mechanism tries very hard, perhaps too hard, to avoid improperly
|
||||
unifying files: for example, even a slight difference in a `.h` file
|
||||
included by a file defining a used symbol will disable the mechanism.
|
||||
|
||||
Typical namespacing mistakes will produce either linker failures or
|
||||
warnings from the `checknamespace` script that runs automatically when
|
||||
lib25519 is compiled.
|
||||
@@ -0,0 +1,121 @@
|
||||
Creative Commons Legal Code
|
||||
|
||||
CC0 1.0 Universal
|
||||
|
||||
CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
|
||||
LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
|
||||
ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
|
||||
INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
|
||||
REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
|
||||
PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
|
||||
THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
|
||||
HEREUNDER.
|
||||
|
||||
Statement of Purpose
|
||||
|
||||
The laws of most jurisdictions throughout the world automatically confer
|
||||
exclusive Copyright and Related Rights (defined below) upon the creator
|
||||
and subsequent owner(s) (each and all, an "owner") of an original work of
|
||||
authorship and/or a database (each, a "Work").
|
||||
|
||||
Certain owners wish to permanently relinquish those rights to a Work for
|
||||
the purpose of contributing to a commons of creative, cultural and
|
||||
scientific works ("Commons") that the public can reliably and without fear
|
||||
of later claims of infringement build upon, modify, incorporate in other
|
||||
works, reuse and redistribute as freely as possible in any form whatsoever
|
||||
and for any purposes, including without limitation commercial purposes.
|
||||
These owners may contribute to the Commons to promote the ideal of a free
|
||||
culture and the further production of creative, cultural and scientific
|
||||
works, or to gain reputation or greater distribution for their Work in
|
||||
part through the use and efforts of others.
|
||||
|
||||
For these and/or other purposes and motivations, and without any
|
||||
expectation of additional consideration or compensation, the person
|
||||
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
|
||||
is an owner of Copyright and Related Rights in the Work, voluntarily
|
||||
elects to apply CC0 to the Work and publicly distribute the Work under its
|
||||
terms, with knowledge of his or her Copyright and Related Rights in the
|
||||
Work and the meaning and intended legal effect of CC0 on those rights.
|
||||
|
||||
1. Copyright and Related Rights. A Work made available under CC0 may be
|
||||
protected by copyright and related or neighboring rights ("Copyright and
|
||||
Related Rights"). Copyright and Related Rights include, but are not
|
||||
limited to, the following:
|
||||
|
||||
i. the right to reproduce, adapt, distribute, perform, display,
|
||||
communicate, and translate a Work;
|
||||
ii. moral rights retained by the original author(s) and/or performer(s);
|
||||
iii. publicity and privacy rights pertaining to a person's image or
|
||||
likeness depicted in a Work;
|
||||
iv. rights protecting against unfair competition in regards to a Work,
|
||||
subject to the limitations in paragraph 4(a), below;
|
||||
v. rights protecting the extraction, dissemination, use and reuse of data
|
||||
in a Work;
|
||||
vi. database rights (such as those arising under Directive 96/9/EC of the
|
||||
European Parliament and of the Council of 11 March 1996 on the legal
|
||||
protection of databases, and under any national implementation
|
||||
thereof, including any amended or successor version of such
|
||||
directive); and
|
||||
vii. other similar, equivalent or corresponding rights throughout the
|
||||
world based on applicable law or treaty, and any national
|
||||
implementations thereof.
|
||||
|
||||
2. Waiver. To the greatest extent permitted by, but not in contravention
|
||||
of, applicable law, Affirmer hereby overtly, fully, permanently,
|
||||
irrevocably and unconditionally waives, abandons, and surrenders all of
|
||||
Affirmer's Copyright and Related Rights and associated claims and causes
|
||||
of action, whether now known or unknown (including existing as well as
|
||||
future claims and causes of action), in the Work (i) in all territories
|
||||
worldwide, (ii) for the maximum duration provided by applicable law or
|
||||
treaty (including future time extensions), (iii) in any current or future
|
||||
medium and for any number of copies, and (iv) for any purpose whatsoever,
|
||||
including without limitation commercial, advertising or promotional
|
||||
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
|
||||
member of the public at large and to the detriment of Affirmer's heirs and
|
||||
successors, fully intending that such Waiver shall not be subject to
|
||||
revocation, rescission, cancellation, termination, or any other legal or
|
||||
equitable action to disrupt the quiet enjoyment of the Work by the public
|
||||
as contemplated by Affirmer's express Statement of Purpose.
|
||||
|
||||
3. Public License Fallback. Should any part of the Waiver for any reason
|
||||
be judged legally invalid or ineffective under applicable law, then the
|
||||
Waiver shall be preserved to the maximum extent permitted taking into
|
||||
account Affirmer's express Statement of Purpose. In addition, to the
|
||||
extent the Waiver is so judged Affirmer hereby grants to each affected
|
||||
person a royalty-free, non transferable, non sublicensable, non exclusive,
|
||||
irrevocable and unconditional license to exercise Affirmer's Copyright and
|
||||
Related Rights in the Work (i) in all territories worldwide, (ii) for the
|
||||
maximum duration provided by applicable law or treaty (including future
|
||||
time extensions), (iii) in any current or future medium and for any number
|
||||
of copies, and (iv) for any purpose whatsoever, including without
|
||||
limitation commercial, advertising or promotional purposes (the
|
||||
"License"). The License shall be deemed effective as of the date CC0 was
|
||||
applied by Affirmer to the Work. Should any part of the License for any
|
||||
reason be judged legally invalid or ineffective under applicable law, such
|
||||
partial invalidity or ineffectiveness shall not invalidate the remainder
|
||||
of the License, and in such case Affirmer hereby affirms that he or she
|
||||
will not (i) exercise any of his or her remaining Copyright and Related
|
||||
Rights in the Work or (ii) assert any associated claims and causes of
|
||||
action with respect to the Work, in either case contrary to Affirmer's
|
||||
express Statement of Purpose.
|
||||
|
||||
4. Limitations and Disclaimers.
|
||||
|
||||
a. No trademark or patent rights held by Affirmer are waived, abandoned,
|
||||
surrendered, licensed or otherwise affected by this document.
|
||||
b. Affirmer offers the Work as-is and makes no representations or
|
||||
warranties of any kind concerning the Work, express, implied,
|
||||
statutory or otherwise, including without limitation warranties of
|
||||
title, merchantability, fitness for a particular purpose, non
|
||||
infringement, or the absence of latent or other defects, accuracy, or
|
||||
the present or absence of errors, whether or not discoverable, all to
|
||||
the greatest extent permissible under applicable law.
|
||||
c. Affirmer disclaims responsibility for clearing rights of other persons
|
||||
that may apply to the Work or any use thereof, including without
|
||||
limitation any person's Copyright and Related Rights in the Work.
|
||||
Further, Affirmer disclaims responsibility for obtaining any necessary
|
||||
consents, permissions or other rights required for any use of the
|
||||
Work.
|
||||
d. Affirmer understands and acknowledges that Creative Commons is not a
|
||||
party to this document and has no duty or obligation with respect to
|
||||
this CC0 or use of the Work.
|
||||
@@ -0,0 +1,8 @@
|
||||
default:
|
||||
cd build && $(MAKE)
|
||||
|
||||
install:
|
||||
cd build && $(MAKE) install
|
||||
|
||||
clean:
|
||||
cd build && $(MAKE) clean
|
||||
@@ -0,0 +1,28 @@
|
||||
lib25519 is a microlibrary for the X25519 encryption system and the
|
||||
Ed25519 signature system, both of which use the Curve25519 elliptic
|
||||
curve. Curve25519 is the fastest curve in TLS 1.3, and the only curve in
|
||||
Wireguard, Signal, and many other applications (see Nicolai Brown's page
|
||||
https://ianix.com/pub/curve25519-deployment.html).
|
||||
|
||||
lib25519 has a very simple stateless API based on the SUPERCOP API, with
|
||||
wire-format inputs and outputs, providing functions that directly match
|
||||
the central cryptographic operations in X25519 and Ed25519:
|
||||
|
||||
lib25519_dh_keypair(pk,sk): X25519 key generation
|
||||
lib25519_dh(k,pk,sk): shared-secret generation
|
||||
|
||||
lib25519_sign_keypair(pk,sk): Ed25519 key generation
|
||||
lib25519_sign(sm,&smlen,m,mlen,sk): signing
|
||||
lib25519_sign_open(m,&mlen,sm,smlen,pk): verification + message recovery
|
||||
|
||||
Internally, lib25519 includes implementations designed for performance
|
||||
on various CPUs, implementations designed to work portably across CPUs,
|
||||
and automatic run-time selection of implementations.
|
||||
|
||||
lib25519 is intended to be called by larger multi-function libraries,
|
||||
including libraries in other languages via FFI. The idea is that
|
||||
lib25519 will take responsibility for the details of X25519/Ed25519
|
||||
computation, including optimization, timing-attack protection, and
|
||||
eventually verification, freeing up the calling libraries to concentrate
|
||||
on application-specific needs such as protocol integration. Applications
|
||||
can also call lib25519 directly.
|
||||
@@ -0,0 +1,87 @@
|
||||
Security model: X25519 is designed to be strong as a component of
|
||||
various well-known "hashed DH" applications, and in particular is
|
||||
designed to make the CDH problem difficult with respect to the standard
|
||||
base. Ed25519 is designed to provide EUF-CMA, the standard notion of
|
||||
unforgeability of a signature system under chosen-message attacks.
|
||||
However, some applications need other security notions that are not
|
||||
provided by X25519 and Ed25519.
|
||||
|
||||
Security level: X25519 and Ed25519 are more difficult to break by any
|
||||
known attacks than a typical 128-bit cipher. They have an extremely
|
||||
stable security track record, with two decades of research changing
|
||||
security levels by only a fraction of a bit. They also proactively avoid
|
||||
various potential risks. However, large quantum computers will break
|
||||
both X25519 and Ed25519.
|
||||
|
||||
Software verification: lib25519 is intended to become a central target
|
||||
for verification of full functional correctness of implementations of
|
||||
X25519 and Ed25519. However, only certain portions have been verified so
|
||||
far, and at this point the code should be presumed to have critical
|
||||
bugs.
|
||||
|
||||
Timing attacks: lib25519 is designed to avoid all data flow from secret
|
||||
data to memory addresses and branch conditions. Fully protecting the
|
||||
user against timing attacks requires addressing more issues, such as the
|
||||
following:
|
||||
|
||||
* Other common instructions used by lib25519 take variable time on
|
||||
some CPUs. In particular, there are some embedded CPUs with
|
||||
variable-time multipliers.
|
||||
|
||||
* Many CPUs include dynamic frequency-selection mechanisms such as
|
||||
Turbo Boost, exposing power information via timing information.
|
||||
Fortunately, these CPUs are normally shipped with simple options to
|
||||
disable Turbo Boost etc., closing this leak; unfortunately, Turbo
|
||||
Boost is enabled by default on CPUs that support it.
|
||||
|
||||
* Cryptographic keys are normally handled by cryptographic software,
|
||||
but other user secrets are handled by many different pieces of
|
||||
software.
|
||||
|
||||
See https://timing.attacks.cr.yp.to for a timing-attack survey and many
|
||||
references.
|
||||
|
||||
Speculative-execution attacks: Some countermeasures against
|
||||
speculative-execution attacks are planned but are not included in the
|
||||
current version of lib25519. Full protection again requires addressing
|
||||
issues at other system layers.
|
||||
|
||||
Further side-channel attacks: Even if all legitimate user sensors are
|
||||
successfully kept isolated from attackers, attackers can set up their
|
||||
own power sensors, electromagnetic sensors, acoustic sensors, etc.
|
||||
Keeping cryptographic operations physically separated from sensors tends
|
||||
to make such attacks much more expensive but is often infeasible.
|
||||
"Masking" cryptographic computations seems to help and can be
|
||||
affordable, although the security of masking is difficult to evaluate
|
||||
and there are many broken masked implementations. Currently lib25519
|
||||
does not include any masked implementations, so presumably it is easily
|
||||
breakable by power attacks in environments where attackers can see power
|
||||
consumption.
|
||||
|
||||
Further attacks: lib25519 creates an Ed25519 signing nonce as a hash of
|
||||
the message, a long-term secret, and new randomness (specifically, the
|
||||
nonce is a keyed hash of the message, where the key is the hash of the
|
||||
long-term secret and new randomness). The literature identifies various
|
||||
advantages and disadvantages of including these hash inputs:
|
||||
|
||||
* Including the message and a long-term secret protects against
|
||||
signing-time RNG failures. This is a standard feature of Ed25519
|
||||
signers.
|
||||
|
||||
* To the extent that the RNG works, including new randomness has the
|
||||
advantage of stopping (e.g.) fault attacks that rely on a nonce
|
||||
being reused for multiple signatures of the same message.
|
||||
|
||||
* Including new randomness has the disadvantage of requiring state
|
||||
for the RNG. However, lib25519 runs within an OS that in any case
|
||||
maintains state and provides an RNG.
|
||||
|
||||
* Including new randomness also has the disadvantage of interfering
|
||||
with the use of test vectors. This disadvantage does not apply to
|
||||
lib25519: lib25519's test vectors already handle randomness.
|
||||
|
||||
lib25519 includes a few further steps that could be useful in stopping
|
||||
fault attacks (for example, signature verification internally converts
|
||||
invalid public keys to the key (...,26), which does not have a known
|
||||
discrete logarithm), but in general lib25519 should be presumed
|
||||
breakable by fault attacks.
|
||||
@@ -0,0 +1,17 @@
|
||||
track history more precisely in documentation for individual source files
|
||||
consider symlinks from files in build tree to source tree (via a build/source symlink so build can link elsewhere)
|
||||
allow shared-* for API functions (requires tweaking dispatch)
|
||||
speedups for more architectures
|
||||
speedups for more microarchitectures
|
||||
consider faster PRF for the keyed hash giving the nonce
|
||||
merge subroutines in source to the extent possible
|
||||
scan for and remove any unused functions and files
|
||||
restructure for more merging at object-code level
|
||||
sort object files (for, e.g., improved cache utilization)
|
||||
optionally allow post-installation patching of current cpu as an exceptional cpuid
|
||||
(based on benchmarks and, with more CPU time, full functionality tests)
|
||||
dispatch: eliminate, e.g., avx2 if avx is higher priority
|
||||
speed up dispatch cpuid tests (lazy evaluation, merging cpuid calls)
|
||||
randombytes: support getrandom, getentropy
|
||||
verify constbranch, constindex
|
||||
full functional verification
|
||||
@@ -0,0 +1,51 @@
|
||||
crypto_verify/32
|
||||
#define crypto_verify_32_BYTES 32
|
||||
int crypto_verify(const unsigned char *,const unsigned char *);
|
||||
|
||||
crypto_hashblocks/sha512 f0bc623a9033f9f648336540e11e85be21aeb60905c7d8808d10ea20b39d58d1 f1a2c46c9ce7fa4cd22f180907d77b6f7189badef4b9a1b5284d6fb9db859b76
|
||||
#define crypto_hashblocks_sha512_STATEBYTES 64
|
||||
#define crypto_hashblocks_sha512_BLOCKBYTES 128
|
||||
int crypto_hashblocks(unsigned char *,const unsigned char *,long long);
|
||||
|
||||
crypto_hash/sha512 8220572f58bd4730be165c9739d8d4b0fd2e0229dbe01e25b4aed23f00f23b70 c1e322b7cbfc941260c5508967ba05bce22eeee94d425e708b7c3301ea1d5e2e
|
||||
#define crypto_hash_sha512_BYTES 64
|
||||
void crypto_hash(unsigned char *,const unsigned char *,long long);
|
||||
|
||||
crypto_pow/inv25519 ad2062946e82718da820226504991a85c5fe56bdbff959c1313f837ee13b37be 59b3045a01e1fca2a86a0280aee8b985c5e040afdc0d3e85ed87eb97a46a4dd6
|
||||
#define crypto_pow_inv25519_BYTES 32
|
||||
void crypto_pow(unsigned char *,const unsigned char *);
|
||||
|
||||
crypto_nP/montgomery25519 b861d66109b42359e5994ed57ae566827c345b65a9d0671700320b82888397ec 740924011f3448f65299f61b087f74a6eb9651a4203dfbf621d2bec54e149405
|
||||
#define crypto_nP_montgomery25519_SCALARBYTES 32
|
||||
#define crypto_nP_montgomery25519_POINTBYTES 32
|
||||
void crypto_nP(unsigned char *,const unsigned char *,const unsigned char *);
|
||||
|
||||
crypto_nG/merged25519 a4e761839798a07817484e97605bd63215b4938934ed9ce01935bbced48155bc 0a01c09fc8a8c7e8c18f841b2e1b2da9c156868737d194d223b03531cf2db731
|
||||
#define crypto_nG_merged25519_SCALARBYTES 32
|
||||
#define crypto_nG_merged25519_POINTBYTES 32
|
||||
crypto_nG/montgomery25519 5c8a5d8b32e3d26b33071779ce9191095d7bd4ab3bb6a40b68976e41a98cfc3b 2becc8cd065820fcf82e53a03c5b5235582480fc11d072f2bd15153aebd4e057
|
||||
#define crypto_nG_montgomery25519_SCALARBYTES 32
|
||||
#define crypto_nG_montgomery25519_POINTBYTES 32
|
||||
void crypto_nG(unsigned char *,const unsigned char *);
|
||||
|
||||
crypto_mGnP/ed25519 dc80be44fb0d482c5ae430779e76fe612c53fcd9e5847254bf27ab34e90745f4 9e1a3b7015c8fdb12763fd88494f5bfe9e2565ead4d3407d5ecf7ff6ca24c1d0
|
||||
#define crypto_mGnP_ed25519_MBYTES 32
|
||||
#define crypto_mGnP_ed25519_NBYTES 64
|
||||
#define crypto_mGnP_ed25519_PBYTES 32
|
||||
#define crypto_mGnP_ed25519_OUTPUTBYTES 33
|
||||
void crypto_mGnP(unsigned char *,const unsigned char *,const unsigned char *,const unsigned char *);
|
||||
|
||||
crypto_dh/x25519 2c8a73ec86d5d4c4bc838f49cfd78c87b60b534ae6fff59ce3bea0c32cdc1450 b09016b3a1371786b46a183085133338159e623c5eb9cbc5eaa4f8b62d6c5aea
|
||||
#define crypto_dh_x25519_SECRETKEYBYTES 32
|
||||
#define crypto_dh_x25519_PUBLICKEYBYTES 32
|
||||
#define crypto_dh_x25519_BYTES 32
|
||||
void crypto_dh_keypair(unsigned char *,unsigned char *);
|
||||
void crypto_dh(unsigned char *,const unsigned char *,const unsigned char *);
|
||||
|
||||
crypto_sign/ed25519 ce11fd7c1eac4dd0bc5eec49b26ad1e91aef696fae50ce377dbd806dc394da01 2ed857f17c917a8185e6c296303a11772ae45683a5e7cb5b095489bad65fffde
|
||||
#define crypto_sign_ed25519_SECRETKEYBYTES 64
|
||||
#define crypto_sign_ed25519_PUBLICKEYBYTES 32
|
||||
#define crypto_sign_ed25519_BYTES 64
|
||||
void crypto_sign_keypair(unsigned char *,unsigned char *);
|
||||
void crypto_sign(unsigned char *,long long *,const unsigned char *,long long,const unsigned char *);
|
||||
int crypto_sign_open(unsigned char *,long long *,const unsigned char *,long long,const unsigned char *);
|
||||
Executable
+338
@@ -0,0 +1,338 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
output = r'''/* WARNING: auto-generated (by autogen-speed); do not edit */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <time.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/resource.h>
|
||||
#include "cpucycles.h" /* -lcpucycles */
|
||||
#include "lib25519.h" /* -l25519 */
|
||||
#include "randombytes.h" /* -lrandombytes_kernel */
|
||||
|
||||
static const char *targeto = 0;
|
||||
static const char *targetp = 0;
|
||||
static const char *targeti = 0;
|
||||
|
||||
#include "limits.inc"
|
||||
|
||||
static unsigned char *alignedcalloc(unsigned long long len)
|
||||
{
|
||||
unsigned char *x = (unsigned char *) calloc(1,len + 128);
|
||||
if (!x) abort();
|
||||
/* will never deallocate so shifting is ok */
|
||||
x += 63 & (-(unsigned long) x);
|
||||
return x;
|
||||
}
|
||||
|
||||
#define TIMINGS 15
|
||||
static long long t[TIMINGS+1];
|
||||
|
||||
static void t_print(const char *op,long long impl,long long len)
|
||||
{
|
||||
long long median = 0;
|
||||
|
||||
printf("%s",op);
|
||||
if (impl >= 0)
|
||||
printf(" %lld",impl);
|
||||
else
|
||||
printf(" selected");
|
||||
printf(" %lld",len);
|
||||
for (long long i = 0;i < TIMINGS;++i)
|
||||
t[i] = t[i+1]-t[i];
|
||||
for (long long j = 0;j < TIMINGS;++j) {
|
||||
long long belowj = 0;
|
||||
long long abovej = 0;
|
||||
for (long long i = 0;i < TIMINGS;++i) if (t[i] < t[j]) ++belowj;
|
||||
for (long long i = 0;i < TIMINGS;++i) if (t[i] > t[j]) ++abovej;
|
||||
if (belowj*2 < TIMINGS && abovej*2 < TIMINGS) {
|
||||
median = t[j];
|
||||
break;
|
||||
}
|
||||
}
|
||||
printf(" %lld ",median);
|
||||
for (long long i = 0;i < TIMINGS;++i)
|
||||
printf("%+lld",t[i]-median);
|
||||
printf("\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
#define MAXTEST_BYTES 65536
|
||||
|
||||
static void measure_cpucycles(void)
|
||||
{
|
||||
printf("cpucycles selected persecond %lld\n",cpucycles_persecond());
|
||||
printf("cpucycles selected implementation %s\n",cpucycles_implementation());
|
||||
|
||||
for (long long i = 0;i <= TIMINGS;++i)
|
||||
t[i] = cpucycles();
|
||||
t_print("cpucycles",-1,0);
|
||||
}
|
||||
|
||||
static void measure_randombytes(void)
|
||||
{
|
||||
unsigned char *m = alignedcalloc(MAXTEST_BYTES);
|
||||
long long mlen = 0;
|
||||
|
||||
while (mlen < MAXTEST_BYTES) {
|
||||
for (long long i = 0;i <= TIMINGS;++i) {
|
||||
t[i] = cpucycles();
|
||||
randombytes(m,mlen);
|
||||
}
|
||||
t_print("randombytes",-1,mlen);
|
||||
mlen += 1+mlen/2;
|
||||
}
|
||||
}
|
||||
'''
|
||||
|
||||
# XXX: integrate todo into api
|
||||
todo = (
|
||||
('verify',(
|
||||
('x','lib25519_verify_BYTES'),
|
||||
('y','lib25519_verify_BYTES'),
|
||||
),(
|
||||
('crypto_verify','x','y'),
|
||||
)),
|
||||
('hashblocks',(
|
||||
('h','lib25519_hashblocks_STATEBYTES'),
|
||||
('m','MAXTEST_BYTES'),
|
||||
('mlen',None),
|
||||
),(
|
||||
('crypto_hashblocks','h','m','mlen'),
|
||||
)),
|
||||
('hash',(
|
||||
('h','lib25519_hash_BYTES'),
|
||||
('m','MAXTEST_BYTES'),
|
||||
('mlen',None),
|
||||
),(
|
||||
('crypto_hash','h','m','mlen'),
|
||||
)),
|
||||
('pow',(
|
||||
('n','lib25519_pow_BYTES'),
|
||||
('ne','lib25519_pow_BYTES'),
|
||||
),(
|
||||
('crypto_pow','ne','n'),
|
||||
)),
|
||||
('nP',(
|
||||
('n','lib25519_nP_SCALARBYTES'),
|
||||
('P','lib25519_nP_POINTBYTES'),
|
||||
('nP','lib25519_nP_POINTBYTES'),
|
||||
),(
|
||||
('crypto_nP','nP','n','P'),
|
||||
)),
|
||||
('nG',(
|
||||
('n','lib25519_nP_SCALARBYTES'),
|
||||
('nG','lib25519_nP_POINTBYTES'),
|
||||
),(
|
||||
('crypto_nG','nG','n'),
|
||||
)),
|
||||
('mGnP',(
|
||||
('mGnP','lib25519_mGnP_OUTPUTBYTES'),
|
||||
('m','lib25519_mGnP_MBYTES'),
|
||||
('n','lib25519_mGnP_NBYTES'),
|
||||
('P','lib25519_mGnP_PBYTES'),
|
||||
),(
|
||||
('crypto_mGnP','mGnP','m','n','P'),
|
||||
)),
|
||||
('dh',(
|
||||
('pka','lib25519_dh_PUBLICKEYBYTES'),
|
||||
('ska','lib25519_dh_SECRETKEYBYTES'),
|
||||
('pkb','lib25519_dh_PUBLICKEYBYTES'),
|
||||
('skb','lib25519_dh_SECRETKEYBYTES'),
|
||||
('ka','lib25519_dh_BYTES'),
|
||||
),(
|
||||
('crypto_dh_keypair','pka','ska'),
|
||||
('crypto_dh_keypair','pkb','skb'),
|
||||
('crypto_dh','ka','pkb','ska'),
|
||||
)),
|
||||
('sign',(
|
||||
('pk','lib25519_sign_PUBLICKEYBYTES'),
|
||||
('sk','lib25519_sign_SECRETKEYBYTES'),
|
||||
('m','MAXTEST_BYTES+lib25519_sign_BYTES'),
|
||||
('sm','MAXTEST_BYTES+lib25519_sign_BYTES'),
|
||||
('m2','MAXTEST_BYTES+lib25519_sign_BYTES'),
|
||||
('mlen',None),
|
||||
('smlen',None),
|
||||
('m2len',None),
|
||||
),(
|
||||
('crypto_sign_keypair','pk','sk'),
|
||||
('crypto_sign','sm','&smlen','m','mlen','sk'),
|
||||
('crypto_sign_open','m2','&m2len','sm','smlen','pk'),
|
||||
)),
|
||||
)
|
||||
|
||||
operations = []
|
||||
primitives = {}
|
||||
sizes = {}
|
||||
exports = {}
|
||||
prototypes = {}
|
||||
|
||||
with open('api') as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line.startswith('crypto_'):
|
||||
x = line.split()
|
||||
x = x[0].split('/')
|
||||
assert len(x) == 2
|
||||
o = x[0].split('_')[1]
|
||||
if o not in operations: operations += [o]
|
||||
p = x[1]
|
||||
if o not in primitives: primitives[o] = []
|
||||
primitives[o] += [p]
|
||||
continue
|
||||
if line.startswith('#define '):
|
||||
x = line.split(' ')
|
||||
x = x[1].split('_')
|
||||
assert len(x) == 4
|
||||
assert x[0] == 'crypto'
|
||||
o = x[1]
|
||||
p = x[2]
|
||||
if (o,p) not in sizes: sizes[o,p] = ''
|
||||
sizes[o,p] += line+'\n'
|
||||
continue
|
||||
if line.endswith(');'):
|
||||
fun,args = line[:-2].split('(')
|
||||
rettype,fun = fun.split()
|
||||
fun = fun.split('_')
|
||||
o = fun[1]
|
||||
assert fun[0] == 'crypto'
|
||||
if o not in exports: exports[o] = []
|
||||
exports[o] += ['_'.join(fun[1:])]
|
||||
if o not in prototypes: prototypes[o] = []
|
||||
prototypes[o] += [(rettype,fun,args)]
|
||||
|
||||
for t in todo:
|
||||
o,vars,benches = t
|
||||
|
||||
for p in primitives[o]:
|
||||
|
||||
output += '\n'
|
||||
output += 'static void measure_%s_%s(void)\n' % (o,p)
|
||||
output += '{\n'
|
||||
|
||||
output += ' if (targeto && strcmp(targeto,"%s")) return;\n' % o
|
||||
output += ' if (targetp && strcmp(targetp,"%s")) return;\n' % p
|
||||
|
||||
varsize = {}
|
||||
for v,size in vars:
|
||||
if size is None:
|
||||
output += ' long long %s;\n' % v
|
||||
else:
|
||||
size = size.replace('lib25519_'+o,'lib25519_'+o+'_'+p)
|
||||
output += ' unsigned char *%s = alignedcalloc(%s);\n' % (v,size)
|
||||
varsize[v] = size
|
||||
output += '\n'
|
||||
|
||||
output += ' for (long long impl = -1;impl < lib25519_numimpl_%s_%s();++impl) {\n' % (o,p)
|
||||
for rettype,fun,args in prototypes[o]:
|
||||
output += ' %s (*%s)(%s);\n' % (rettype,'_'.join(fun),args)
|
||||
|
||||
output += ' if (targeti && strcmp(targeti,lib25519_dispatch_%s_%s_implementation(impl))) continue;\n' % (o,p)
|
||||
|
||||
output += ' if (impl >= 0) {\n'
|
||||
for rettype,fun,args in prototypes[o]:
|
||||
f2 = ['lib25519','dispatch',o,p]+fun[2:]
|
||||
output += ' %s = %s(impl);\n' % ('_'.join(fun),'_'.join(f2))
|
||||
output += r' printf("%s_%s %%lld implementation %%s compiler %%s\n",impl,lib25519_dispatch_%s_%s_implementation(impl),lib25519_dispatch_%s_%s_compiler(impl));' % (o,p,o,p,o,p)
|
||||
output += '\n'
|
||||
output += ' } else {\n'
|
||||
for rettype,fun,args in prototypes[o]:
|
||||
f2 = ['lib25519',o,p]+fun[2:]
|
||||
output += ' %s = %s;\n' % ('_'.join(fun),'_'.join(f2))
|
||||
output += r' printf("%s_%s selected implementation %%s compiler %%s\n",lib25519_%s_%s_implementation(),lib25519_%s_%s_compiler());' % (o,p,o,p,o,p)
|
||||
output += '\n'
|
||||
output += ' }\n'
|
||||
|
||||
for v,size in vars:
|
||||
if size is not None:
|
||||
size = size.replace('lib25519_'+o,'lib25519_'+o+'_'+p)
|
||||
output += ' randombytes(%s,%s);\n' % (v,size)
|
||||
|
||||
alreadybenched = set()
|
||||
alreadybenched.add('assert')
|
||||
|
||||
for b in benches:
|
||||
if b[0] in alreadybenched:
|
||||
output += ' %s(%s);\n' % (b[0],','.join(b[1:]))
|
||||
continue
|
||||
|
||||
fun = b[0].split('_')
|
||||
shortfun = '_'.join([o,p]+fun[2:])
|
||||
alreadybenched.add(b[0])
|
||||
|
||||
if 'mlen' in b[1:] or 'smlen' in b[1:]:
|
||||
output += ' mlen = 0;\n'
|
||||
output += ' while (mlen <= MAXTEST_BYTES) {\n'
|
||||
output += ' randombytes(m,mlen);\n'
|
||||
|
||||
if shortfun == 'sign_ed25519_open': # XXX: put this into todo
|
||||
output += ' lib25519_sign(sm,&smlen,m,mlen,sk);\n'
|
||||
|
||||
output += ' for (long long i = 0;i <= TIMINGS;++i) {\n'
|
||||
output += ' t[i] = cpucycles();\n'
|
||||
output += ' %s(%s);\n' % (b[0],','.join(b[1:]))
|
||||
output += ' }\n'
|
||||
output += ' t_print("%s",impl,mlen);\n' % (shortfun)
|
||||
|
||||
if shortfun == 'sign_ed25519_open': # XXX: put this into todo
|
||||
output += ' /* this is, in principle, not a test program */\n'
|
||||
output += ' /* but some checks here help validate the data flow above */\n'
|
||||
output += ' assert(m2len == mlen);\n'
|
||||
output += ' assert(!memcmp(m,m2,mlen));\n'
|
||||
|
||||
if o == 'sign': # XXX: put this into todo
|
||||
output += ' mlen += 1+mlen/4;\n'
|
||||
else:
|
||||
output += ' mlen += 1+mlen/2;\n'
|
||||
output += ' }\n'
|
||||
else:
|
||||
output += ' for (long long i = 0;i <= TIMINGS;++i) {\n'
|
||||
output += ' t[i] = cpucycles();\n'
|
||||
output += ' %s(%s);\n' % (b[0],','.join(b[1:]))
|
||||
output += ' }\n'
|
||||
output += ' t_print("%s",impl,%s);\n' % (shortfun,varsize[b[1]])
|
||||
|
||||
output += ' }\n'
|
||||
output += '}\n'
|
||||
|
||||
output += r'''
|
||||
#include "print_cpuid.inc"
|
||||
|
||||
int main(int argc,char **argv)
|
||||
{
|
||||
printf("lib25519 version %s\n",lib25519_version);
|
||||
printf("lib25519 arch %s\n",lib25519_arch);
|
||||
print_cpuid();
|
||||
|
||||
if (*argv) ++argv;
|
||||
if (*argv) {
|
||||
targeto = *argv++;
|
||||
if (*argv) {
|
||||
targetp = *argv++;
|
||||
if (*argv) {
|
||||
targeti = *argv++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
measure_cpucycles();
|
||||
measure_randombytes();
|
||||
limits();
|
||||
'''
|
||||
|
||||
for t in todo:
|
||||
o,vars,benches = t
|
||||
for p in primitives[o]:
|
||||
output += ' measure_%s_%s();\n' % (o,p)
|
||||
|
||||
output += r'''
|
||||
return 0;
|
||||
}
|
||||
'''
|
||||
|
||||
with open('command/lib25519-speed.c','w') as f:
|
||||
f.write(output)
|
||||
Executable
+1217
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,466 @@
|
||||
/* WARNING: auto-generated (by autogen-speed); do not edit */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <time.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/resource.h>
|
||||
#include "cpucycles.h" /* -lcpucycles */
|
||||
#include "lib25519.h" /* -l25519 */
|
||||
#include "randombytes.h" /* -lrandombytes_kernel */
|
||||
|
||||
static const char *targeto = 0;
|
||||
static const char *targetp = 0;
|
||||
static const char *targeti = 0;
|
||||
|
||||
#include "limits.inc"
|
||||
|
||||
static unsigned char *alignedcalloc(unsigned long long len)
|
||||
{
|
||||
unsigned char *x = (unsigned char *) calloc(1,len + 128);
|
||||
if (!x) abort();
|
||||
/* will never deallocate so shifting is ok */
|
||||
x += 63 & (-(unsigned long) x);
|
||||
return x;
|
||||
}
|
||||
|
||||
#define TIMINGS 15
|
||||
static long long t[TIMINGS+1];
|
||||
|
||||
static void t_print(const char *op,long long impl,long long len)
|
||||
{
|
||||
long long median = 0;
|
||||
|
||||
printf("%s",op);
|
||||
if (impl >= 0)
|
||||
printf(" %lld",impl);
|
||||
else
|
||||
printf(" selected");
|
||||
printf(" %lld",len);
|
||||
for (long long i = 0;i < TIMINGS;++i)
|
||||
t[i] = t[i+1]-t[i];
|
||||
for (long long j = 0;j < TIMINGS;++j) {
|
||||
long long belowj = 0;
|
||||
long long abovej = 0;
|
||||
for (long long i = 0;i < TIMINGS;++i) if (t[i] < t[j]) ++belowj;
|
||||
for (long long i = 0;i < TIMINGS;++i) if (t[i] > t[j]) ++abovej;
|
||||
if (belowj*2 < TIMINGS && abovej*2 < TIMINGS) {
|
||||
median = t[j];
|
||||
break;
|
||||
}
|
||||
}
|
||||
printf(" %lld ",median);
|
||||
for (long long i = 0;i < TIMINGS;++i)
|
||||
printf("%+lld",t[i]-median);
|
||||
printf("\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
#define MAXTEST_BYTES 65536
|
||||
|
||||
static void measure_cpucycles(void)
|
||||
{
|
||||
printf("cpucycles selected persecond %lld\n",cpucycles_persecond());
|
||||
printf("cpucycles selected implementation %s\n",cpucycles_implementation());
|
||||
|
||||
for (long long i = 0;i <= TIMINGS;++i)
|
||||
t[i] = cpucycles();
|
||||
t_print("cpucycles",-1,0);
|
||||
}
|
||||
|
||||
static void measure_randombytes(void)
|
||||
{
|
||||
unsigned char *m = alignedcalloc(MAXTEST_BYTES);
|
||||
long long mlen = 0;
|
||||
|
||||
while (mlen < MAXTEST_BYTES) {
|
||||
for (long long i = 0;i <= TIMINGS;++i) {
|
||||
t[i] = cpucycles();
|
||||
randombytes(m,mlen);
|
||||
}
|
||||
t_print("randombytes",-1,mlen);
|
||||
mlen += 1+mlen/2;
|
||||
}
|
||||
}
|
||||
|
||||
static void measure_verify_32(void)
|
||||
{
|
||||
if (targeto && strcmp(targeto,"verify")) return;
|
||||
if (targetp && strcmp(targetp,"32")) return;
|
||||
unsigned char *x = alignedcalloc(lib25519_verify_32_BYTES);
|
||||
unsigned char *y = alignedcalloc(lib25519_verify_32_BYTES);
|
||||
|
||||
for (long long impl = -1;impl < lib25519_numimpl_verify_32();++impl) {
|
||||
int (*crypto_verify)(const unsigned char *,const unsigned char *);
|
||||
if (targeti && strcmp(targeti,lib25519_dispatch_verify_32_implementation(impl))) continue;
|
||||
if (impl >= 0) {
|
||||
crypto_verify = lib25519_dispatch_verify_32(impl);
|
||||
printf("verify_32 %lld implementation %s compiler %s\n",impl,lib25519_dispatch_verify_32_implementation(impl),lib25519_dispatch_verify_32_compiler(impl));
|
||||
} else {
|
||||
crypto_verify = lib25519_verify_32;
|
||||
printf("verify_32 selected implementation %s compiler %s\n",lib25519_verify_32_implementation(),lib25519_verify_32_compiler());
|
||||
}
|
||||
randombytes(x,lib25519_verify_32_BYTES);
|
||||
randombytes(y,lib25519_verify_32_BYTES);
|
||||
for (long long i = 0;i <= TIMINGS;++i) {
|
||||
t[i] = cpucycles();
|
||||
crypto_verify(x,y);
|
||||
}
|
||||
t_print("verify_32",impl,lib25519_verify_32_BYTES);
|
||||
}
|
||||
}
|
||||
|
||||
static void measure_hashblocks_sha512(void)
|
||||
{
|
||||
if (targeto && strcmp(targeto,"hashblocks")) return;
|
||||
if (targetp && strcmp(targetp,"sha512")) return;
|
||||
unsigned char *h = alignedcalloc(lib25519_hashblocks_sha512_STATEBYTES);
|
||||
unsigned char *m = alignedcalloc(MAXTEST_BYTES);
|
||||
long long mlen;
|
||||
|
||||
for (long long impl = -1;impl < lib25519_numimpl_hashblocks_sha512();++impl) {
|
||||
int (*crypto_hashblocks)(unsigned char *,const unsigned char *,long long);
|
||||
if (targeti && strcmp(targeti,lib25519_dispatch_hashblocks_sha512_implementation(impl))) continue;
|
||||
if (impl >= 0) {
|
||||
crypto_hashblocks = lib25519_dispatch_hashblocks_sha512(impl);
|
||||
printf("hashblocks_sha512 %lld implementation %s compiler %s\n",impl,lib25519_dispatch_hashblocks_sha512_implementation(impl),lib25519_dispatch_hashblocks_sha512_compiler(impl));
|
||||
} else {
|
||||
crypto_hashblocks = lib25519_hashblocks_sha512;
|
||||
printf("hashblocks_sha512 selected implementation %s compiler %s\n",lib25519_hashblocks_sha512_implementation(),lib25519_hashblocks_sha512_compiler());
|
||||
}
|
||||
randombytes(h,lib25519_hashblocks_sha512_STATEBYTES);
|
||||
randombytes(m,MAXTEST_BYTES);
|
||||
mlen = 0;
|
||||
while (mlen <= MAXTEST_BYTES) {
|
||||
randombytes(m,mlen);
|
||||
for (long long i = 0;i <= TIMINGS;++i) {
|
||||
t[i] = cpucycles();
|
||||
crypto_hashblocks(h,m,mlen);
|
||||
}
|
||||
t_print("hashblocks_sha512",impl,mlen);
|
||||
mlen += 1+mlen/2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void measure_hash_sha512(void)
|
||||
{
|
||||
if (targeto && strcmp(targeto,"hash")) return;
|
||||
if (targetp && strcmp(targetp,"sha512")) return;
|
||||
unsigned char *h = alignedcalloc(lib25519_hash_sha512_BYTES);
|
||||
unsigned char *m = alignedcalloc(MAXTEST_BYTES);
|
||||
long long mlen;
|
||||
|
||||
for (long long impl = -1;impl < lib25519_numimpl_hash_sha512();++impl) {
|
||||
void (*crypto_hash)(unsigned char *,const unsigned char *,long long);
|
||||
if (targeti && strcmp(targeti,lib25519_dispatch_hash_sha512_implementation(impl))) continue;
|
||||
if (impl >= 0) {
|
||||
crypto_hash = lib25519_dispatch_hash_sha512(impl);
|
||||
printf("hash_sha512 %lld implementation %s compiler %s\n",impl,lib25519_dispatch_hash_sha512_implementation(impl),lib25519_dispatch_hash_sha512_compiler(impl));
|
||||
} else {
|
||||
crypto_hash = lib25519_hash_sha512;
|
||||
printf("hash_sha512 selected implementation %s compiler %s\n",lib25519_hash_sha512_implementation(),lib25519_hash_sha512_compiler());
|
||||
}
|
||||
randombytes(h,lib25519_hash_sha512_BYTES);
|
||||
randombytes(m,MAXTEST_BYTES);
|
||||
mlen = 0;
|
||||
while (mlen <= MAXTEST_BYTES) {
|
||||
randombytes(m,mlen);
|
||||
for (long long i = 0;i <= TIMINGS;++i) {
|
||||
t[i] = cpucycles();
|
||||
crypto_hash(h,m,mlen);
|
||||
}
|
||||
t_print("hash_sha512",impl,mlen);
|
||||
mlen += 1+mlen/2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void measure_pow_inv25519(void)
|
||||
{
|
||||
if (targeto && strcmp(targeto,"pow")) return;
|
||||
if (targetp && strcmp(targetp,"inv25519")) return;
|
||||
unsigned char *n = alignedcalloc(lib25519_pow_inv25519_BYTES);
|
||||
unsigned char *ne = alignedcalloc(lib25519_pow_inv25519_BYTES);
|
||||
|
||||
for (long long impl = -1;impl < lib25519_numimpl_pow_inv25519();++impl) {
|
||||
void (*crypto_pow)(unsigned char *,const unsigned char *);
|
||||
if (targeti && strcmp(targeti,lib25519_dispatch_pow_inv25519_implementation(impl))) continue;
|
||||
if (impl >= 0) {
|
||||
crypto_pow = lib25519_dispatch_pow_inv25519(impl);
|
||||
printf("pow_inv25519 %lld implementation %s compiler %s\n",impl,lib25519_dispatch_pow_inv25519_implementation(impl),lib25519_dispatch_pow_inv25519_compiler(impl));
|
||||
} else {
|
||||
crypto_pow = lib25519_pow_inv25519;
|
||||
printf("pow_inv25519 selected implementation %s compiler %s\n",lib25519_pow_inv25519_implementation(),lib25519_pow_inv25519_compiler());
|
||||
}
|
||||
randombytes(n,lib25519_pow_inv25519_BYTES);
|
||||
randombytes(ne,lib25519_pow_inv25519_BYTES);
|
||||
for (long long i = 0;i <= TIMINGS;++i) {
|
||||
t[i] = cpucycles();
|
||||
crypto_pow(ne,n);
|
||||
}
|
||||
t_print("pow_inv25519",impl,lib25519_pow_inv25519_BYTES);
|
||||
}
|
||||
}
|
||||
|
||||
static void measure_nP_montgomery25519(void)
|
||||
{
|
||||
if (targeto && strcmp(targeto,"nP")) return;
|
||||
if (targetp && strcmp(targetp,"montgomery25519")) return;
|
||||
unsigned char *n = alignedcalloc(lib25519_nP_montgomery25519_SCALARBYTES);
|
||||
unsigned char *P = alignedcalloc(lib25519_nP_montgomery25519_POINTBYTES);
|
||||
unsigned char *nP = alignedcalloc(lib25519_nP_montgomery25519_POINTBYTES);
|
||||
|
||||
for (long long impl = -1;impl < lib25519_numimpl_nP_montgomery25519();++impl) {
|
||||
void (*crypto_nP)(unsigned char *,const unsigned char *,const unsigned char *);
|
||||
if (targeti && strcmp(targeti,lib25519_dispatch_nP_montgomery25519_implementation(impl))) continue;
|
||||
if (impl >= 0) {
|
||||
crypto_nP = lib25519_dispatch_nP_montgomery25519(impl);
|
||||
printf("nP_montgomery25519 %lld implementation %s compiler %s\n",impl,lib25519_dispatch_nP_montgomery25519_implementation(impl),lib25519_dispatch_nP_montgomery25519_compiler(impl));
|
||||
} else {
|
||||
crypto_nP = lib25519_nP_montgomery25519;
|
||||
printf("nP_montgomery25519 selected implementation %s compiler %s\n",lib25519_nP_montgomery25519_implementation(),lib25519_nP_montgomery25519_compiler());
|
||||
}
|
||||
randombytes(n,lib25519_nP_montgomery25519_SCALARBYTES);
|
||||
randombytes(P,lib25519_nP_montgomery25519_POINTBYTES);
|
||||
randombytes(nP,lib25519_nP_montgomery25519_POINTBYTES);
|
||||
for (long long i = 0;i <= TIMINGS;++i) {
|
||||
t[i] = cpucycles();
|
||||
crypto_nP(nP,n,P);
|
||||
}
|
||||
t_print("nP_montgomery25519",impl,lib25519_nP_montgomery25519_POINTBYTES);
|
||||
}
|
||||
}
|
||||
|
||||
static void measure_nG_merged25519(void)
|
||||
{
|
||||
if (targeto && strcmp(targeto,"nG")) return;
|
||||
if (targetp && strcmp(targetp,"merged25519")) return;
|
||||
unsigned char *n = alignedcalloc(lib25519_nP_SCALARBYTES);
|
||||
unsigned char *nG = alignedcalloc(lib25519_nP_POINTBYTES);
|
||||
|
||||
for (long long impl = -1;impl < lib25519_numimpl_nG_merged25519();++impl) {
|
||||
void (*crypto_nG)(unsigned char *,const unsigned char *);
|
||||
if (targeti && strcmp(targeti,lib25519_dispatch_nG_merged25519_implementation(impl))) continue;
|
||||
if (impl >= 0) {
|
||||
crypto_nG = lib25519_dispatch_nG_merged25519(impl);
|
||||
printf("nG_merged25519 %lld implementation %s compiler %s\n",impl,lib25519_dispatch_nG_merged25519_implementation(impl),lib25519_dispatch_nG_merged25519_compiler(impl));
|
||||
} else {
|
||||
crypto_nG = lib25519_nG_merged25519;
|
||||
printf("nG_merged25519 selected implementation %s compiler %s\n",lib25519_nG_merged25519_implementation(),lib25519_nG_merged25519_compiler());
|
||||
}
|
||||
randombytes(n,lib25519_nP_SCALARBYTES);
|
||||
randombytes(nG,lib25519_nP_POINTBYTES);
|
||||
for (long long i = 0;i <= TIMINGS;++i) {
|
||||
t[i] = cpucycles();
|
||||
crypto_nG(nG,n);
|
||||
}
|
||||
t_print("nG_merged25519",impl,lib25519_nP_POINTBYTES);
|
||||
}
|
||||
}
|
||||
|
||||
static void measure_nG_montgomery25519(void)
|
||||
{
|
||||
if (targeto && strcmp(targeto,"nG")) return;
|
||||
if (targetp && strcmp(targetp,"montgomery25519")) return;
|
||||
unsigned char *n = alignedcalloc(lib25519_nP_SCALARBYTES);
|
||||
unsigned char *nG = alignedcalloc(lib25519_nP_POINTBYTES);
|
||||
|
||||
for (long long impl = -1;impl < lib25519_numimpl_nG_montgomery25519();++impl) {
|
||||
void (*crypto_nG)(unsigned char *,const unsigned char *);
|
||||
if (targeti && strcmp(targeti,lib25519_dispatch_nG_montgomery25519_implementation(impl))) continue;
|
||||
if (impl >= 0) {
|
||||
crypto_nG = lib25519_dispatch_nG_montgomery25519(impl);
|
||||
printf("nG_montgomery25519 %lld implementation %s compiler %s\n",impl,lib25519_dispatch_nG_montgomery25519_implementation(impl),lib25519_dispatch_nG_montgomery25519_compiler(impl));
|
||||
} else {
|
||||
crypto_nG = lib25519_nG_montgomery25519;
|
||||
printf("nG_montgomery25519 selected implementation %s compiler %s\n",lib25519_nG_montgomery25519_implementation(),lib25519_nG_montgomery25519_compiler());
|
||||
}
|
||||
randombytes(n,lib25519_nP_SCALARBYTES);
|
||||
randombytes(nG,lib25519_nP_POINTBYTES);
|
||||
for (long long i = 0;i <= TIMINGS;++i) {
|
||||
t[i] = cpucycles();
|
||||
crypto_nG(nG,n);
|
||||
}
|
||||
t_print("nG_montgomery25519",impl,lib25519_nP_POINTBYTES);
|
||||
}
|
||||
}
|
||||
|
||||
static void measure_mGnP_ed25519(void)
|
||||
{
|
||||
if (targeto && strcmp(targeto,"mGnP")) return;
|
||||
if (targetp && strcmp(targetp,"ed25519")) return;
|
||||
unsigned char *mGnP = alignedcalloc(lib25519_mGnP_ed25519_OUTPUTBYTES);
|
||||
unsigned char *m = alignedcalloc(lib25519_mGnP_ed25519_MBYTES);
|
||||
unsigned char *n = alignedcalloc(lib25519_mGnP_ed25519_NBYTES);
|
||||
unsigned char *P = alignedcalloc(lib25519_mGnP_ed25519_PBYTES);
|
||||
|
||||
for (long long impl = -1;impl < lib25519_numimpl_mGnP_ed25519();++impl) {
|
||||
void (*crypto_mGnP)(unsigned char *,const unsigned char *,const unsigned char *,const unsigned char *);
|
||||
if (targeti && strcmp(targeti,lib25519_dispatch_mGnP_ed25519_implementation(impl))) continue;
|
||||
if (impl >= 0) {
|
||||
crypto_mGnP = lib25519_dispatch_mGnP_ed25519(impl);
|
||||
printf("mGnP_ed25519 %lld implementation %s compiler %s\n",impl,lib25519_dispatch_mGnP_ed25519_implementation(impl),lib25519_dispatch_mGnP_ed25519_compiler(impl));
|
||||
} else {
|
||||
crypto_mGnP = lib25519_mGnP_ed25519;
|
||||
printf("mGnP_ed25519 selected implementation %s compiler %s\n",lib25519_mGnP_ed25519_implementation(),lib25519_mGnP_ed25519_compiler());
|
||||
}
|
||||
randombytes(mGnP,lib25519_mGnP_ed25519_OUTPUTBYTES);
|
||||
randombytes(m,lib25519_mGnP_ed25519_MBYTES);
|
||||
randombytes(n,lib25519_mGnP_ed25519_NBYTES);
|
||||
randombytes(P,lib25519_mGnP_ed25519_PBYTES);
|
||||
for (long long i = 0;i <= TIMINGS;++i) {
|
||||
t[i] = cpucycles();
|
||||
crypto_mGnP(mGnP,m,n,P);
|
||||
}
|
||||
t_print("mGnP_ed25519",impl,lib25519_mGnP_ed25519_OUTPUTBYTES);
|
||||
}
|
||||
}
|
||||
|
||||
static void measure_dh_x25519(void)
|
||||
{
|
||||
if (targeto && strcmp(targeto,"dh")) return;
|
||||
if (targetp && strcmp(targetp,"x25519")) return;
|
||||
unsigned char *pka = alignedcalloc(lib25519_dh_x25519_PUBLICKEYBYTES);
|
||||
unsigned char *ska = alignedcalloc(lib25519_dh_x25519_SECRETKEYBYTES);
|
||||
unsigned char *pkb = alignedcalloc(lib25519_dh_x25519_PUBLICKEYBYTES);
|
||||
unsigned char *skb = alignedcalloc(lib25519_dh_x25519_SECRETKEYBYTES);
|
||||
unsigned char *ka = alignedcalloc(lib25519_dh_x25519_BYTES);
|
||||
|
||||
for (long long impl = -1;impl < lib25519_numimpl_dh_x25519();++impl) {
|
||||
void (*crypto_dh_keypair)(unsigned char *,unsigned char *);
|
||||
void (*crypto_dh)(unsigned char *,const unsigned char *,const unsigned char *);
|
||||
if (targeti && strcmp(targeti,lib25519_dispatch_dh_x25519_implementation(impl))) continue;
|
||||
if (impl >= 0) {
|
||||
crypto_dh_keypair = lib25519_dispatch_dh_x25519_keypair(impl);
|
||||
crypto_dh = lib25519_dispatch_dh_x25519(impl);
|
||||
printf("dh_x25519 %lld implementation %s compiler %s\n",impl,lib25519_dispatch_dh_x25519_implementation(impl),lib25519_dispatch_dh_x25519_compiler(impl));
|
||||
} else {
|
||||
crypto_dh_keypair = lib25519_dh_x25519_keypair;
|
||||
crypto_dh = lib25519_dh_x25519;
|
||||
printf("dh_x25519 selected implementation %s compiler %s\n",lib25519_dh_x25519_implementation(),lib25519_dh_x25519_compiler());
|
||||
}
|
||||
randombytes(pka,lib25519_dh_x25519_PUBLICKEYBYTES);
|
||||
randombytes(ska,lib25519_dh_x25519_SECRETKEYBYTES);
|
||||
randombytes(pkb,lib25519_dh_x25519_PUBLICKEYBYTES);
|
||||
randombytes(skb,lib25519_dh_x25519_SECRETKEYBYTES);
|
||||
randombytes(ka,lib25519_dh_x25519_BYTES);
|
||||
for (long long i = 0;i <= TIMINGS;++i) {
|
||||
t[i] = cpucycles();
|
||||
crypto_dh_keypair(pka,ska);
|
||||
}
|
||||
t_print("dh_x25519_keypair",impl,lib25519_dh_x25519_PUBLICKEYBYTES);
|
||||
crypto_dh_keypair(pkb,skb);
|
||||
for (long long i = 0;i <= TIMINGS;++i) {
|
||||
t[i] = cpucycles();
|
||||
crypto_dh(ka,pkb,ska);
|
||||
}
|
||||
t_print("dh_x25519",impl,lib25519_dh_x25519_BYTES);
|
||||
}
|
||||
}
|
||||
|
||||
static void measure_sign_ed25519(void)
|
||||
{
|
||||
if (targeto && strcmp(targeto,"sign")) return;
|
||||
if (targetp && strcmp(targetp,"ed25519")) return;
|
||||
unsigned char *pk = alignedcalloc(lib25519_sign_ed25519_PUBLICKEYBYTES);
|
||||
unsigned char *sk = alignedcalloc(lib25519_sign_ed25519_SECRETKEYBYTES);
|
||||
unsigned char *m = alignedcalloc(MAXTEST_BYTES+lib25519_sign_ed25519_BYTES);
|
||||
unsigned char *sm = alignedcalloc(MAXTEST_BYTES+lib25519_sign_ed25519_BYTES);
|
||||
unsigned char *m2 = alignedcalloc(MAXTEST_BYTES+lib25519_sign_ed25519_BYTES);
|
||||
long long mlen;
|
||||
long long smlen;
|
||||
long long m2len;
|
||||
|
||||
for (long long impl = -1;impl < lib25519_numimpl_sign_ed25519();++impl) {
|
||||
void (*crypto_sign_keypair)(unsigned char *,unsigned char *);
|
||||
void (*crypto_sign)(unsigned char *,long long *,const unsigned char *,long long,const unsigned char *);
|
||||
int (*crypto_sign_open)(unsigned char *,long long *,const unsigned char *,long long,const unsigned char *);
|
||||
if (targeti && strcmp(targeti,lib25519_dispatch_sign_ed25519_implementation(impl))) continue;
|
||||
if (impl >= 0) {
|
||||
crypto_sign_keypair = lib25519_dispatch_sign_ed25519_keypair(impl);
|
||||
crypto_sign = lib25519_dispatch_sign_ed25519(impl);
|
||||
crypto_sign_open = lib25519_dispatch_sign_ed25519_open(impl);
|
||||
printf("sign_ed25519 %lld implementation %s compiler %s\n",impl,lib25519_dispatch_sign_ed25519_implementation(impl),lib25519_dispatch_sign_ed25519_compiler(impl));
|
||||
} else {
|
||||
crypto_sign_keypair = lib25519_sign_ed25519_keypair;
|
||||
crypto_sign = lib25519_sign_ed25519;
|
||||
crypto_sign_open = lib25519_sign_ed25519_open;
|
||||
printf("sign_ed25519 selected implementation %s compiler %s\n",lib25519_sign_ed25519_implementation(),lib25519_sign_ed25519_compiler());
|
||||
}
|
||||
randombytes(pk,lib25519_sign_ed25519_PUBLICKEYBYTES);
|
||||
randombytes(sk,lib25519_sign_ed25519_SECRETKEYBYTES);
|
||||
randombytes(m,MAXTEST_BYTES+lib25519_sign_ed25519_BYTES);
|
||||
randombytes(sm,MAXTEST_BYTES+lib25519_sign_ed25519_BYTES);
|
||||
randombytes(m2,MAXTEST_BYTES+lib25519_sign_ed25519_BYTES);
|
||||
for (long long i = 0;i <= TIMINGS;++i) {
|
||||
t[i] = cpucycles();
|
||||
crypto_sign_keypair(pk,sk);
|
||||
}
|
||||
t_print("sign_ed25519_keypair",impl,lib25519_sign_ed25519_PUBLICKEYBYTES);
|
||||
mlen = 0;
|
||||
while (mlen <= MAXTEST_BYTES) {
|
||||
randombytes(m,mlen);
|
||||
for (long long i = 0;i <= TIMINGS;++i) {
|
||||
t[i] = cpucycles();
|
||||
crypto_sign(sm,&smlen,m,mlen,sk);
|
||||
}
|
||||
t_print("sign_ed25519",impl,mlen);
|
||||
mlen += 1+mlen/4;
|
||||
}
|
||||
mlen = 0;
|
||||
while (mlen <= MAXTEST_BYTES) {
|
||||
randombytes(m,mlen);
|
||||
lib25519_sign(sm,&smlen,m,mlen,sk);
|
||||
for (long long i = 0;i <= TIMINGS;++i) {
|
||||
t[i] = cpucycles();
|
||||
crypto_sign_open(m2,&m2len,sm,smlen,pk);
|
||||
}
|
||||
t_print("sign_ed25519_open",impl,mlen);
|
||||
/* this is, in principle, not a test program */
|
||||
/* but some checks here help validate the data flow above */
|
||||
assert(m2len == mlen);
|
||||
assert(!memcmp(m,m2,mlen));
|
||||
mlen += 1+mlen/4;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#include "print_cpuid.inc"
|
||||
|
||||
int main(int argc,char **argv)
|
||||
{
|
||||
printf("lib25519 version %s\n",lib25519_version);
|
||||
printf("lib25519 arch %s\n",lib25519_arch);
|
||||
print_cpuid();
|
||||
|
||||
if (*argv) ++argv;
|
||||
if (*argv) {
|
||||
targeto = *argv++;
|
||||
if (*argv) {
|
||||
targetp = *argv++;
|
||||
if (*argv) {
|
||||
targeti = *argv++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
measure_cpucycles();
|
||||
measure_randombytes();
|
||||
limits();
|
||||
measure_verify_32();
|
||||
measure_hashblocks_sha512();
|
||||
measure_hash_sha512();
|
||||
measure_pow_inv25519();
|
||||
measure_nP_montgomery25519();
|
||||
measure_nG_merged25519();
|
||||
measure_nG_montgomery25519();
|
||||
measure_mGnP_ed25519();
|
||||
measure_dh_x25519();
|
||||
measure_sign_ed25519();
|
||||
|
||||
return 0;
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,17 @@
|
||||
static void limits()
|
||||
{
|
||||
#ifdef RLIM_INFINITY
|
||||
struct rlimit r;
|
||||
r.rlim_cur = 0;
|
||||
r.rlim_max = 0;
|
||||
#ifdef RLIMIT_NOFILE
|
||||
setrlimit(RLIMIT_NOFILE,&r);
|
||||
#endif
|
||||
#ifdef RLIMIT_NPROC
|
||||
setrlimit(RLIMIT_NPROC,&r);
|
||||
#endif
|
||||
#ifdef RLIMIT_CORE
|
||||
setrlimit(RLIMIT_CORE,&r);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
static void print_cpuid(void)
|
||||
{
|
||||
unsigned int cpuid[32];
|
||||
lib25519_cpuid(cpuid,32);
|
||||
printf("cpuid");
|
||||
for (long long j = 0;j < 32;++j)
|
||||
printf(" %08x",cpuid[j]);
|
||||
printf("\n");
|
||||
}
|
||||
@@ -0,0 +1,2 @@
|
||||
gcc -Wall -fPIC -fwrapv -O2 -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -mtune=sandybridge
|
||||
clang -Wall -fPIC -fwrapv -Qunused-arguments -O2 -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -mtune=sandybridge
|
||||
@@ -0,0 +1,2 @@
|
||||
gcc -Wall -fPIC -fwrapv -O2 -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -mbmi -mbmi2 -mavx2 -mtune=haswell
|
||||
clang -Wall -fPIC -fwrapv -Qunused-arguments -O2 -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -mbmi -mbmi2 -mavx2 -mtune=haswell
|
||||
@@ -0,0 +1,2 @@
|
||||
gcc -Wall -fPIC -fwrapv -O2 -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -mbmi -mbmi2 -mavx2 -madx -mtune=skylake
|
||||
clang -Wall -fPIC -fwrapv -Qunused-arguments -O2 -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -mbmi -mbmi2 -mavx2 -madx -mtune=skylake
|
||||
@@ -0,0 +1,2 @@
|
||||
gcc -Wall -fPIC -fwrapv -O2 -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -mbmi -mbmi2 -mavx2 -madx -mavx512f -mavx512vl -mtune=skylake
|
||||
clang -Wall -fPIC -fwrapv -Qunused-arguments -O2 -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -mbmi -mbmi2 -mavx2 -madx -mavx512f -mavx512vl -mtune=skylake
|
||||
@@ -0,0 +1,2 @@
|
||||
gcc -Wall -fPIC -fwrapv -O2 -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -mbmi -mbmi2 -mavx2 -madx -mavx512f -mavx512vl -mavx512ifma -mtune=skylake
|
||||
clang -Wall -fPIC -fwrapv -Qunused-arguments -O2 -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mavx -mbmi -mbmi2 -mavx2 -madx -mavx512f -mavx512vl -mavx512ifma -mtune=skylake
|
||||
@@ -0,0 +1,37 @@
|
||||
#define CPUID(func,leaf,a,b,c,d) \
|
||||
__asm("cpuid":"=a"(a),"=b"(b),"=c"(c),"=d"(d):"a"(func),"c"(leaf):)
|
||||
|
||||
#define WANT_1_3 ((1<<23)|(1<<25)|(1<<26))
|
||||
/* 23=mmx; 25=sse; 26=sse2 */
|
||||
|
||||
#define WANT_1_2 ((1<<0)|(1<<9)|(1<<19)|(1<<20)|(1<<27)|(1<<28))
|
||||
/* 0=sse3; 9=ssse3; 19=sse41; 20=sse42; 27=osxsave; 28=avx */
|
||||
|
||||
#define WANT_7_1 ((1<<3)|(1<<5)|(1<<8)|(1<<16)|(1<<19)|(1<<21)|(1<<31))
|
||||
/* 3=bmi1; 5=avx2; 8=bmi2; 16=avx512_f; 19=adx; 21=avx512_ifma; 31=avx512_vl */
|
||||
|
||||
#define WANT_XCR ((1<<1)|(1<<2))
|
||||
/* 1=xmm; 2=ymm */
|
||||
|
||||
int supports(void)
|
||||
{
|
||||
unsigned int cpuidmax,id0,id1,id2;
|
||||
unsigned int familymodelstepping;
|
||||
unsigned int feature0,feature1,feature2,feature3;
|
||||
unsigned int xcrlow,xcrhigh;
|
||||
|
||||
CPUID(0,0,cpuidmax,id0,id1,id2);
|
||||
if (cpuidmax < 7) return 0;
|
||||
|
||||
CPUID(1,0,familymodelstepping,feature1,feature2,feature3);
|
||||
if (WANT_1_2 != (WANT_1_2 & feature2)) return 0;
|
||||
if (WANT_1_3 != (WANT_1_3 & feature3)) return 0;
|
||||
|
||||
CPUID(7,0,feature0,feature1,feature2,feature3);
|
||||
if (WANT_7_1 != (WANT_7_1 & feature1)) return 0;
|
||||
|
||||
asm(".byte 15;.byte 1;.byte 208":"=a"(xcrlow),"=d"(xcrhigh):"c"(0));
|
||||
if (WANT_XCR != (WANT_XCR & xcrlow)) return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
#define CPUID(func,leaf,a,b,c,d) \
|
||||
__asm("cpuid":"=a"(a),"=b"(b),"=c"(c),"=d"(d):"a"(func),"c"(leaf):)
|
||||
|
||||
#define WANT_1_3 ((1<<23)|(1<<25)|(1<<26))
|
||||
/* 23=mmx; 25=sse; 26=sse2 */
|
||||
|
||||
#define WANT_1_2 ((1<<0)|(1<<9)|(1<<19)|(1<<20)|(1<<27)|(1<<28))
|
||||
/* 0=sse3; 9=ssse3; 19=sse41; 20=sse42; 27=osxsave; 28=avx */
|
||||
|
||||
#define WANT_7_1 ((1<<3)|(1<<5)|(1<<8)|(1<<16)|(1<<19)|(1<<31))
|
||||
/* 3=bmi1; 5=avx2; 8=bmi2; 16=avx512_f; 19=adx; 31=avx512_vl */
|
||||
|
||||
#define WANT_XCR ((1<<1)|(1<<2))
|
||||
/* 1=xmm; 2=ymm */
|
||||
|
||||
int supports(void)
|
||||
{
|
||||
unsigned int cpuidmax,id0,id1,id2;
|
||||
unsigned int familymodelstepping;
|
||||
unsigned int feature0,feature1,feature2,feature3;
|
||||
unsigned int xcrlow,xcrhigh;
|
||||
|
||||
CPUID(0,0,cpuidmax,id0,id1,id2);
|
||||
if (cpuidmax < 7) return 0;
|
||||
|
||||
CPUID(1,0,familymodelstepping,feature1,feature2,feature3);
|
||||
if (WANT_1_2 != (WANT_1_2 & feature2)) return 0;
|
||||
if (WANT_1_3 != (WANT_1_3 & feature3)) return 0;
|
||||
|
||||
CPUID(7,0,feature0,feature1,feature2,feature3);
|
||||
if (WANT_7_1 != (WANT_7_1 & feature1)) return 0;
|
||||
|
||||
asm(".byte 15;.byte 1;.byte 208":"=a"(xcrlow),"=d"(xcrhigh):"c"(0));
|
||||
if (WANT_XCR != (WANT_XCR & xcrlow)) return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
#define CPUID(func,leaf,a,b,c,d) \
|
||||
__asm("cpuid":"=a"(a),"=b"(b),"=c"(c),"=d"(d):"a"(func),"c"(leaf):)
|
||||
|
||||
#define WANT_1_3 ((1<<23)|(1<<25)|(1<<26))
|
||||
/* 23=mmx; 25=sse; 26=sse2 */
|
||||
|
||||
#define WANT_1_2 ((1<<0)|(1<<9)|(1<<19)|(1<<20)|(1<<27)|(1<<28))
|
||||
/* 0=sse3; 9=ssse3; 19=sse41; 20=sse42; 27=osxsave; 28=avx */
|
||||
|
||||
#define WANT_7_1 ((1<<3)|(1<<5)|(1<<8)|(1<<19))
|
||||
/* 3=bmi1; 5=avx2; 8=bmi2; 19=adx */
|
||||
|
||||
#define WANT_XCR ((1<<1)|(1<<2))
|
||||
/* 1=xmm; 2=ymm */
|
||||
|
||||
int supports(void)
|
||||
{
|
||||
unsigned int cpuidmax,id0,id1,id2;
|
||||
unsigned int familymodelstepping;
|
||||
unsigned int feature0,feature1,feature2,feature3;
|
||||
unsigned int xcrlow,xcrhigh;
|
||||
|
||||
CPUID(0,0,cpuidmax,id0,id1,id2);
|
||||
if (cpuidmax < 7) return 0;
|
||||
|
||||
CPUID(1,0,familymodelstepping,feature1,feature2,feature3);
|
||||
if (WANT_1_2 != (WANT_1_2 & feature2)) return 0;
|
||||
if (WANT_1_3 != (WANT_1_3 & feature3)) return 0;
|
||||
|
||||
CPUID(7,0,feature0,feature1,feature2,feature3);
|
||||
if (WANT_7_1 != (WANT_7_1 & feature1)) return 0;
|
||||
|
||||
asm(".byte 15;.byte 1;.byte 208":"=a"(xcrlow),"=d"(xcrhigh):"c"(0));
|
||||
if (WANT_XCR != (WANT_XCR & xcrlow)) return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
gcc has __builtin_cpu_supports("avx2")
|
||||
but implemented it incorrectly until 2018:
|
||||
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85100
|
||||
|
||||
as of 2022, many machines still have buggy versions of gcc
|
||||
|
||||
furthermore, why is checking just for avx2 enough?
|
||||
has intel guaranteed that it will never introduce
|
||||
a cpu with avx2 instructions and without (e.g.) sse4.2?
|
||||
|
||||
so manually check cpuid and xgetbv here
|
||||
and include all the "lower" instruction sets
|
||||
rather than trying to guess which ones are implied
|
||||
*/
|
||||
|
||||
#define CPUID(func,leaf,a,b,c,d) \
|
||||
__asm("cpuid":"=a"(a),"=b"(b),"=c"(c),"=d"(d):"a"(func),"c"(leaf):)
|
||||
|
||||
#define WANT_1_3 ((1<<23)|(1<<25)|(1<<26))
|
||||
/* 23=mmx; 25=sse; 26=sse2 */
|
||||
|
||||
#define WANT_1_2 ((1<<0)|(1<<9)|(1<<19)|(1<<20)|(1<<27)|(1<<28))
|
||||
/* 0=sse3; 9=ssse3; 19=sse41; 20=sse42; 27=osxsave; 28=avx */
|
||||
|
||||
#define WANT_7_1 ((1<<3)|(1<<5)|(1<<8))
|
||||
/* 3=bmi1; 5=avx2; 8=bmi2 */
|
||||
|
||||
#define WANT_XCR ((1<<1)|(1<<2))
|
||||
/* 1=xmm; 2=ymm */
|
||||
|
||||
int supports(void)
|
||||
{
|
||||
unsigned int cpuidmax,id0,id1,id2;
|
||||
unsigned int familymodelstepping;
|
||||
unsigned int feature0,feature1,feature2,feature3;
|
||||
unsigned int xcrlow,xcrhigh;
|
||||
|
||||
CPUID(0,0,cpuidmax,id0,id1,id2);
|
||||
if (cpuidmax < 7) return 0;
|
||||
|
||||
CPUID(1,0,familymodelstepping,feature1,feature2,feature3);
|
||||
if (WANT_1_2 != (WANT_1_2 & feature2)) return 0;
|
||||
if (WANT_1_3 != (WANT_1_3 & feature3)) return 0;
|
||||
|
||||
CPUID(7,0,feature0,feature1,feature2,feature3);
|
||||
if (WANT_7_1 != (WANT_7_1 & feature1)) return 0;
|
||||
|
||||
asm(".byte 15;.byte 1;.byte 208":"=a"(xcrlow),"=d"(xcrhigh):"c"(0));
|
||||
if (WANT_XCR != (WANT_XCR & xcrlow)) return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -0,0 +1,31 @@
|
||||
#define CPUID(func,leaf,a,b,c,d) \
|
||||
__asm("cpuid":"=a"(a),"=b"(b),"=c"(c),"=d"(d):"a"(func),"c"(leaf):)
|
||||
|
||||
#define WANT_1_3 ((1<<23)|(1<<25)|(1<<26))
|
||||
/* 23=mmx; 25=sse; 26=sse2 */
|
||||
|
||||
#define WANT_1_2 ((1<<0)|(1<<9)|(1<<19)|(1<<20)|(1<<27)|(1<<28))
|
||||
/* 0=sse3; 9=ssse3; 19=sse41; 20=sse42; 27=osxsave; 28=avx */
|
||||
|
||||
#define WANT_XCR ((1<<1)|(1<<2))
|
||||
/* 1=xmm; 2=ymm */
|
||||
|
||||
int supports(void)
|
||||
{
|
||||
unsigned int cpuidmax,id0,id1,id2;
|
||||
unsigned int familymodelstepping;
|
||||
unsigned int feature1,feature2,feature3;
|
||||
unsigned int xcrlow,xcrhigh;
|
||||
|
||||
CPUID(0,0,cpuidmax,id0,id1,id2);
|
||||
if (cpuidmax < 1) return 0;
|
||||
|
||||
CPUID(1,0,familymodelstepping,feature1,feature2,feature3);
|
||||
if (WANT_1_2 != (WANT_1_2 & feature2)) return 0;
|
||||
if (WANT_1_3 != (WANT_1_3 & feature3)) return 0;
|
||||
|
||||
asm(".byte 15;.byte 1;.byte 208":"=a"(xcrlow),"=d"(xcrhigh):"c"(0));
|
||||
if (WANT_XCR != (WANT_XCR & xcrlow)) return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -0,0 +1,2 @@
|
||||
gcc -Wall -fPIC -fwrapv -O2
|
||||
clang -Wall -fPIC -fwrapv -Qunused-arguments -O2
|
||||
+1159
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,46 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
static struct perf_event_attr attr;
|
||||
static int fdperf = -1;
|
||||
static struct perf_event_mmap_page *buf = 0;
|
||||
|
||||
long long ticks_setup(void)
|
||||
{
|
||||
if (fdperf == -1) {
|
||||
attr.type = PERF_TYPE_HARDWARE;
|
||||
attr.config = PERF_COUNT_HW_CPU_CYCLES;
|
||||
attr.exclude_kernel = 1;
|
||||
fdperf = syscall(__NR_perf_event_open,&attr,0,-1,-1,0);
|
||||
if (fdperf == -1) return 0;
|
||||
buf = mmap(NULL,sysconf(_SC_PAGESIZE),PROT_READ,MAP_SHARED,fdperf,0);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
long long ticks(void)
|
||||
{
|
||||
long long result;
|
||||
unsigned int seq;
|
||||
long long index;
|
||||
long long offset;
|
||||
|
||||
do {
|
||||
seq = buf->lock;
|
||||
asm volatile("" ::: "memory");
|
||||
index = buf->index;
|
||||
offset = buf->offset;
|
||||
asm volatile("rdpmc;shlq $32,%%rdx;orq %%rdx,%%rax"
|
||||
: "=a"(result) : "c"(index-1) : "%rdx");
|
||||
asm volatile("" ::: "memory");
|
||||
} while (buf->lock != seq);
|
||||
|
||||
result += offset;
|
||||
result &= 0xffffffffffff;
|
||||
return result;
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
long long ticks_setup(void)
|
||||
{
|
||||
return -2;
|
||||
}
|
||||
|
||||
long long ticks(void)
|
||||
{
|
||||
unsigned long long result;
|
||||
asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax"
|
||||
: "=a"(result) :: "%rdx");
|
||||
return result;
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
#ifndef cpucycles_h
|
||||
#define cpucycles_h
|
||||
|
||||
extern long long (*cpucycles)(void) __attribute__((visibility("default")));;
|
||||
extern long long cpucycles_init(void) __attribute__((visibility("default")));;
|
||||
extern const char *cpucycles_implementation(void) __attribute__((visibility("default")));;
|
||||
extern long long cpucycles_persecond(void) __attribute__((visibility("default")));;
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,19 @@
|
||||
#include <time.h>
|
||||
#include <sys/time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
long long ticks_setup(void)
|
||||
{
|
||||
return 1000000;
|
||||
}
|
||||
|
||||
long long ticks(void)
|
||||
{
|
||||
struct timeval t;
|
||||
long long result;
|
||||
gettimeofday(&t,(struct timezone *) 0);
|
||||
result = t.tv_sec;
|
||||
result *= 1000000;
|
||||
result += t.tv_usec;
|
||||
return result;
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
#include <time.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
long long ticks_setup(void)
|
||||
{
|
||||
return 1000000000;
|
||||
}
|
||||
|
||||
long long ticks(void)
|
||||
{
|
||||
struct timespec t;
|
||||
long long result;
|
||||
clock_gettime(CLOCK_MONOTONIC,&t);
|
||||
result = t.tv_sec;
|
||||
result *= 1000000000;
|
||||
result += t.tv_nsec;
|
||||
return result;
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
long long ticks_setup(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
long long ticks(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
amd64-pmc
|
||||
amd64-tsc
|
||||
default-monotonic
|
||||
default-gettimeofday
|
||||
@@ -0,0 +1,216 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "cpucycles.h"
|
||||
|
||||
static double osfreq(void)
|
||||
{
|
||||
FILE *f;
|
||||
char *x;
|
||||
double result;
|
||||
int s;
|
||||
|
||||
f = fopen("/etc/cpucyclespersecond", "r");
|
||||
if (f) {
|
||||
s = fscanf(f,"%lf",&result);
|
||||
fclose(f);
|
||||
if (s > 0) return result;
|
||||
}
|
||||
|
||||
f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/scaling_setspeed", "r");
|
||||
if (f) {
|
||||
s = fscanf(f,"%lf",&result);
|
||||
fclose(f);
|
||||
if (s > 0) return 1000.0 * result;
|
||||
}
|
||||
|
||||
f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq", "r");
|
||||
if (f) {
|
||||
s = fscanf(f,"%lf",&result);
|
||||
fclose(f);
|
||||
if (s > 0) return 1000.0 * result;
|
||||
}
|
||||
|
||||
f = fopen("/sys/devices/system/cpu/cpu0/clock_tick", "r");
|
||||
if (f) {
|
||||
s = fscanf(f,"%lf",&result);
|
||||
fclose(f);
|
||||
if (s > 0) return result;
|
||||
}
|
||||
|
||||
f = fopen("/proc/cpuinfo","r");
|
||||
if (f) {
|
||||
for (;;) {
|
||||
s = fscanf(f,"cpu MHz : %lf",&result);
|
||||
if (s > 0) break;
|
||||
if (s == 0) s = fscanf(f,"%*[^\n]\n");
|
||||
if (s < 0) { result = 0; break; }
|
||||
}
|
||||
fclose(f);
|
||||
if (result) return 1000000.0 * result;
|
||||
}
|
||||
|
||||
f = fopen("/proc/cpuinfo","r");
|
||||
if (f) {
|
||||
for (;;) {
|
||||
s = fscanf(f,"clock : %lf",&result);
|
||||
if (s > 0) break;
|
||||
if (s == 0) s = fscanf(f,"%*[^\n]\n");
|
||||
if (s < 0) { result = 0; break; }
|
||||
}
|
||||
fclose(f);
|
||||
if (result) return 1000000.0 * result;
|
||||
}
|
||||
|
||||
f = popen("sysctl hw.cpufrequency 2>/dev/null","r");
|
||||
if (f) {
|
||||
s = fscanf(f,"hw.cpufrequency: %lf",&result);
|
||||
pclose(f);
|
||||
if (s > 0) if (result > 0) return result;
|
||||
}
|
||||
|
||||
f = popen("/usr/sbin/lsattr -E -l proc0 -a frequency 2>/dev/null","r");
|
||||
if (f) {
|
||||
s = fscanf(f,"frequency %lf",&result);
|
||||
pclose(f);
|
||||
if (s > 0) return result;
|
||||
}
|
||||
|
||||
f = popen("/usr/sbin/psrinfo -v 2>/dev/null","r");
|
||||
if (f) {
|
||||
for (;;) {
|
||||
s = fscanf(f," The %*s processor operates at %lf MHz",&result);
|
||||
if (s > 0) break;
|
||||
if (s == 0) s = fscanf(f,"%*[^\n]\n");
|
||||
if (s < 0) { result = 0; break; }
|
||||
}
|
||||
pclose(f);
|
||||
if (result) return 1000000.0 * result;
|
||||
}
|
||||
|
||||
x = getenv("cpucyclespersecond");
|
||||
if (x) {
|
||||
s = sscanf(x,"%lf",&result);
|
||||
if (s > 0) return result;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long long persecond = 0;
|
||||
const char *implementation = "none";
|
||||
|
||||
long long (*cpucycles)(void) = cpucycles_init;
|
||||
|
||||
const char *cpucycles_implementation(void)
|
||||
{
|
||||
cpucycles();
|
||||
return implementation;
|
||||
}
|
||||
|
||||
long long cpucycles_persecond(void)
|
||||
{
|
||||
cpucycles();
|
||||
return persecond;
|
||||
}
|
||||
|
||||
static double cpucycles_scaled_scaling = 0;
|
||||
static long long (*cpucycles_scaled_from)(void) = 0;
|
||||
|
||||
static long long cpucycles_scaled(void)
|
||||
{
|
||||
return cpucycles_scaled_from()*cpucycles_scaled_scaling;
|
||||
}
|
||||
|
||||
#include "options.inc"
|
||||
|
||||
#define CALLS 1000
|
||||
|
||||
long long cpucycles_init(void)
|
||||
{
|
||||
long long precision[NUMOPTIONS];
|
||||
long long scaling[NUMOPTIONS];
|
||||
long long bestprecision;
|
||||
long long bestopt;
|
||||
|
||||
persecond = osfreq();
|
||||
|
||||
for (long long opt = 0;opt < NUMOPTIONS;++opt) {
|
||||
long long freq = options[opt].ticks_setup();
|
||||
|
||||
// freq > 0: freq ticks per second
|
||||
// freq == 0: do not use
|
||||
// freq == -1: cycle counter (e.g., rdpmc)
|
||||
// freq == -2: probably cycle counter (e.g., rdtsc)
|
||||
// freq == -3: tick counter every N cycles for some unknown N
|
||||
|
||||
precision[opt] = 0;
|
||||
|
||||
if (freq > 0) { // means: freq ticks per second
|
||||
scaling[opt] = persecond*1.0/freq;
|
||||
} else if (freq == -1) { // means: cycle counter; e.g., rdpmc
|
||||
scaling[opt] = 1.0;
|
||||
} else if (freq == -2) { // means: probably cycle counter; e.g., rdtsc
|
||||
scaling[opt] = 1.0;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (long long tries = 0;tries < 10;++tries) {
|
||||
long long t[CALLS+1];
|
||||
long long ok = 1;
|
||||
|
||||
if (scaling[opt] == 1.0) {
|
||||
for (long long i = 0;i <= CALLS;++i)
|
||||
t[i] = options[opt].ticks();
|
||||
} else {
|
||||
double scalingopt = scaling[opt];
|
||||
for (long long i = 0;i <= CALLS;++i)
|
||||
t[i] = options[opt].ticks()*scalingopt;
|
||||
}
|
||||
for (long long i = 0;i < CALLS;++i)
|
||||
if (t[i] > t[i+1])
|
||||
ok = 0;
|
||||
if (t[0] == t[CALLS])
|
||||
ok = 0;
|
||||
|
||||
if (ok) {
|
||||
long long smallestdiff = 0;
|
||||
for (long long i = 0;i < CALLS;++i) {
|
||||
long long diff = t[i+1]-t[i];
|
||||
if (diff <= 0) continue;
|
||||
if (smallestdiff == 0 || diff < smallestdiff)
|
||||
smallestdiff = diff;
|
||||
}
|
||||
precision[opt] = smallestdiff;
|
||||
if (freq != -1)
|
||||
precision[opt] += 100;
|
||||
break;
|
||||
}
|
||||
|
||||
// otherwise keep trying
|
||||
// since !ok can be caused by overflow
|
||||
// or by core swap
|
||||
}
|
||||
}
|
||||
|
||||
bestopt = DEFAULTOPTION;
|
||||
bestprecision = 0;
|
||||
for (long long opt = 0;opt < NUMOPTIONS;++opt)
|
||||
if (precision[opt] > 0)
|
||||
if (!bestprecision || precision[opt] < bestprecision) {
|
||||
bestopt = opt;
|
||||
bestprecision = precision[opt];
|
||||
}
|
||||
|
||||
implementation = options[bestopt].implementation;
|
||||
|
||||
if (scaling[bestopt] == 1.0) {
|
||||
cpucycles = options[bestopt].ticks;
|
||||
} else {
|
||||
cpucycles_scaled_scaling = scaling[bestopt];
|
||||
cpucycles_scaled_from = options[bestopt].ticks;
|
||||
cpucycles = cpucycles_scaled;
|
||||
}
|
||||
|
||||
return cpucycles();
|
||||
}
|
||||
@@ -0,0 +1,73 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#define CPUID(func,leaf,a,b,c,d) \
|
||||
__asm("cpuid":"=a"(a),"=b"(b),"=c"(c),"=d"(d):"a"(func),"c"(leaf):)
|
||||
|
||||
__attribute__((visibility("default")))
|
||||
void lib25519_cpuid(unsigned int *result,long long resultlen)
|
||||
{
|
||||
unsigned int a,b,c,d;
|
||||
unsigned int cpuidmax,extendedcpuidmax;
|
||||
int havexgetbv = 0;
|
||||
|
||||
CPUID(0,0,a,b,c,d);
|
||||
cpuidmax = a;
|
||||
if (resultlen > 0) { *result++ = b; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = c; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = d; --resultlen; }
|
||||
|
||||
a = b = c = d = 0;
|
||||
CPUID(0x80000000,0,a,b,c,d);
|
||||
extendedcpuidmax = a;
|
||||
|
||||
a = b = c = d = 0;
|
||||
if (extendedcpuidmax >= 0x80000002) CPUID(0x80000002,0,a,b,c,d);
|
||||
if (resultlen > 0) { *result++ = a; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = b; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = c; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = d; --resultlen; }
|
||||
|
||||
a = b = c = d = 0;
|
||||
if (extendedcpuidmax >= 0x80000003) CPUID(0x80000003,0,a,b,c,d);
|
||||
if (resultlen > 0) { *result++ = a; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = b; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = c; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = d; --resultlen; }
|
||||
|
||||
a = b = c = d = 0;
|
||||
if (extendedcpuidmax >= 0x80000004) CPUID(0x80000004,0,a,b,c,d);
|
||||
if (resultlen > 0) { *result++ = a; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = b; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = c; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = d; --resultlen; }
|
||||
|
||||
a = b = c = d = 0;
|
||||
if (cpuidmax >= 1) CPUID(1,0,a,b,c,d);
|
||||
if (resultlen > 0) { *result++ = a; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = b; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = c; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = d; --resultlen; }
|
||||
/* 27=osxsave; 28=avx */
|
||||
if (((1<<27)|(1<<28)) == (((1<<27)|(1<<28)) & c))
|
||||
havexgetbv = 1;
|
||||
|
||||
a = b = c = d = 0;
|
||||
if (cpuidmax >= 7) CPUID(7,0,a,b,c,d);
|
||||
if (resultlen > 0) { *result++ = a; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = b; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = c; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = d; --resultlen; }
|
||||
|
||||
a = b = c = d = 0;
|
||||
if (extendedcpuidmax >= 0x80000001) CPUID(0x80000001,0,a,b,c,d);
|
||||
if (resultlen > 0) { *result++ = a; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = b; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = c; --resultlen; }
|
||||
if (resultlen > 0) { *result++ = d; --resultlen; }
|
||||
|
||||
a = b = c = d = 0;
|
||||
if (havexgetbv) asm(".byte 15;.byte 1;.byte 208":"=a"(a),"=d"(d):"c"(0));
|
||||
if (resultlen > 0) { *result++ = a; --resultlen; }
|
||||
|
||||
while (resultlen > 0) { *result++ = 0; --resultlen; }
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
#include <stdio.h>
|
||||
|
||||
__attribute__((visibility("default")))
|
||||
void lib25519_cpuid(unsigned int *result,long long resultlen)
|
||||
{
|
||||
while (resultlen > 0) { *result++ = 0; --resultlen; }
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
#include "crypto_nP_montgomery25519.h"
|
||||
#include "crypto_dh.h"
|
||||
|
||||
void crypto_dh(unsigned char *abshared,
|
||||
const unsigned char *bobpk,
|
||||
const unsigned char *alicesk)
|
||||
{
|
||||
crypto_nP_montgomery25519(abshared,alicesk,bobpk);
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
#include "crypto_nG_montgomery25519.h"
|
||||
#include "randombytes.h"
|
||||
#include "crypto_dh.h"
|
||||
|
||||
void crypto_dh_keypair(unsigned char *pk,unsigned char *sk)
|
||||
{
|
||||
randombytes(sk,crypto_dh_SECRETKEYBYTES);
|
||||
crypto_nG_montgomery25519(pk,sk);
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
../ref/api.h
|
||||
@@ -0,0 +1 @@
|
||||
../../../crypto_pow/inv25519/sandy2x/architectures
|
||||
@@ -0,0 +1,78 @@
|
||||
#include <immintrin.h>
|
||||
#include "crypto_hashblocks_sha512.h"
|
||||
#include "crypto_hash.h"
|
||||
|
||||
#define blocks crypto_hashblocks_sha512
|
||||
|
||||
#define ALIGNED __attribute((aligned(32)))
|
||||
|
||||
static const ALIGNED unsigned char iv[64] = {
|
||||
0x6a,0x09,0xe6,0x67,0xf3,0xbc,0xc9,0x08,
|
||||
0xbb,0x67,0xae,0x85,0x84,0xca,0xa7,0x3b,
|
||||
0x3c,0x6e,0xf3,0x72,0xfe,0x94,0xf8,0x2b,
|
||||
0xa5,0x4f,0xf5,0x3a,0x5f,0x1d,0x36,0xf1,
|
||||
0x51,0x0e,0x52,0x7f,0xad,0xe6,0x82,0xd1,
|
||||
0x9b,0x05,0x68,0x8c,0x2b,0x3e,0x6c,0x1f,
|
||||
0x1f,0x83,0xd9,0xab,0xfb,0x41,0xbd,0x6b,
|
||||
0x5b,0xe0,0xcd,0x19,0x13,0x7e,0x21,0x79
|
||||
} ;
|
||||
|
||||
typedef unsigned long long uint64;
|
||||
|
||||
#define load256(x) (_mm256_loadu_si256((void *) (x)))
|
||||
#define store256(x,y) (_mm256_storeu_si256((void *) (x),y))
|
||||
|
||||
void crypto_hash(unsigned char *out,const unsigned char *in,long long inlen)
|
||||
{
|
||||
ALIGNED unsigned char h[64];
|
||||
ALIGNED unsigned char padded[256];
|
||||
unsigned long long i;
|
||||
unsigned long long bytes = inlen;
|
||||
__m256i X0,X1;
|
||||
|
||||
X0 = load256(iv);
|
||||
X1 = load256(iv + 32);
|
||||
|
||||
store256(h,X0);
|
||||
store256(h + 32,X1);
|
||||
|
||||
blocks(h,in,inlen);
|
||||
in += inlen;
|
||||
inlen &= 127;
|
||||
in -= inlen;
|
||||
|
||||
X0 ^= X0;
|
||||
|
||||
if (inlen < 112) {
|
||||
store256(padded,X0);
|
||||
store256(padded + 32,X0);
|
||||
store256(padded + 64,X0);
|
||||
store256(padded + 96,X0);
|
||||
|
||||
for (i = 0;i < inlen;++i) padded[i] = in[i];
|
||||
padded[inlen] = 0x80;
|
||||
|
||||
padded[119] = bytes >> 61;
|
||||
*(uint64 *) (padded + 120) = __builtin_bswap64(bytes << 3);
|
||||
blocks(h,padded,128);
|
||||
} else {
|
||||
store256(padded + 96,X0);
|
||||
store256(padded + 128,X0);
|
||||
store256(padded + 160,X0);
|
||||
store256(padded + 192,X0);
|
||||
store256(padded + 224,X0);
|
||||
|
||||
for (i = 0;i < inlen;++i) padded[i] = in[i];
|
||||
padded[inlen] = 0x80;
|
||||
|
||||
padded[247] = bytes >> 61;
|
||||
*(uint64 *) (padded + 248) = __builtin_bswap64(bytes << 3);
|
||||
blocks(h,padded,256);
|
||||
}
|
||||
|
||||
X0 = load256(h);
|
||||
X1 = load256(h + 32);
|
||||
|
||||
store256(out,X0);
|
||||
store256(out + 32,X1);
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
../ref/implementors
|
||||
@@ -0,0 +1 @@
|
||||
#define CRYPTO_BYTES 64
|
||||
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
20080913
|
||||
D. J. Bernstein
|
||||
Public domain.
|
||||
*/
|
||||
|
||||
#include "crypto_hashblocks_sha512.h"
|
||||
#include "crypto_hash.h"
|
||||
|
||||
#define blocks crypto_hashblocks_sha512
|
||||
|
||||
static const unsigned char iv[64] = {
|
||||
0x6a,0x09,0xe6,0x67,0xf3,0xbc,0xc9,0x08,
|
||||
0xbb,0x67,0xae,0x85,0x84,0xca,0xa7,0x3b,
|
||||
0x3c,0x6e,0xf3,0x72,0xfe,0x94,0xf8,0x2b,
|
||||
0xa5,0x4f,0xf5,0x3a,0x5f,0x1d,0x36,0xf1,
|
||||
0x51,0x0e,0x52,0x7f,0xad,0xe6,0x82,0xd1,
|
||||
0x9b,0x05,0x68,0x8c,0x2b,0x3e,0x6c,0x1f,
|
||||
0x1f,0x83,0xd9,0xab,0xfb,0x41,0xbd,0x6b,
|
||||
0x5b,0xe0,0xcd,0x19,0x13,0x7e,0x21,0x79
|
||||
} ;
|
||||
|
||||
typedef unsigned long long uint64;
|
||||
|
||||
void crypto_hash(unsigned char *out,const unsigned char *in,long long inlen)
|
||||
{
|
||||
unsigned char h[64];
|
||||
unsigned char padded[256];
|
||||
int i;
|
||||
unsigned long long bytes = inlen;
|
||||
|
||||
for (i = 0;i < 64;++i) h[i] = iv[i];
|
||||
|
||||
blocks(h,in,inlen);
|
||||
in += inlen;
|
||||
inlen &= 127;
|
||||
in -= inlen;
|
||||
|
||||
for (i = 0;i < inlen;++i) padded[i] = in[i];
|
||||
padded[inlen] = 0x80;
|
||||
|
||||
if (inlen < 112) {
|
||||
for (i = inlen + 1;i < 119;++i) padded[i] = 0;
|
||||
padded[119] = bytes >> 61;
|
||||
padded[120] = bytes >> 53;
|
||||
padded[121] = bytes >> 45;
|
||||
padded[122] = bytes >> 37;
|
||||
padded[123] = bytes >> 29;
|
||||
padded[124] = bytes >> 21;
|
||||
padded[125] = bytes >> 13;
|
||||
padded[126] = bytes >> 5;
|
||||
padded[127] = bytes << 3;
|
||||
blocks(h,padded,128);
|
||||
} else {
|
||||
for (i = inlen + 1;i < 247;++i) padded[i] = 0;
|
||||
padded[247] = bytes >> 61;
|
||||
padded[248] = bytes >> 53;
|
||||
padded[249] = bytes >> 45;
|
||||
padded[250] = bytes >> 37;
|
||||
padded[251] = bytes >> 29;
|
||||
padded[252] = bytes >> 21;
|
||||
padded[253] = bytes >> 13;
|
||||
padded[254] = bytes >> 5;
|
||||
padded[255] = bytes << 3;
|
||||
blocks(h,padded,256);
|
||||
}
|
||||
|
||||
for (i = 0;i < 64;++i) out[i] = h[i];
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
Daniel J. Bernstein (wrapper around crypto_hashblocks/sha512)
|
||||
@@ -0,0 +1 @@
|
||||
../m3/api.h
|
||||
@@ -0,0 +1,2 @@
|
||||
amd64 avx2
|
||||
x86 avx2
|
||||
@@ -0,0 +1 @@
|
||||
../m3/constants.c
|
||||
@@ -0,0 +1 @@
|
||||
../m3/implementors
|
||||
@@ -0,0 +1,249 @@
|
||||
#include <immintrin.h>
|
||||
#include "inner.h"
|
||||
|
||||
#define uint64 crypto_uint64
|
||||
|
||||
static uint64 load_bigendian(const unsigned char *x)
|
||||
{
|
||||
return __builtin_bswap64(*(uint64 *) x);
|
||||
}
|
||||
|
||||
static void store_bigendian(unsigned char *x,uint64 u)
|
||||
{
|
||||
*(uint64 *) x = __builtin_bswap64(u);
|
||||
}
|
||||
|
||||
#define SHR(x,c) ((x) >> (c))
|
||||
#define ROTR(x,c) (((x) >> (c)) | ((x) << (64 - (c))))
|
||||
#define sigma0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x,7))
|
||||
#define sigma1(x) (ROTR(x,19) ^ ROTR(x,61) ^ SHR(x,6))
|
||||
|
||||
#define Ch(x,y,z) (z ^ (x & (y ^ z)))
|
||||
#define Maj(x,y,z) ((x & (y ^ z)) ^ (y & z))
|
||||
#define Sigma0(x) (ROTR(x,28) ^ ROTR(x,34) ^ ROTR(x,39))
|
||||
#define Sigma1(x) (ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41))
|
||||
|
||||
#define ALIGNED __attribute((aligned(32)))
|
||||
|
||||
#define load64(x) (*(uint64 *) (x))
|
||||
|
||||
#define store256(x,y) (*(volatile __m256i *) (x) = (y))
|
||||
|
||||
#define bigendian64 _mm256_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)
|
||||
|
||||
#define PREEXPANDx4(X0,X9,X1) \
|
||||
X0 = _mm256_add_epi64(X0, \
|
||||
_mm256_srli_epi64(X1,1) ^ _mm256_slli_epi64(X1,63) ^ \
|
||||
_mm256_srli_epi64(X1,8) ^ _mm256_slli_epi64(X1,56) ^ \
|
||||
_mm256_srli_epi64(X1,7) \
|
||||
); \
|
||||
X0 = _mm256_add_epi64(X0,X9);
|
||||
|
||||
#define POSTEXPANDx4(X0,W0,W2,W14) \
|
||||
W0 = ( \
|
||||
_mm256_extracti128_si256(X0,0)); \
|
||||
W0 = _mm_add_epi64(W0, \
|
||||
_mm_srli_epi64(W14,19) ^ _mm_slli_epi64(W14,45) ^ \
|
||||
_mm_srli_epi64(W14,61) ^ _mm_slli_epi64(W14,3) ^ \
|
||||
_mm_srli_epi64(W14,6)); \
|
||||
W2 = ( \
|
||||
_mm256_extracti128_si256(X0,1)); \
|
||||
W2 = _mm_add_epi64(W2, \
|
||||
_mm_srli_epi64(W0,19) ^ _mm_slli_epi64(W0,45) ^ \
|
||||
_mm_srli_epi64(W0,61) ^ _mm_slli_epi64(W0,3) ^ \
|
||||
_mm_srli_epi64(W0,6)); \
|
||||
X0 = _mm256_insertf128_si256(_mm256_castsi128_si256(W0),W2,1);
|
||||
|
||||
#define ROUND0(i,r0,r1,r2,r3,r4,r5,r6,r7) \
|
||||
r7 += load64(&wc[i]); \
|
||||
r7 += Ch(r4,r5,r6); \
|
||||
r7 += Sigma1(r4); \
|
||||
r3 += r7; \
|
||||
r7 += Maj(r2,r0,r1); \
|
||||
r7 += Sigma0(r0); \
|
||||
|
||||
#define ROUND1(i,r0,r1,r2,r3,r4,r5,r6,r7) \
|
||||
r7 += load64(&wc[i]); \
|
||||
r7 += Ch(r4,r5,r6); \
|
||||
r7 += Sigma1(r4); \
|
||||
r3 += r7; \
|
||||
r7 += Maj(r0,r1,r2); \
|
||||
r7 += Sigma0(r0); \
|
||||
|
||||
int inner(unsigned char *statebytes,const unsigned char *in,unsigned int inlen,const uint64 *constants)
|
||||
{
|
||||
ALIGNED uint64 state[8];
|
||||
ALIGNED uint64 w[20];
|
||||
ALIGNED uint64 wc[16]; /* w[i]+constants[i] */
|
||||
uint64 r0,r1,r2,r3,r4,r5,r6,r7;
|
||||
__m128i W0,W2,W4,W6,W8,W10,W12,W14;
|
||||
__m256i X0,X1,X4,X5,X8,X9,X12,X13;
|
||||
__m256i D0,D4,D8,D12;
|
||||
int i;
|
||||
|
||||
state[0] = r0 = load_bigendian(statebytes);
|
||||
state[1] = r1 = load_bigendian(statebytes+8);
|
||||
state[2] = r2 = load_bigendian(statebytes+16);
|
||||
state[3] = r3 = load_bigendian(statebytes+24);
|
||||
state[4] = r4 = load_bigendian(statebytes+32);
|
||||
state[5] = r5 = load_bigendian(statebytes+40);
|
||||
state[6] = r6 = load_bigendian(statebytes+48);
|
||||
state[7] = r7 = load_bigendian(statebytes+56);
|
||||
|
||||
do {
|
||||
X0 = _mm256_loadu_si256((void *) (in+0));
|
||||
X0 = _mm256_shuffle_epi8(X0,bigendian64);
|
||||
D0 = _mm256_loadu_si256((void *) &constants[0]);
|
||||
D0 = _mm256_add_epi64(X0,D0);
|
||||
store256(&wc[0],D0);
|
||||
store256(&w[0],X0);
|
||||
store256(&w[16],X0);
|
||||
|
||||
X4 = _mm256_loadu_si256((void *) (in+32));
|
||||
X4 = _mm256_shuffle_epi8(X4,bigendian64);
|
||||
D4 = _mm256_loadu_si256((void *) &constants[4]);
|
||||
D4 = _mm256_add_epi64(X4,D4);
|
||||
store256(&wc[4],D4);
|
||||
store256(&w[4],X4);
|
||||
|
||||
ROUND0(0,r0,r1,r2,r3,r4,r5,r6,r7)
|
||||
ROUND1(1,r7,r0,r1,r2,r3,r4,r5,r6)
|
||||
|
||||
X8 = _mm256_loadu_si256((void *) (in+64));
|
||||
X8 = _mm256_shuffle_epi8(X8,bigendian64);
|
||||
D8 = _mm256_loadu_si256((void *) &constants[8]);
|
||||
D8 = _mm256_add_epi64(X8,D8);
|
||||
store256(&wc[8],D8);
|
||||
store256(&w[8],X8);
|
||||
|
||||
ROUND0(2,r6,r7,r0,r1,r2,r3,r4,r5)
|
||||
ROUND1(3,r5,r6,r7,r0,r1,r2,r3,r4)
|
||||
|
||||
ROUND0(4,r4,r5,r6,r7,r0,r1,r2,r3)
|
||||
ROUND1(5,r3,r4,r5,r6,r7,r0,r1,r2)
|
||||
|
||||
X12 = _mm256_loadu_si256((void *) (in+96));
|
||||
X12 = _mm256_shuffle_epi8(X12,bigendian64);
|
||||
D12 = _mm256_loadu_si256((void *) &constants[12]);
|
||||
D12 = _mm256_add_epi64(X12,D12);
|
||||
store256(&wc[12],D12);
|
||||
store256(&w[12],X12);
|
||||
|
||||
ROUND0(6,r2,r3,r4,r5,r6,r7,r0,r1)
|
||||
ROUND1(7,r1,r2,r3,r4,r5,r6,r7,r0)
|
||||
|
||||
ROUND0(8,r0,r1,r2,r3,r4,r5,r6,r7)
|
||||
ROUND1(9,r7,r0,r1,r2,r3,r4,r5,r6)
|
||||
|
||||
|
||||
for (i = 4;i > 0;--i) {
|
||||
|
||||
constants += 16;
|
||||
|
||||
X1 = _mm256_loadu_si256((void *) (w+1));
|
||||
X9 = _mm256_loadu_si256((void *) (w+9));
|
||||
PREEXPANDx4(X0,X9,X1)
|
||||
|
||||
W14 = _mm_loadu_si128((void *) (w+14));
|
||||
POSTEXPANDx4(X0,W0,W2,W14)
|
||||
|
||||
D0 = _mm256_loadu_si256((void *) &constants[0]);
|
||||
D0 = _mm256_add_epi64(X0,D0);
|
||||
store256(&wc[0],D0);
|
||||
store256(w+16,X0);
|
||||
store256(w+0,X0);
|
||||
|
||||
ROUND0(10,r6,r7,r0,r1,r2,r3,r4,r5)
|
||||
ROUND1(11,r5,r6,r7,r0,r1,r2,r3,r4)
|
||||
|
||||
ROUND0(12,r4,r5,r6,r7,r0,r1,r2,r3)
|
||||
ROUND1(13,r3,r4,r5,r6,r7,r0,r1,r2)
|
||||
|
||||
X5 = _mm256_loadu_si256((void *) (w+5));
|
||||
X13 = _mm256_loadu_si256((void *) (w+13));
|
||||
PREEXPANDx4(X4,X13,X5)
|
||||
|
||||
W2 = _mm_loadu_si128((void *) (w+2));
|
||||
POSTEXPANDx4(X4,W4,W6,W2)
|
||||
|
||||
D4 = _mm256_loadu_si256((void *) &constants[4]);
|
||||
D4 = _mm256_add_epi64(X4,D4);
|
||||
store256(&wc[4],D4);
|
||||
store256(w+4,X4);
|
||||
|
||||
ROUND0(14,r2,r3,r4,r5,r6,r7,r0,r1)
|
||||
ROUND1(15,r1,r2,r3,r4,r5,r6,r7,r0)
|
||||
|
||||
ROUND0(0,r0,r1,r2,r3,r4,r5,r6,r7)
|
||||
ROUND1(1,r7,r0,r1,r2,r3,r4,r5,r6)
|
||||
|
||||
X9 = _mm256_loadu_si256((void *) (w+9));
|
||||
X1 = _mm256_loadu_si256((void *) (w+1));
|
||||
PREEXPANDx4(X8,X1,X9)
|
||||
|
||||
W6 = _mm_loadu_si128((void *) (w+6));
|
||||
POSTEXPANDx4(X8,W8,W10,W6)
|
||||
|
||||
D8 = _mm256_loadu_si256((void *) &constants[8]);
|
||||
D8 = _mm256_add_epi64(X8,D8);
|
||||
store256(&wc[8],D8);
|
||||
store256(w+8,X8);
|
||||
|
||||
ROUND0(2,r6,r7,r0,r1,r2,r3,r4,r5)
|
||||
ROUND1(3,r5,r6,r7,r0,r1,r2,r3,r4)
|
||||
|
||||
ROUND0(4,r4,r5,r6,r7,r0,r1,r2,r3)
|
||||
ROUND1(5,r3,r4,r5,r6,r7,r0,r1,r2)
|
||||
|
||||
X13 = _mm256_loadu_si256((void *) (w+13));
|
||||
X5 = _mm256_loadu_si256((void *) (w+5));
|
||||
PREEXPANDx4(X12,X5,X13)
|
||||
|
||||
W10 = _mm_loadu_si128((void *) (w+10));
|
||||
POSTEXPANDx4(X12,W12,W14,W10)
|
||||
|
||||
D12 = _mm256_loadu_si256((void *) &constants[12]);
|
||||
D12 = _mm256_add_epi64(X12,D12);
|
||||
store256(&wc[12],D12);
|
||||
store256(w+12,X12);
|
||||
|
||||
ROUND0(6,r2,r3,r4,r5,r6,r7,r0,r1)
|
||||
ROUND1(7,r1,r2,r3,r4,r5,r6,r7,r0)
|
||||
|
||||
ROUND0(8,r0,r1,r2,r3,r4,r5,r6,r7)
|
||||
ROUND1(9,r7,r0,r1,r2,r3,r4,r5,r6)
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
ROUND0(10,r6,r7,r0,r1,r2,r3,r4,r5)
|
||||
ROUND1(11,r5,r6,r7,r0,r1,r2,r3,r4)
|
||||
|
||||
ROUND0(12,r4,r5,r6,r7,r0,r1,r2,r3)
|
||||
ROUND1(13,r3,r4,r5,r6,r7,r0,r1,r2)
|
||||
|
||||
ROUND0(14,r2,r3,r4,r5,r6,r7,r0,r1)
|
||||
ROUND1(15,r1,r2,r3,r4,r5,r6,r7,r0)
|
||||
}
|
||||
|
||||
|
||||
constants -= 64;
|
||||
|
||||
r0 += state[0]; state[0] = r0;
|
||||
r1 += state[1]; state[1] = r1;
|
||||
r2 += state[2]; state[2] = r2;
|
||||
r3 += state[3]; state[3] = r3;
|
||||
r4 += state[4]; state[4] = r4;
|
||||
r5 += state[5]; state[5] = r5;
|
||||
r6 += state[6]; state[6] = r6;
|
||||
r7 += state[7]; state[7] = r7;
|
||||
|
||||
in += 128;
|
||||
inlen -= 128;
|
||||
} while (inlen >= 128);
|
||||
|
||||
for (i = 0;i < 8;++i)
|
||||
store_bigendian(statebytes+8*i,state[i]);
|
||||
|
||||
return inlen;
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
#ifndef inner_h
|
||||
#define inner_h
|
||||
|
||||
#define inner CRYPTO_NAMESPACE(inner)
|
||||
|
||||
#include "crypto_uint64.h"
|
||||
|
||||
extern int inner(unsigned char *,const unsigned char *,unsigned int,const crypto_uint64 *);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,68 @@
|
||||
inner.S: \
|
||||
round01.q \
|
||||
round23.q \
|
||||
round45.q \
|
||||
round67.q \
|
||||
round89.q \
|
||||
round1011.q \
|
||||
round1213.q \
|
||||
round1415.q \
|
||||
expand0.q \
|
||||
expand4.q \
|
||||
expand8.q \
|
||||
expand12.q \
|
||||
rer0.q \
|
||||
rer4.q \
|
||||
rer8.q \
|
||||
rer12.q \
|
||||
inner.q \
|
||||
inner.S.do
|
||||
./inner.S.do < inner.q > inner.S
|
||||
|
||||
round01.q: round.py
|
||||
./round.py 0 > round01.q
|
||||
|
||||
round23.q: round.py
|
||||
./round.py 1 > round23.q
|
||||
|
||||
round45.q: round.py
|
||||
./round.py 2 > round45.q
|
||||
|
||||
round67.q: round.py
|
||||
./round.py 3 > round67.q
|
||||
|
||||
round89.q: round.py
|
||||
./round.py 4 > round89.q
|
||||
|
||||
round1011.q: round.py
|
||||
./round.py 5 > round1011.q
|
||||
|
||||
round1213.q: round.py
|
||||
./round.py 6 > round1213.q
|
||||
|
||||
round1415.q: round.py
|
||||
./round.py 7 > round1415.q
|
||||
|
||||
expand0.q: expand.py
|
||||
./expand.py 0 > expand0.q
|
||||
|
||||
expand4.q: expand.py
|
||||
./expand.py 4 > expand4.q
|
||||
|
||||
expand8.q: expand.py
|
||||
./expand.py 8 > expand8.q
|
||||
|
||||
expand12.q: expand.py
|
||||
./expand.py 12 > expand12.q
|
||||
|
||||
rer0.q: rer.py
|
||||
./rer.py 0 > rer0.q
|
||||
|
||||
rer4.q: rer.py
|
||||
./rer.py 4 > rer4.q
|
||||
|
||||
rer8.q: rer.py
|
||||
./rer.py 8 > rer8.q
|
||||
|
||||
rer12.q: rer.py
|
||||
./rer.py 12 > rer12.q
|
||||
@@ -0,0 +1 @@
|
||||
../m3/api.h
|
||||
@@ -0,0 +1,2 @@
|
||||
amd64 bmi2 avx2
|
||||
x86 bmi2 avx2
|
||||
@@ -0,0 +1,104 @@
|
||||
#include "crypto_hashblocks.h"
|
||||
#include "inner.h"
|
||||
|
||||
static const crypto_uint64 constants[84] = {
|
||||
0x428a2f98d728ae22ULL
|
||||
, 0x7137449123ef65cdULL
|
||||
, 0xb5c0fbcfec4d3b2fULL
|
||||
, 0xe9b5dba58189dbbcULL
|
||||
, 0x3956c25bf348b538ULL
|
||||
, 0x59f111f1b605d019ULL
|
||||
, 0x923f82a4af194f9bULL
|
||||
, 0xab1c5ed5da6d8118ULL
|
||||
, 0xd807aa98a3030242ULL
|
||||
, 0x12835b0145706fbeULL
|
||||
, 0x243185be4ee4b28cULL
|
||||
, 0x550c7dc3d5ffb4e2ULL
|
||||
, 0x72be5d74f27b896fULL
|
||||
, 0x80deb1fe3b1696b1ULL
|
||||
, 0x9bdc06a725c71235ULL
|
||||
, 0xc19bf174cf692694ULL
|
||||
, 0xe49b69c19ef14ad2ULL
|
||||
, 0xefbe4786384f25e3ULL
|
||||
, 0x0fc19dc68b8cd5b5ULL
|
||||
, 0x240ca1cc77ac9c65ULL
|
||||
, 0x2de92c6f592b0275ULL
|
||||
, 0x4a7484aa6ea6e483ULL
|
||||
, 0x5cb0a9dcbd41fbd4ULL
|
||||
, 0x76f988da831153b5ULL
|
||||
, 0x983e5152ee66dfabULL
|
||||
, 0xa831c66d2db43210ULL
|
||||
, 0xb00327c898fb213fULL
|
||||
, 0xbf597fc7beef0ee4ULL
|
||||
, 0xc6e00bf33da88fc2ULL
|
||||
, 0xd5a79147930aa725ULL
|
||||
, 0x06ca6351e003826fULL
|
||||
, 0x142929670a0e6e70ULL
|
||||
, 0x27b70a8546d22ffcULL
|
||||
, 0x2e1b21385c26c926ULL
|
||||
, 0x4d2c6dfc5ac42aedULL
|
||||
, 0x53380d139d95b3dfULL
|
||||
, 0x650a73548baf63deULL
|
||||
, 0x766a0abb3c77b2a8ULL
|
||||
, 0x81c2c92e47edaee6ULL
|
||||
, 0x92722c851482353bULL
|
||||
, 0xa2bfe8a14cf10364ULL
|
||||
, 0xa81a664bbc423001ULL
|
||||
, 0xc24b8b70d0f89791ULL
|
||||
, 0xc76c51a30654be30ULL
|
||||
, 0xd192e819d6ef5218ULL
|
||||
, 0xd69906245565a910ULL
|
||||
, 0xf40e35855771202aULL
|
||||
, 0x106aa07032bbd1b8ULL
|
||||
, 0x19a4c116b8d2d0c8ULL
|
||||
, 0x1e376c085141ab53ULL
|
||||
, 0x2748774cdf8eeb99ULL
|
||||
, 0x34b0bcb5e19b48a8ULL
|
||||
, 0x391c0cb3c5c95a63ULL
|
||||
, 0x4ed8aa4ae3418acbULL
|
||||
, 0x5b9cca4f7763e373ULL
|
||||
, 0x682e6ff3d6b2b8a3ULL
|
||||
, 0x748f82ee5defb2fcULL
|
||||
, 0x78a5636f43172f60ULL
|
||||
, 0x84c87814a1f0ab72ULL
|
||||
, 0x8cc702081a6439ecULL
|
||||
, 0x90befffa23631e28ULL
|
||||
, 0xa4506cebde82bde9ULL
|
||||
, 0xbef9a3f7b2c67915ULL
|
||||
, 0xc67178f2e372532bULL
|
||||
, 0xca273eceea26619cULL
|
||||
, 0xd186b8c721c0c207ULL
|
||||
, 0xeada7dd6cde0eb1eULL
|
||||
, 0xf57d4f7fee6ed178ULL
|
||||
, 0x06f067aa72176fbaULL
|
||||
, 0x0a637dc5a2c898a6ULL
|
||||
, 0x113f9804bef90daeULL
|
||||
, 0x1b710b35131c471bULL
|
||||
, 0x28db77f523047d84ULL
|
||||
, 0x32caab7b40c72493ULL
|
||||
, 0x3c9ebe0a15c9bebcULL
|
||||
, 0x431d67c49c100d4cULL
|
||||
, 0x4cc5d4becb3e42b6ULL
|
||||
, 0x597f299cfc657e2aULL
|
||||
, 0x5fcb6fab3ad6faecULL
|
||||
, 0x6c44198c4a475817ULL
|
||||
|
||||
/* constants for the AVX code: */
|
||||
, 0x0001020304050607ULL
|
||||
, 0x08090a0b0c0d0e0fULL
|
||||
, 0x0001020304050607ULL
|
||||
, 0x08090a0b0c0d0e0fULL
|
||||
};
|
||||
|
||||
#define CUTOFF 65536 /* must be multiple of 128 */
|
||||
|
||||
int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,long long inlen)
|
||||
{
|
||||
while (inlen >= CUTOFF) {
|
||||
inner(statebytes,in,CUTOFF,constants); /* returns 0 */
|
||||
in += CUTOFF;
|
||||
inlen -= CUTOFF;
|
||||
}
|
||||
if (inlen < 128) return inlen;
|
||||
return inner(statebytes,in,inlen,constants);
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
|
||||
for i in (0,4,8,12):
|
||||
if len(sys.argv) > 1:
|
||||
if sys.argv[1] != str(i):
|
||||
continue
|
||||
|
||||
i0 = (i+0)&15
|
||||
i1 = (i+1)&15
|
||||
i9 = (i+9)&15
|
||||
i14 = (i+14)&15
|
||||
|
||||
print(' X%d = mem256[&w + %d]' % (i1,8*i1))
|
||||
print(' W%d = mem128[&w + %d],0' % (i14,8*i14))
|
||||
print('')
|
||||
print(' 4x X%dright1 = X%d unsigned>> 1' % (i1,i1))
|
||||
print(' 4x X%dleft63 = X%d << 63' % (i1,i1))
|
||||
print(' X%dsigma0 = X%dright1 ^ X%dleft63' % (i1,i1,i1))
|
||||
print(' 4x X%dright8 = X%d unsigned>> 8' % (i1,i1))
|
||||
print(' X%dsigma0 = X%dsigma0 ^ X%dright8' % (i1,i1,i1))
|
||||
print(' 2x,0 W%dright19 = W%d unsigned>> 19' % (i14,i14))
|
||||
print(' 4x X%dleft56 = X%d << 56' % (i1,i1))
|
||||
print(' 2x,0 W%dleft45 = W%d << 45' % (i14,i14))
|
||||
print(' X%dsigma0 = X%dsigma0 ^ X%dleft56' % (i1,i1,i1))
|
||||
print(' 1x,0 W%dsigma1 = W%dright19 ^ W%dleft45' % (i14,i14,i14))
|
||||
print(' 4x X%dright7 = X%d unsigned>> 7' % (i1,i1))
|
||||
print(' 2x,0 W%dright61 = W%d unsigned>> 61' % (i14,i14))
|
||||
print(' X%dsigma0 = X%dsigma0 ^ X%dright7' % (i1,i1,i1))
|
||||
print(' 1x,0 W%dsigma1 ^= W%dright61' % (i14,i14))
|
||||
print(' 4x X%d = X%d + X%dsigma0' % (i0,i0,i1))
|
||||
print(' 2x,0 W%dleft3 = W%d << 3' % (i14,i14))
|
||||
print(' 4x X%d = X%d + mem256[&w + %d]' % (i0,i0,8*i9))
|
||||
print(' 1x,0 W%dsigma1 ^= W%dleft3' % (i14,i14))
|
||||
print(' 2x,0 W%dright6 = W%d unsigned>> 6' % (i14,i14))
|
||||
print(' 1x,0 W%dsigma1 ^= W%dright6' % (i14,i14))
|
||||
print(' 4x X%d = W%dsigma1 + X%d' % (i0,i14,i0))
|
||||
print('')
|
||||
print(' 2x,0 W%dright19 = X%d unsigned>> 19' % (i0,i0))
|
||||
print(' 2x,0 W%dleft45 = X%d << 45' % (i0,i0))
|
||||
print(' 1x,0 W%dsigma1 = W%dright19 ^ W%dleft45' % (i0,i0,i0))
|
||||
print(' 2x,0 W%dright61 = X%d unsigned>> 61' % (i0,i0))
|
||||
print(' 1x,0 W%dsigma1 ^= W%dright61' % (i0,i0))
|
||||
print(' 2x,0 W%dleft3 = X%d << 3' % (i0,i0))
|
||||
print(' 1x,0 W%dsigma1 ^= W%dleft3' % (i0,i0))
|
||||
print(' 2x,0 W%dright6 = X%d unsigned>> 6' % (i0,i0))
|
||||
print(' 1x,0 W%dsigma1 ^= W%dright6' % (i0,i0))
|
||||
print(' W%dsigma1 = W%dsigma1[1],W%dsigma1[0]' % (i0,i0,i0))
|
||||
print('')
|
||||
print(' 4x X%d = X%d + W%dsigma1' % (i0,i0,i0))
|
||||
if i == 0:
|
||||
print(' mem256[&w + 128] = X%d' % (i0))
|
||||
print(' mem256[&w + %d] = X%d' % (8*i0,i0))
|
||||
print(' 4x D%d = X%d + mem256[constants + %d]' % (i0,i0,8*i0))
|
||||
print(' wc%d%d%d%d = D%d' % (i,i+1,i+2,i+3,i0))
|
||||
print('')
|
||||
@@ -0,0 +1,42 @@
|
||||
X1 = mem256[&w + 8]
|
||||
W14 = mem128[&w + 112],0
|
||||
|
||||
4x X1right1 = X1 unsigned>> 1
|
||||
4x X1left63 = X1 << 63
|
||||
X1sigma0 = X1right1 ^ X1left63
|
||||
4x X1right8 = X1 unsigned>> 8
|
||||
X1sigma0 = X1sigma0 ^ X1right8
|
||||
2x,0 W14right19 = W14 unsigned>> 19
|
||||
4x X1left56 = X1 << 56
|
||||
2x,0 W14left45 = W14 << 45
|
||||
X1sigma0 = X1sigma0 ^ X1left56
|
||||
1x,0 W14sigma1 = W14right19 ^ W14left45
|
||||
4x X1right7 = X1 unsigned>> 7
|
||||
2x,0 W14right61 = W14 unsigned>> 61
|
||||
X1sigma0 = X1sigma0 ^ X1right7
|
||||
1x,0 W14sigma1 ^= W14right61
|
||||
4x X0 = X0 + X1sigma0
|
||||
2x,0 W14left3 = W14 << 3
|
||||
4x X0 = X0 + mem256[&w + 72]
|
||||
1x,0 W14sigma1 ^= W14left3
|
||||
2x,0 W14right6 = W14 unsigned>> 6
|
||||
1x,0 W14sigma1 ^= W14right6
|
||||
4x X0 = W14sigma1 + X0
|
||||
|
||||
2x,0 W0right19 = X0 unsigned>> 19
|
||||
2x,0 W0left45 = X0 << 45
|
||||
1x,0 W0sigma1 = W0right19 ^ W0left45
|
||||
2x,0 W0right61 = X0 unsigned>> 61
|
||||
1x,0 W0sigma1 ^= W0right61
|
||||
2x,0 W0left3 = X0 << 3
|
||||
1x,0 W0sigma1 ^= W0left3
|
||||
2x,0 W0right6 = X0 unsigned>> 6
|
||||
1x,0 W0sigma1 ^= W0right6
|
||||
W0sigma1 = W0sigma1[1],W0sigma1[0]
|
||||
|
||||
4x X0 = X0 + W0sigma1
|
||||
mem256[&w + 128] = X0
|
||||
mem256[&w + 0] = X0
|
||||
4x D0 = X0 + mem256[constants + 0]
|
||||
wc0123 = D0
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
X13 = mem256[&w + 104]
|
||||
W10 = mem128[&w + 80],0
|
||||
|
||||
4x X13right1 = X13 unsigned>> 1
|
||||
4x X13left63 = X13 << 63
|
||||
X13sigma0 = X13right1 ^ X13left63
|
||||
4x X13right8 = X13 unsigned>> 8
|
||||
X13sigma0 = X13sigma0 ^ X13right8
|
||||
2x,0 W10right19 = W10 unsigned>> 19
|
||||
4x X13left56 = X13 << 56
|
||||
2x,0 W10left45 = W10 << 45
|
||||
X13sigma0 = X13sigma0 ^ X13left56
|
||||
1x,0 W10sigma1 = W10right19 ^ W10left45
|
||||
4x X13right7 = X13 unsigned>> 7
|
||||
2x,0 W10right61 = W10 unsigned>> 61
|
||||
X13sigma0 = X13sigma0 ^ X13right7
|
||||
1x,0 W10sigma1 ^= W10right61
|
||||
4x X12 = X12 + X13sigma0
|
||||
2x,0 W10left3 = W10 << 3
|
||||
4x X12 = X12 + mem256[&w + 40]
|
||||
1x,0 W10sigma1 ^= W10left3
|
||||
2x,0 W10right6 = W10 unsigned>> 6
|
||||
1x,0 W10sigma1 ^= W10right6
|
||||
4x X12 = W10sigma1 + X12
|
||||
|
||||
2x,0 W12right19 = X12 unsigned>> 19
|
||||
2x,0 W12left45 = X12 << 45
|
||||
1x,0 W12sigma1 = W12right19 ^ W12left45
|
||||
2x,0 W12right61 = X12 unsigned>> 61
|
||||
1x,0 W12sigma1 ^= W12right61
|
||||
2x,0 W12left3 = X12 << 3
|
||||
1x,0 W12sigma1 ^= W12left3
|
||||
2x,0 W12right6 = X12 unsigned>> 6
|
||||
1x,0 W12sigma1 ^= W12right6
|
||||
W12sigma1 = W12sigma1[1],W12sigma1[0]
|
||||
|
||||
4x X12 = X12 + W12sigma1
|
||||
mem256[&w + 96] = X12
|
||||
4x D12 = X12 + mem256[constants + 96]
|
||||
wc12131415 = D12
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
X5 = mem256[&w + 40]
|
||||
W2 = mem128[&w + 16],0
|
||||
|
||||
4x X5right1 = X5 unsigned>> 1
|
||||
4x X5left63 = X5 << 63
|
||||
X5sigma0 = X5right1 ^ X5left63
|
||||
4x X5right8 = X5 unsigned>> 8
|
||||
X5sigma0 = X5sigma0 ^ X5right8
|
||||
2x,0 W2right19 = W2 unsigned>> 19
|
||||
4x X5left56 = X5 << 56
|
||||
2x,0 W2left45 = W2 << 45
|
||||
X5sigma0 = X5sigma0 ^ X5left56
|
||||
1x,0 W2sigma1 = W2right19 ^ W2left45
|
||||
4x X5right7 = X5 unsigned>> 7
|
||||
2x,0 W2right61 = W2 unsigned>> 61
|
||||
X5sigma0 = X5sigma0 ^ X5right7
|
||||
1x,0 W2sigma1 ^= W2right61
|
||||
4x X4 = X4 + X5sigma0
|
||||
2x,0 W2left3 = W2 << 3
|
||||
4x X4 = X4 + mem256[&w + 104]
|
||||
1x,0 W2sigma1 ^= W2left3
|
||||
2x,0 W2right6 = W2 unsigned>> 6
|
||||
1x,0 W2sigma1 ^= W2right6
|
||||
4x X4 = W2sigma1 + X4
|
||||
|
||||
2x,0 W4right19 = X4 unsigned>> 19
|
||||
2x,0 W4left45 = X4 << 45
|
||||
1x,0 W4sigma1 = W4right19 ^ W4left45
|
||||
2x,0 W4right61 = X4 unsigned>> 61
|
||||
1x,0 W4sigma1 ^= W4right61
|
||||
2x,0 W4left3 = X4 << 3
|
||||
1x,0 W4sigma1 ^= W4left3
|
||||
2x,0 W4right6 = X4 unsigned>> 6
|
||||
1x,0 W4sigma1 ^= W4right6
|
||||
W4sigma1 = W4sigma1[1],W4sigma1[0]
|
||||
|
||||
4x X4 = X4 + W4sigma1
|
||||
mem256[&w + 32] = X4
|
||||
4x D4 = X4 + mem256[constants + 32]
|
||||
wc4567 = D4
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
X9 = mem256[&w + 72]
|
||||
W6 = mem128[&w + 48],0
|
||||
|
||||
4x X9right1 = X9 unsigned>> 1
|
||||
4x X9left63 = X9 << 63
|
||||
X9sigma0 = X9right1 ^ X9left63
|
||||
4x X9right8 = X9 unsigned>> 8
|
||||
X9sigma0 = X9sigma0 ^ X9right8
|
||||
2x,0 W6right19 = W6 unsigned>> 19
|
||||
4x X9left56 = X9 << 56
|
||||
2x,0 W6left45 = W6 << 45
|
||||
X9sigma0 = X9sigma0 ^ X9left56
|
||||
1x,0 W6sigma1 = W6right19 ^ W6left45
|
||||
4x X9right7 = X9 unsigned>> 7
|
||||
2x,0 W6right61 = W6 unsigned>> 61
|
||||
X9sigma0 = X9sigma0 ^ X9right7
|
||||
1x,0 W6sigma1 ^= W6right61
|
||||
4x X8 = X8 + X9sigma0
|
||||
2x,0 W6left3 = W6 << 3
|
||||
4x X8 = X8 + mem256[&w + 8]
|
||||
1x,0 W6sigma1 ^= W6left3
|
||||
2x,0 W6right6 = W6 unsigned>> 6
|
||||
1x,0 W6sigma1 ^= W6right6
|
||||
4x X8 = W6sigma1 + X8
|
||||
|
||||
2x,0 W8right19 = X8 unsigned>> 19
|
||||
2x,0 W8left45 = X8 << 45
|
||||
1x,0 W8sigma1 = W8right19 ^ W8left45
|
||||
2x,0 W8right61 = X8 unsigned>> 61
|
||||
1x,0 W8sigma1 ^= W8right61
|
||||
2x,0 W8left3 = X8 << 3
|
||||
1x,0 W8sigma1 ^= W8left3
|
||||
2x,0 W8right6 = X8 unsigned>> 6
|
||||
1x,0 W8sigma1 ^= W8right6
|
||||
W8sigma1 = W8sigma1[1],W8sigma1[0]
|
||||
|
||||
4x X8 = X8 + W8sigma1
|
||||
mem256[&w + 64] = X8
|
||||
4x D8 = X8 + mem256[constants + 64]
|
||||
wc891011 = D8
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
../m3/implementors
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,15 @@
|
||||
#!/bin/sh
|
||||
|
||||
cpp \
|
||||
| qhasm-amd64avx \
|
||||
| sed 's/\<inner\>/CRYPTO_SHARED_NAMESPACE(inner)/' \
|
||||
| sed 's/\<_inner\>/_CRYPTO_SHARED_NAMESPACE(inner)/' \
|
||||
| sed 's/^\.p2align 5/.p2align 7/' \
|
||||
| awk '{
|
||||
found = 0
|
||||
if (!found && $0 == "and $31,%r11") {
|
||||
found = 1
|
||||
$0 = "and $511,%r11"
|
||||
}
|
||||
print
|
||||
}'
|
||||
@@ -0,0 +1,10 @@
|
||||
#ifndef inner_h
|
||||
#define inner_h
|
||||
|
||||
#define inner CRYPTO_SHARED_NAMESPACE(inner)
|
||||
|
||||
#include "crypto_uint64.h"
|
||||
|
||||
extern int inner(unsigned char *,const unsigned char *,unsigned long long,const crypto_uint64 *);
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
+198
@@ -0,0 +1,198 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
|
||||
j = int(sys.argv[1])
|
||||
|
||||
j0 = (j+0)&15
|
||||
j1 = (j+1)&15
|
||||
j2 = (j+2)&15
|
||||
j3 = (j+3)&15
|
||||
j9 = (j+9)&15
|
||||
j14 = (j+14)&15
|
||||
|
||||
k0 = (j+8)&15
|
||||
k1 = (j+9)&15
|
||||
k2 = (j+10)&15
|
||||
k3 = (j+11)&15
|
||||
|
||||
wcin = 'wc%d%d%d%d' % (k0,k1,k2,k3)
|
||||
wcout = 'wc%d%d%d%d' % (j0,j1,j2,j3)
|
||||
|
||||
i0 = (j+0)&7
|
||||
i1 = (j+1)&7
|
||||
i2 = (j+2)&7
|
||||
i3 = (j+3)&7
|
||||
i4 = (j+4)&7
|
||||
i5 = (j+5)&7
|
||||
i6 = (j+6)&7
|
||||
i7 = (j+7)&7
|
||||
|
||||
print(' X%d = mem256[&w + %d]' % (j1,8*j1))
|
||||
print(' 4x X%dright1 = X%d unsigned>> 1' % (j1,j1))
|
||||
print(' r%dSigma1 = r%d>>>14' % (i4,i4))
|
||||
print(' r%d += %s[0]' % (i7,wcin))
|
||||
print(' ch%d = r%d' % (i7,i6))
|
||||
print(' ch%d ^= r%d' % (i7,i5))
|
||||
print('')
|
||||
print(' 4x X%dleft63 = X%d << 63' % (j1,j1))
|
||||
print(' r%d18 = r%d>>>18' % (i4,i4))
|
||||
print(' ch%d &= r%d' % (i7,i4))
|
||||
print(' maj%d = r%d' % (i6,i1))
|
||||
print(' maj%d ^= r%d' % (i6,i0))
|
||||
print('')
|
||||
print(' X%dsigma0 = X%dright1 ^ X%dleft63' % (j1,j1,j1))
|
||||
print(' r%d41 = r%d>>>41' % (i4,i4))
|
||||
print(' r%dSigma1 ^= r%d18' % (i4,i4))
|
||||
print(' ch%d ^= r%d' % (i7,i6))
|
||||
print('')
|
||||
print(' 4x X%dright8 = X%d unsigned>> 8' % (j1,j1))
|
||||
print(' r%dSigma1 ^= r%d41' % (i4,i4))
|
||||
print(' r%dSigma0 = r%d>>>28' % (i0,i0))
|
||||
print(' r%d += ch%d' % (i7,i7))
|
||||
print('')
|
||||
print(' X%dsigma0 = X%dsigma0 ^ X%dright8' % (j1,j1,j1))
|
||||
print(' r%d34 = r%d>>>34' % (i0,i0))
|
||||
print(' r%d += r%dSigma1' % (i7,i4))
|
||||
print(' maj%d = r%d' % (i7,i2))
|
||||
print(' maj%d &= maj%d' % (i7,i6))
|
||||
print('')
|
||||
print(' W%d = mem128[&w + %d],0' % (j14,8*j14))
|
||||
print(' 2x,0 W%dright19 = W%d unsigned>> 19' % (j14,j14))
|
||||
print(' r%dSigma0 ^= r%d34' % (i0,i0))
|
||||
print(' r%d39 = r%d>>>39' % (i0,i0))
|
||||
print(' r%d += r%d' % (i3,i7))
|
||||
print('')
|
||||
print(' 4x X%dleft56 = X%d << 56' % (j1,j1))
|
||||
print(' r%dSigma0 ^= r%d39' % (i0,i0))
|
||||
print(' r%d += %s[1]' % (i6,wcin))
|
||||
print(' r%dandr%d = r%d' % (i0,i1,i1))
|
||||
print(' r%dandr%d &= r%d' % (i0,i1,i0))
|
||||
print('')
|
||||
print(' 2x,0 W%dleft45 = W%d << 45' % (j14,j14))
|
||||
print(' r%d += r%dSigma0' % (i7,i0))
|
||||
print(' maj%d ^= r%dandr%d' % (i7,i0,i1))
|
||||
print(' ch%d = r%d' % (i6,i5))
|
||||
print(' ch%d ^= r%d' % (i6,i4))
|
||||
print('')
|
||||
print(' X%dsigma0 = X%dsigma0 ^ X%dleft56' % (j1,j1,j1))
|
||||
print(' 2x,0 W%dright61 = W%d unsigned>> 61' % (j14,j14))
|
||||
print(' r%dSigma1 = r%d>>>14' % (i3,i3))
|
||||
print(' r%d += maj%d' % (i7,i7))
|
||||
print('')
|
||||
print(' 4x X%dright7 = X%d unsigned>> 7' % (j1,j1))
|
||||
print(' 1x,0 W%dsigma1 = W%dright19 ^ W%dleft45' % (j14,j14,j14))
|
||||
print(' r%d18 = r%d>>>18' % (i3,i3))
|
||||
print(' ch%d &= r%d' % (i6,i3))
|
||||
print('')
|
||||
print(' X%dsigma0 = X%dsigma0 ^ X%dright7' % (j1,j1,j1))
|
||||
print(' r%dSigma1 ^= r%d18' % (i3,i3))
|
||||
print(' r%d41 = r%d>>>41' % (i3,i3))
|
||||
print(' maj%d &= r%d' % (i6,i7))
|
||||
print('')
|
||||
print(' 1x,0 W%dsigma1 ^= W%dright61' % (j14,j14))
|
||||
print(' 4x X%d = X%d + mem256[&w + %d]' % (j0,j0,8*j9))
|
||||
print(' r%dSigma1 ^= r%d41' % (i3,i3))
|
||||
print(' maj%d ^= r%dandr%d' % (i6,i0,i1))
|
||||
print('')
|
||||
print(' 2x,0 W%dleft3 = W%d << 3' % (j14,j14))
|
||||
print(' r%dSigma0 = r%d>>>28' % (i7,i7))
|
||||
print(' ch%d ^= r%d' % (i6,i5))
|
||||
print(' r%d += r%dSigma1' % (i6,i3))
|
||||
print('')
|
||||
print(' 4x X%d = X%d + X%dsigma0' % (j0,j0,j1))
|
||||
print(' r%d34 = r%d>>>34' % (i7,i7))
|
||||
print(' r%d += %s[2]' % (i5,wcin))
|
||||
print(' r%d += ch%d' % (i6,i6))
|
||||
print('')
|
||||
print(' 1x,0 W%dsigma1 ^= W%dleft3' % (j14,j14))
|
||||
print(' r%dSigma0 ^= r%d34' % (i7,i7))
|
||||
print(' r%d39 = r%d>>>39' % (i7,i7))
|
||||
print(' r%d += r%d' % (i2,i6))
|
||||
print('')
|
||||
print(' 2x,0 W%dright6 = W%d unsigned>> 6' % (j14,j14))
|
||||
print(' r%dSigma0 ^= r%d39' % (i7,i7))
|
||||
print(' r%d += maj%d' % (i6,i6))
|
||||
print(' ch%d = r%d' % (i5,i4))
|
||||
print(' ch%d ^= r%d' % (i5,i3))
|
||||
print('')
|
||||
print(' 1x,0 W%dsigma1 ^= W%dright6' % (j14,j14))
|
||||
print(' r%d += r%dSigma0' % (i6,i7))
|
||||
print(' r%dSigma1 = r%d>>>14' % (i2,i2))
|
||||
print(' ch%d &= r%d' % (i5,i2))
|
||||
print('')
|
||||
print(' 4x X%d = W%dsigma1 + X%d' % (j0,j14,j0))
|
||||
print(' r%d18 = r%d>>>18' % (i2,i2))
|
||||
print(' r%d41 = r%d>>>41' % (i2,i2))
|
||||
print(' ch%d ^= r%d' % (i5,i4))
|
||||
print('')
|
||||
print(' 2x,0 W%dright19 = X%d unsigned>> 19' % (j0,j0))
|
||||
print(' r%dSigma1 ^= r%d18' % (i2,i2))
|
||||
print(' r%dSigma0 = r%d>>>28' % (i6,i6))
|
||||
print(' r%d += ch%d' % (i5,i5))
|
||||
print('')
|
||||
print(' 2x,0 W%dleft45 = X%d << 45' % (j0,j0))
|
||||
print(' r%dSigma1 ^= r%d41' % (i2,i2))
|
||||
print(' r%d34 = r%d>>>34' % (i6,i6))
|
||||
print(' maj%d = r%d' % (i4,i7))
|
||||
print(' maj%d ^= r%d' % (i4,i6))
|
||||
print('')
|
||||
print(' 2x,0 W%dright61 = X%d unsigned>> 61' % (j0,j0))
|
||||
print(' 1x,0 W%dsigma1 = W%dright19 ^ W%dleft45' % (j0,j0,j0))
|
||||
print(' r%dSigma0 ^= r%d34' % (i6,i6))
|
||||
print(' r%d39 = r%d>>>39' % (i6,i6))
|
||||
print('')
|
||||
print(' 2x,0 W%dleft3 = X%d << 3' % (j0,j0))
|
||||
print(' 1x,0 W%dsigma1 ^= W%dright61' % (j0,j0))
|
||||
print(' r%dSigma0 ^= r%d39' % (i6,i6))
|
||||
print(' r%d += r%dSigma1' % (i5,i2))
|
||||
print('')
|
||||
print(' 2x,0 W%dright6 = X%d unsigned>> 6' % (j0,j0))
|
||||
print(' 1x,0 W%dsigma1 ^= W%dleft3' % (j0,j0))
|
||||
print(' r%d += r%d' % (i1,i5))
|
||||
print(' r%dandr%d = r%d' % (i6,i7,i7))
|
||||
print(' r%dandr%d &= r%d' % (i6,i7,i6))
|
||||
print('')
|
||||
print(' 1x,0 W%dsigma1 ^= W%dright6' % (j0,j0))
|
||||
print(' r%dSigma1 = r%d>>>14' % (i1,i1))
|
||||
print(' r%d += r%dSigma0' % (i5,i6))
|
||||
print(' maj%d = r%d' % (i5,i0))
|
||||
print(' maj%d &= maj%d' % (i5,i4))
|
||||
print('')
|
||||
print(' W%dsigma1 = W%dsigma1[1],W%dsigma1[0]' % (j0,j0,j0))
|
||||
print(' maj%d ^= r%dandr%d' % (i5,i6,i7))
|
||||
print(' ch%d = r%d' % (i4,i3))
|
||||
print(' ch%d ^= r%d' % (i4,i2))
|
||||
print('')
|
||||
print(' r%d += maj%d' % (i5,i5))
|
||||
print(' ch%d &= r%d' % (i4,i1))
|
||||
print(' r%d18 = r%d>>>18' % (i1,i1))
|
||||
print('')
|
||||
print(' maj%d &= r%d' % (i4,i5))
|
||||
print(' ch%d ^= r%d' % (i4,i3))
|
||||
print(' r%d += %s[3]' % (i4,wcin))
|
||||
print(' r%dSigma1 ^= r%d18' % (i1,i1))
|
||||
print('')
|
||||
print(' r%d41 = r%d>>>41' % (i1,i1))
|
||||
print(' 4x X%d = X%d + W%dsigma1' % (j0,j0,j0))
|
||||
if j0 == 0:
|
||||
print(' mem256[&w + 128] = X%d' % (j0))
|
||||
print(' mem256[&w + %d] = X%d' % (8*j0,j0))
|
||||
print(' r%d += ch%d' % (i4,i4))
|
||||
print(' maj%d ^= r%dandr%d' % (i4,i6,i7))
|
||||
print('')
|
||||
print(' r%dSigma0 = r%d>>>28' % (i5,i5))
|
||||
print(' 4x D%d = X%d + mem256[constants + %d]' % (j0,j0,8*j0))
|
||||
print(' %s = D%d' % (wcout,j0))
|
||||
print(' r%d34 = r%d>>>34' % (i5,i5))
|
||||
print(' r%dSigma1 ^= r%d41' % (i1,i1))
|
||||
print('')
|
||||
print(' r%d += r%dSigma1' % (i4,i1))
|
||||
print(' r%dSigma0 ^= r%d34' % (i5,i5))
|
||||
print(' r%d39 = r%d>>>39' % (i5,i5))
|
||||
print('')
|
||||
print(' r%d += r%d' % (i0,i4))
|
||||
print(' r%d += maj%d' % (i4,i4))
|
||||
print(' r%dSigma0 ^= r%d39' % (i5,i5))
|
||||
print('')
|
||||
print(' r%d += r%dSigma0' % (i4,i5))
|
||||
@@ -0,0 +1,167 @@
|
||||
X1 = mem256[&w + 8]
|
||||
4x X1right1 = X1 unsigned>> 1
|
||||
r4Sigma1 = r4>>>14
|
||||
r7 += wc891011[0]
|
||||
ch7 = r6
|
||||
ch7 ^= r5
|
||||
|
||||
4x X1left63 = X1 << 63
|
||||
r418 = r4>>>18
|
||||
ch7 &= r4
|
||||
maj6 = r1
|
||||
maj6 ^= r0
|
||||
|
||||
X1sigma0 = X1right1 ^ X1left63
|
||||
r441 = r4>>>41
|
||||
r4Sigma1 ^= r418
|
||||
ch7 ^= r6
|
||||
|
||||
4x X1right8 = X1 unsigned>> 8
|
||||
r4Sigma1 ^= r441
|
||||
r0Sigma0 = r0>>>28
|
||||
r7 += ch7
|
||||
|
||||
X1sigma0 = X1sigma0 ^ X1right8
|
||||
r034 = r0>>>34
|
||||
r7 += r4Sigma1
|
||||
maj7 = r2
|
||||
maj7 &= maj6
|
||||
|
||||
W14 = mem128[&w + 112],0
|
||||
2x,0 W14right19 = W14 unsigned>> 19
|
||||
r0Sigma0 ^= r034
|
||||
r039 = r0>>>39
|
||||
r3 += r7
|
||||
|
||||
4x X1left56 = X1 << 56
|
||||
r0Sigma0 ^= r039
|
||||
r6 += wc891011[1]
|
||||
r0andr1 = r1
|
||||
r0andr1 &= r0
|
||||
|
||||
2x,0 W14left45 = W14 << 45
|
||||
r7 += r0Sigma0
|
||||
maj7 ^= r0andr1
|
||||
ch6 = r5
|
||||
ch6 ^= r4
|
||||
|
||||
X1sigma0 = X1sigma0 ^ X1left56
|
||||
2x,0 W14right61 = W14 unsigned>> 61
|
||||
r3Sigma1 = r3>>>14
|
||||
r7 += maj7
|
||||
|
||||
4x X1right7 = X1 unsigned>> 7
|
||||
1x,0 W14sigma1 = W14right19 ^ W14left45
|
||||
r318 = r3>>>18
|
||||
ch6 &= r3
|
||||
|
||||
X1sigma0 = X1sigma0 ^ X1right7
|
||||
r3Sigma1 ^= r318
|
||||
r341 = r3>>>41
|
||||
maj6 &= r7
|
||||
|
||||
1x,0 W14sigma1 ^= W14right61
|
||||
4x X0 = X0 + mem256[&w + 72]
|
||||
r3Sigma1 ^= r341
|
||||
maj6 ^= r0andr1
|
||||
|
||||
2x,0 W14left3 = W14 << 3
|
||||
r7Sigma0 = r7>>>28
|
||||
ch6 ^= r5
|
||||
r6 += r3Sigma1
|
||||
|
||||
4x X0 = X0 + X1sigma0
|
||||
r734 = r7>>>34
|
||||
r5 += wc891011[2]
|
||||
r6 += ch6
|
||||
|
||||
1x,0 W14sigma1 ^= W14left3
|
||||
r7Sigma0 ^= r734
|
||||
r739 = r7>>>39
|
||||
r2 += r6
|
||||
|
||||
2x,0 W14right6 = W14 unsigned>> 6
|
||||
r7Sigma0 ^= r739
|
||||
r6 += maj6
|
||||
ch5 = r4
|
||||
ch5 ^= r3
|
||||
|
||||
1x,0 W14sigma1 ^= W14right6
|
||||
r6 += r7Sigma0
|
||||
r2Sigma1 = r2>>>14
|
||||
ch5 &= r2
|
||||
|
||||
4x X0 = W14sigma1 + X0
|
||||
r218 = r2>>>18
|
||||
r241 = r2>>>41
|
||||
ch5 ^= r4
|
||||
|
||||
2x,0 W0right19 = X0 unsigned>> 19
|
||||
r2Sigma1 ^= r218
|
||||
r6Sigma0 = r6>>>28
|
||||
r5 += ch5
|
||||
|
||||
2x,0 W0left45 = X0 << 45
|
||||
r2Sigma1 ^= r241
|
||||
r634 = r6>>>34
|
||||
maj4 = r7
|
||||
maj4 ^= r6
|
||||
|
||||
2x,0 W0right61 = X0 unsigned>> 61
|
||||
1x,0 W0sigma1 = W0right19 ^ W0left45
|
||||
r6Sigma0 ^= r634
|
||||
r639 = r6>>>39
|
||||
|
||||
2x,0 W0left3 = X0 << 3
|
||||
1x,0 W0sigma1 ^= W0right61
|
||||
r6Sigma0 ^= r639
|
||||
r5 += r2Sigma1
|
||||
|
||||
2x,0 W0right6 = X0 unsigned>> 6
|
||||
1x,0 W0sigma1 ^= W0left3
|
||||
r1 += r5
|
||||
r6andr7 = r7
|
||||
r6andr7 &= r6
|
||||
|
||||
1x,0 W0sigma1 ^= W0right6
|
||||
r1Sigma1 = r1>>>14
|
||||
r5 += r6Sigma0
|
||||
maj5 = r0
|
||||
maj5 &= maj4
|
||||
|
||||
W0sigma1 = W0sigma1[1],W0sigma1[0]
|
||||
maj5 ^= r6andr7
|
||||
ch4 = r3
|
||||
ch4 ^= r2
|
||||
|
||||
r5 += maj5
|
||||
ch4 &= r1
|
||||
r118 = r1>>>18
|
||||
|
||||
maj4 &= r5
|
||||
ch4 ^= r3
|
||||
r4 += wc891011[3]
|
||||
r1Sigma1 ^= r118
|
||||
|
||||
r141 = r1>>>41
|
||||
4x X0 = X0 + W0sigma1
|
||||
mem256[&w + 128] = X0
|
||||
mem256[&w + 0] = X0
|
||||
r4 += ch4
|
||||
maj4 ^= r6andr7
|
||||
|
||||
r5Sigma0 = r5>>>28
|
||||
4x D0 = X0 + mem256[constants + 0]
|
||||
wc0123 = D0
|
||||
r534 = r5>>>34
|
||||
r1Sigma1 ^= r141
|
||||
|
||||
r4 += r1Sigma1
|
||||
r5Sigma0 ^= r534
|
||||
r539 = r5>>>39
|
||||
|
||||
r0 += r4
|
||||
r4 += maj4
|
||||
r5Sigma0 ^= r539
|
||||
|
||||
r4 += r5Sigma0
|
||||
@@ -0,0 +1,166 @@
|
||||
X13 = mem256[&w + 104]
|
||||
4x X13right1 = X13 unsigned>> 1
|
||||
r0Sigma1 = r0>>>14
|
||||
r3 += wc4567[0]
|
||||
ch3 = r2
|
||||
ch3 ^= r1
|
||||
|
||||
4x X13left63 = X13 << 63
|
||||
r018 = r0>>>18
|
||||
ch3 &= r0
|
||||
maj2 = r5
|
||||
maj2 ^= r4
|
||||
|
||||
X13sigma0 = X13right1 ^ X13left63
|
||||
r041 = r0>>>41
|
||||
r0Sigma1 ^= r018
|
||||
ch3 ^= r2
|
||||
|
||||
4x X13right8 = X13 unsigned>> 8
|
||||
r0Sigma1 ^= r041
|
||||
r4Sigma0 = r4>>>28
|
||||
r3 += ch3
|
||||
|
||||
X13sigma0 = X13sigma0 ^ X13right8
|
||||
r434 = r4>>>34
|
||||
r3 += r0Sigma1
|
||||
maj3 = r6
|
||||
maj3 &= maj2
|
||||
|
||||
W10 = mem128[&w + 80],0
|
||||
2x,0 W10right19 = W10 unsigned>> 19
|
||||
r4Sigma0 ^= r434
|
||||
r439 = r4>>>39
|
||||
r7 += r3
|
||||
|
||||
4x X13left56 = X13 << 56
|
||||
r4Sigma0 ^= r439
|
||||
r2 += wc4567[1]
|
||||
r4andr5 = r5
|
||||
r4andr5 &= r4
|
||||
|
||||
2x,0 W10left45 = W10 << 45
|
||||
r3 += r4Sigma0
|
||||
maj3 ^= r4andr5
|
||||
ch2 = r1
|
||||
ch2 ^= r0
|
||||
|
||||
X13sigma0 = X13sigma0 ^ X13left56
|
||||
2x,0 W10right61 = W10 unsigned>> 61
|
||||
r7Sigma1 = r7>>>14
|
||||
r3 += maj3
|
||||
|
||||
4x X13right7 = X13 unsigned>> 7
|
||||
1x,0 W10sigma1 = W10right19 ^ W10left45
|
||||
r718 = r7>>>18
|
||||
ch2 &= r7
|
||||
|
||||
X13sigma0 = X13sigma0 ^ X13right7
|
||||
r7Sigma1 ^= r718
|
||||
r741 = r7>>>41
|
||||
maj2 &= r3
|
||||
|
||||
1x,0 W10sigma1 ^= W10right61
|
||||
4x X12 = X12 + mem256[&w + 40]
|
||||
r7Sigma1 ^= r741
|
||||
maj2 ^= r4andr5
|
||||
|
||||
2x,0 W10left3 = W10 << 3
|
||||
r3Sigma0 = r3>>>28
|
||||
ch2 ^= r1
|
||||
r2 += r7Sigma1
|
||||
|
||||
4x X12 = X12 + X13sigma0
|
||||
r334 = r3>>>34
|
||||
r1 += wc4567[2]
|
||||
r2 += ch2
|
||||
|
||||
1x,0 W10sigma1 ^= W10left3
|
||||
r3Sigma0 ^= r334
|
||||
r339 = r3>>>39
|
||||
r6 += r2
|
||||
|
||||
2x,0 W10right6 = W10 unsigned>> 6
|
||||
r3Sigma0 ^= r339
|
||||
r2 += maj2
|
||||
ch1 = r0
|
||||
ch1 ^= r7
|
||||
|
||||
1x,0 W10sigma1 ^= W10right6
|
||||
r2 += r3Sigma0
|
||||
r6Sigma1 = r6>>>14
|
||||
ch1 &= r6
|
||||
|
||||
4x X12 = W10sigma1 + X12
|
||||
r618 = r6>>>18
|
||||
r641 = r6>>>41
|
||||
ch1 ^= r0
|
||||
|
||||
2x,0 W12right19 = X12 unsigned>> 19
|
||||
r6Sigma1 ^= r618
|
||||
r2Sigma0 = r2>>>28
|
||||
r1 += ch1
|
||||
|
||||
2x,0 W12left45 = X12 << 45
|
||||
r6Sigma1 ^= r641
|
||||
r234 = r2>>>34
|
||||
maj0 = r3
|
||||
maj0 ^= r2
|
||||
|
||||
2x,0 W12right61 = X12 unsigned>> 61
|
||||
1x,0 W12sigma1 = W12right19 ^ W12left45
|
||||
r2Sigma0 ^= r234
|
||||
r239 = r2>>>39
|
||||
|
||||
2x,0 W12left3 = X12 << 3
|
||||
1x,0 W12sigma1 ^= W12right61
|
||||
r2Sigma0 ^= r239
|
||||
r1 += r6Sigma1
|
||||
|
||||
2x,0 W12right6 = X12 unsigned>> 6
|
||||
1x,0 W12sigma1 ^= W12left3
|
||||
r5 += r1
|
||||
r2andr3 = r3
|
||||
r2andr3 &= r2
|
||||
|
||||
1x,0 W12sigma1 ^= W12right6
|
||||
r5Sigma1 = r5>>>14
|
||||
r1 += r2Sigma0
|
||||
maj1 = r4
|
||||
maj1 &= maj0
|
||||
|
||||
W12sigma1 = W12sigma1[1],W12sigma1[0]
|
||||
maj1 ^= r2andr3
|
||||
ch0 = r7
|
||||
ch0 ^= r6
|
||||
|
||||
r1 += maj1
|
||||
ch0 &= r5
|
||||
r518 = r5>>>18
|
||||
|
||||
maj0 &= r1
|
||||
ch0 ^= r7
|
||||
r0 += wc4567[3]
|
||||
r5Sigma1 ^= r518
|
||||
|
||||
r541 = r5>>>41
|
||||
4x X12 = X12 + W12sigma1
|
||||
mem256[&w + 96] = X12
|
||||
r0 += ch0
|
||||
maj0 ^= r2andr3
|
||||
|
||||
r1Sigma0 = r1>>>28
|
||||
4x D12 = X12 + mem256[constants + 96]
|
||||
wc12131415 = D12
|
||||
r134 = r1>>>34
|
||||
r5Sigma1 ^= r541
|
||||
|
||||
r0 += r5Sigma1
|
||||
r1Sigma0 ^= r134
|
||||
r139 = r1>>>39
|
||||
|
||||
r4 += r0
|
||||
r0 += maj0
|
||||
r1Sigma0 ^= r139
|
||||
|
||||
r0 += r1Sigma0
|
||||
@@ -0,0 +1,166 @@
|
||||
X5 = mem256[&w + 40]
|
||||
4x X5right1 = X5 unsigned>> 1
|
||||
r0Sigma1 = r0>>>14
|
||||
r3 += wc12131415[0]
|
||||
ch3 = r2
|
||||
ch3 ^= r1
|
||||
|
||||
4x X5left63 = X5 << 63
|
||||
r018 = r0>>>18
|
||||
ch3 &= r0
|
||||
maj2 = r5
|
||||
maj2 ^= r4
|
||||
|
||||
X5sigma0 = X5right1 ^ X5left63
|
||||
r041 = r0>>>41
|
||||
r0Sigma1 ^= r018
|
||||
ch3 ^= r2
|
||||
|
||||
4x X5right8 = X5 unsigned>> 8
|
||||
r0Sigma1 ^= r041
|
||||
r4Sigma0 = r4>>>28
|
||||
r3 += ch3
|
||||
|
||||
X5sigma0 = X5sigma0 ^ X5right8
|
||||
r434 = r4>>>34
|
||||
r3 += r0Sigma1
|
||||
maj3 = r6
|
||||
maj3 &= maj2
|
||||
|
||||
W2 = mem128[&w + 16],0
|
||||
2x,0 W2right19 = W2 unsigned>> 19
|
||||
r4Sigma0 ^= r434
|
||||
r439 = r4>>>39
|
||||
r7 += r3
|
||||
|
||||
4x X5left56 = X5 << 56
|
||||
r4Sigma0 ^= r439
|
||||
r2 += wc12131415[1]
|
||||
r4andr5 = r5
|
||||
r4andr5 &= r4
|
||||
|
||||
2x,0 W2left45 = W2 << 45
|
||||
r3 += r4Sigma0
|
||||
maj3 ^= r4andr5
|
||||
ch2 = r1
|
||||
ch2 ^= r0
|
||||
|
||||
X5sigma0 = X5sigma0 ^ X5left56
|
||||
2x,0 W2right61 = W2 unsigned>> 61
|
||||
r7Sigma1 = r7>>>14
|
||||
r3 += maj3
|
||||
|
||||
4x X5right7 = X5 unsigned>> 7
|
||||
1x,0 W2sigma1 = W2right19 ^ W2left45
|
||||
r718 = r7>>>18
|
||||
ch2 &= r7
|
||||
|
||||
X5sigma0 = X5sigma0 ^ X5right7
|
||||
r7Sigma1 ^= r718
|
||||
r741 = r7>>>41
|
||||
maj2 &= r3
|
||||
|
||||
1x,0 W2sigma1 ^= W2right61
|
||||
4x X4 = X4 + mem256[&w + 104]
|
||||
r7Sigma1 ^= r741
|
||||
maj2 ^= r4andr5
|
||||
|
||||
2x,0 W2left3 = W2 << 3
|
||||
r3Sigma0 = r3>>>28
|
||||
ch2 ^= r1
|
||||
r2 += r7Sigma1
|
||||
|
||||
4x X4 = X4 + X5sigma0
|
||||
r334 = r3>>>34
|
||||
r1 += wc12131415[2]
|
||||
r2 += ch2
|
||||
|
||||
1x,0 W2sigma1 ^= W2left3
|
||||
r3Sigma0 ^= r334
|
||||
r339 = r3>>>39
|
||||
r6 += r2
|
||||
|
||||
2x,0 W2right6 = W2 unsigned>> 6
|
||||
r3Sigma0 ^= r339
|
||||
r2 += maj2
|
||||
ch1 = r0
|
||||
ch1 ^= r7
|
||||
|
||||
1x,0 W2sigma1 ^= W2right6
|
||||
r2 += r3Sigma0
|
||||
r6Sigma1 = r6>>>14
|
||||
ch1 &= r6
|
||||
|
||||
4x X4 = W2sigma1 + X4
|
||||
r618 = r6>>>18
|
||||
r641 = r6>>>41
|
||||
ch1 ^= r0
|
||||
|
||||
2x,0 W4right19 = X4 unsigned>> 19
|
||||
r6Sigma1 ^= r618
|
||||
r2Sigma0 = r2>>>28
|
||||
r1 += ch1
|
||||
|
||||
2x,0 W4left45 = X4 << 45
|
||||
r6Sigma1 ^= r641
|
||||
r234 = r2>>>34
|
||||
maj0 = r3
|
||||
maj0 ^= r2
|
||||
|
||||
2x,0 W4right61 = X4 unsigned>> 61
|
||||
1x,0 W4sigma1 = W4right19 ^ W4left45
|
||||
r2Sigma0 ^= r234
|
||||
r239 = r2>>>39
|
||||
|
||||
2x,0 W4left3 = X4 << 3
|
||||
1x,0 W4sigma1 ^= W4right61
|
||||
r2Sigma0 ^= r239
|
||||
r1 += r6Sigma1
|
||||
|
||||
2x,0 W4right6 = X4 unsigned>> 6
|
||||
1x,0 W4sigma1 ^= W4left3
|
||||
r5 += r1
|
||||
r2andr3 = r3
|
||||
r2andr3 &= r2
|
||||
|
||||
1x,0 W4sigma1 ^= W4right6
|
||||
r5Sigma1 = r5>>>14
|
||||
r1 += r2Sigma0
|
||||
maj1 = r4
|
||||
maj1 &= maj0
|
||||
|
||||
W4sigma1 = W4sigma1[1],W4sigma1[0]
|
||||
maj1 ^= r2andr3
|
||||
ch0 = r7
|
||||
ch0 ^= r6
|
||||
|
||||
r1 += maj1
|
||||
ch0 &= r5
|
||||
r518 = r5>>>18
|
||||
|
||||
maj0 &= r1
|
||||
ch0 ^= r7
|
||||
r0 += wc12131415[3]
|
||||
r5Sigma1 ^= r518
|
||||
|
||||
r541 = r5>>>41
|
||||
4x X4 = X4 + W4sigma1
|
||||
mem256[&w + 32] = X4
|
||||
r0 += ch0
|
||||
maj0 ^= r2andr3
|
||||
|
||||
r1Sigma0 = r1>>>28
|
||||
4x D4 = X4 + mem256[constants + 32]
|
||||
wc4567 = D4
|
||||
r134 = r1>>>34
|
||||
r5Sigma1 ^= r541
|
||||
|
||||
r0 += r5Sigma1
|
||||
r1Sigma0 ^= r134
|
||||
r139 = r1>>>39
|
||||
|
||||
r4 += r0
|
||||
r0 += maj0
|
||||
r1Sigma0 ^= r139
|
||||
|
||||
r0 += r1Sigma0
|
||||
@@ -0,0 +1,166 @@
|
||||
X9 = mem256[&w + 72]
|
||||
4x X9right1 = X9 unsigned>> 1
|
||||
r4Sigma1 = r4>>>14
|
||||
r7 += wc0123[0]
|
||||
ch7 = r6
|
||||
ch7 ^= r5
|
||||
|
||||
4x X9left63 = X9 << 63
|
||||
r418 = r4>>>18
|
||||
ch7 &= r4
|
||||
maj6 = r1
|
||||
maj6 ^= r0
|
||||
|
||||
X9sigma0 = X9right1 ^ X9left63
|
||||
r441 = r4>>>41
|
||||
r4Sigma1 ^= r418
|
||||
ch7 ^= r6
|
||||
|
||||
4x X9right8 = X9 unsigned>> 8
|
||||
r4Sigma1 ^= r441
|
||||
r0Sigma0 = r0>>>28
|
||||
r7 += ch7
|
||||
|
||||
X9sigma0 = X9sigma0 ^ X9right8
|
||||
r034 = r0>>>34
|
||||
r7 += r4Sigma1
|
||||
maj7 = r2
|
||||
maj7 &= maj6
|
||||
|
||||
W6 = mem128[&w + 48],0
|
||||
2x,0 W6right19 = W6 unsigned>> 19
|
||||
r0Sigma0 ^= r034
|
||||
r039 = r0>>>39
|
||||
r3 += r7
|
||||
|
||||
4x X9left56 = X9 << 56
|
||||
r0Sigma0 ^= r039
|
||||
r6 += wc0123[1]
|
||||
r0andr1 = r1
|
||||
r0andr1 &= r0
|
||||
|
||||
2x,0 W6left45 = W6 << 45
|
||||
r7 += r0Sigma0
|
||||
maj7 ^= r0andr1
|
||||
ch6 = r5
|
||||
ch6 ^= r4
|
||||
|
||||
X9sigma0 = X9sigma0 ^ X9left56
|
||||
2x,0 W6right61 = W6 unsigned>> 61
|
||||
r3Sigma1 = r3>>>14
|
||||
r7 += maj7
|
||||
|
||||
4x X9right7 = X9 unsigned>> 7
|
||||
1x,0 W6sigma1 = W6right19 ^ W6left45
|
||||
r318 = r3>>>18
|
||||
ch6 &= r3
|
||||
|
||||
X9sigma0 = X9sigma0 ^ X9right7
|
||||
r3Sigma1 ^= r318
|
||||
r341 = r3>>>41
|
||||
maj6 &= r7
|
||||
|
||||
1x,0 W6sigma1 ^= W6right61
|
||||
4x X8 = X8 + mem256[&w + 8]
|
||||
r3Sigma1 ^= r341
|
||||
maj6 ^= r0andr1
|
||||
|
||||
2x,0 W6left3 = W6 << 3
|
||||
r7Sigma0 = r7>>>28
|
||||
ch6 ^= r5
|
||||
r6 += r3Sigma1
|
||||
|
||||
4x X8 = X8 + X9sigma0
|
||||
r734 = r7>>>34
|
||||
r5 += wc0123[2]
|
||||
r6 += ch6
|
||||
|
||||
1x,0 W6sigma1 ^= W6left3
|
||||
r7Sigma0 ^= r734
|
||||
r739 = r7>>>39
|
||||
r2 += r6
|
||||
|
||||
2x,0 W6right6 = W6 unsigned>> 6
|
||||
r7Sigma0 ^= r739
|
||||
r6 += maj6
|
||||
ch5 = r4
|
||||
ch5 ^= r3
|
||||
|
||||
1x,0 W6sigma1 ^= W6right6
|
||||
r6 += r7Sigma0
|
||||
r2Sigma1 = r2>>>14
|
||||
ch5 &= r2
|
||||
|
||||
4x X8 = W6sigma1 + X8
|
||||
r218 = r2>>>18
|
||||
r241 = r2>>>41
|
||||
ch5 ^= r4
|
||||
|
||||
2x,0 W8right19 = X8 unsigned>> 19
|
||||
r2Sigma1 ^= r218
|
||||
r6Sigma0 = r6>>>28
|
||||
r5 += ch5
|
||||
|
||||
2x,0 W8left45 = X8 << 45
|
||||
r2Sigma1 ^= r241
|
||||
r634 = r6>>>34
|
||||
maj4 = r7
|
||||
maj4 ^= r6
|
||||
|
||||
2x,0 W8right61 = X8 unsigned>> 61
|
||||
1x,0 W8sigma1 = W8right19 ^ W8left45
|
||||
r6Sigma0 ^= r634
|
||||
r639 = r6>>>39
|
||||
|
||||
2x,0 W8left3 = X8 << 3
|
||||
1x,0 W8sigma1 ^= W8right61
|
||||
r6Sigma0 ^= r639
|
||||
r5 += r2Sigma1
|
||||
|
||||
2x,0 W8right6 = X8 unsigned>> 6
|
||||
1x,0 W8sigma1 ^= W8left3
|
||||
r1 += r5
|
||||
r6andr7 = r7
|
||||
r6andr7 &= r6
|
||||
|
||||
1x,0 W8sigma1 ^= W8right6
|
||||
r1Sigma1 = r1>>>14
|
||||
r5 += r6Sigma0
|
||||
maj5 = r0
|
||||
maj5 &= maj4
|
||||
|
||||
W8sigma1 = W8sigma1[1],W8sigma1[0]
|
||||
maj5 ^= r6andr7
|
||||
ch4 = r3
|
||||
ch4 ^= r2
|
||||
|
||||
r5 += maj5
|
||||
ch4 &= r1
|
||||
r118 = r1>>>18
|
||||
|
||||
maj4 &= r5
|
||||
ch4 ^= r3
|
||||
r4 += wc0123[3]
|
||||
r1Sigma1 ^= r118
|
||||
|
||||
r141 = r1>>>41
|
||||
4x X8 = X8 + W8sigma1
|
||||
mem256[&w + 64] = X8
|
||||
r4 += ch4
|
||||
maj4 ^= r6andr7
|
||||
|
||||
r5Sigma0 = r5>>>28
|
||||
4x D8 = X8 + mem256[constants + 64]
|
||||
wc891011 = D8
|
||||
r534 = r5>>>34
|
||||
r1Sigma1 ^= r141
|
||||
|
||||
r4 += r1Sigma1
|
||||
r5Sigma0 ^= r534
|
||||
r539 = r5>>>39
|
||||
|
||||
r0 += r4
|
||||
r4 += maj4
|
||||
r5Sigma0 ^= r539
|
||||
|
||||
r4 += r5Sigma0
|
||||
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
|
||||
i = 0
|
||||
|
||||
for doubleround in range(8):
|
||||
i0 = (i+0)&7
|
||||
i1 = (i+1)&7
|
||||
i2 = (i+2)&7
|
||||
i3 = (i+3)&7
|
||||
i4 = (i+4)&7
|
||||
i5 = (i+5)&7
|
||||
i6 = (i+6)&7
|
||||
i7 = (i+7)&7
|
||||
|
||||
round = 2*doubleround
|
||||
round4 = round&~3
|
||||
loadarray = 'wc%d%d%d%d' % (round4,round4+1,round4+2,round4+3)
|
||||
i0load = '%s[%d]' % (loadarray,(round+0)&3)
|
||||
i1load = '%s[%d]' % (loadarray,(round+1)&3)
|
||||
|
||||
i -= 2
|
||||
|
||||
if len(sys.argv) > 1:
|
||||
if sys.argv[1] != str(doubleround):
|
||||
continue
|
||||
|
||||
|
||||
print(' r%d += %s' % (i7,i0load))
|
||||
print(' r%dSigma1 = r%d>>>14' % (i4,i4))
|
||||
print(' ch%d = r%d' % (i7,i6))
|
||||
print(' r%d18 = r%d>>>18' % (i4,i4))
|
||||
print(' ch%d ^= r%d' % (i7,i5))
|
||||
|
||||
print(' r%d41 = r%d>>>41' % (i4,i4))
|
||||
print(' r%dSigma1 ^= r%d18' % (i4,i4))
|
||||
print(' ch%d &= r%d' % (i7,i4))
|
||||
print(' r%dSigma0 = r%d>>>28' % (i0,i0))
|
||||
|
||||
print(' r%dSigma1 ^= r%d41' % (i4,i4))
|
||||
print(' ch%d ^= r%d' % (i7,i6))
|
||||
print(' r%d34 = r%d>>>34' % (i0,i0))
|
||||
print(' maj%d = r%d' % (i6,i1))
|
||||
print(' maj%d ^= r%d' % (i6,i0))
|
||||
|
||||
print(' r%dSigma0 ^= r%d34' % (i0,i0))
|
||||
print(' r%d += ch%d' % (i7,i7))
|
||||
print(' r%dandr%d = r%d' % (i0,i1,i1))
|
||||
print(' r%d39 = r%d>>>39' % (i0,i0))
|
||||
print(' r%dandr%d &= r%d' % (i0,i1,i0))
|
||||
|
||||
print(' r%dSigma0 ^= r%d39' % (i0,i0))
|
||||
print(' r%d += r%dSigma1' % (i7,i4))
|
||||
print(' maj%d = r%d' % (i7,i2))
|
||||
print(' r%d += %s' % (i6,i1load))
|
||||
print(' maj%d &= maj%d' % (i7,i6))
|
||||
|
||||
print(' r%d += r%d' % (i3,i7))
|
||||
print(' r%d += r%dSigma0' % (i7,i0))
|
||||
print(' ch%d = r%d' % (i6,i5))
|
||||
print(' maj%d ^= r%dandr%d' % (i7,i0,i1))
|
||||
print(' ch%d ^= r%d' % (i6,i4))
|
||||
|
||||
print(' r%dSigma1 = r%d>>>14' % (i3,i3))
|
||||
print(' r%d += maj%d' % (i7,i7))
|
||||
print(' ch%d &= r%d' % (i6,i3))
|
||||
print(' r%d18 = r%d>>>18' % (i3,i3))
|
||||
|
||||
print(' r%dSigma1 ^= r%d18' % (i3,i3))
|
||||
print(' maj%d &= r%d' % (i6,i7))
|
||||
print(' ch%d ^= r%d' % (i6,i5))
|
||||
print(' r%d41 = r%d>>>41' % (i3,i3))
|
||||
|
||||
print(' r%dSigma1 ^= r%d41' % (i3,i3))
|
||||
print(' r%dSigma0 = r%d>>>28' % (i7,i7))
|
||||
print(' maj%d ^= r%dandr%d' % (i6,i0,i1))
|
||||
print(' r%d += ch%d' % (i6,i6))
|
||||
|
||||
print(' r%d += r%dSigma1' % (i6,i3))
|
||||
print(' r%d34 = r%d>>>34' % (i7,i7))
|
||||
|
||||
print(' r%dSigma0 ^= r%d34' % (i7,i7))
|
||||
print(' r%d += r%d' % (i2,i6))
|
||||
print(' r%d += maj%d' % (i6,i6))
|
||||
print(' r%d39 = r%d>>>39' % (i7,i7))
|
||||
|
||||
print(' r%dSigma0 ^= r%d39' % (i7,i7))
|
||||
|
||||
print(' r%d += r%dSigma0' % (i6,i7))
|
||||
@@ -0,0 +1,49 @@
|
||||
r7 += wc0123[0]
|
||||
r4Sigma1 = r4>>>14
|
||||
ch7 = r6
|
||||
r418 = r4>>>18
|
||||
ch7 ^= r5
|
||||
r441 = r4>>>41
|
||||
r4Sigma1 ^= r418
|
||||
ch7 &= r4
|
||||
r0Sigma0 = r0>>>28
|
||||
r4Sigma1 ^= r441
|
||||
ch7 ^= r6
|
||||
r034 = r0>>>34
|
||||
maj6 = r1
|
||||
maj6 ^= r0
|
||||
r0Sigma0 ^= r034
|
||||
r7 += ch7
|
||||
r0andr1 = r1
|
||||
r039 = r0>>>39
|
||||
r0andr1 &= r0
|
||||
r0Sigma0 ^= r039
|
||||
r7 += r4Sigma1
|
||||
maj7 = r2
|
||||
r6 += wc0123[1]
|
||||
maj7 &= maj6
|
||||
r3 += r7
|
||||
r7 += r0Sigma0
|
||||
ch6 = r5
|
||||
maj7 ^= r0andr1
|
||||
ch6 ^= r4
|
||||
r3Sigma1 = r3>>>14
|
||||
r7 += maj7
|
||||
ch6 &= r3
|
||||
r318 = r3>>>18
|
||||
r3Sigma1 ^= r318
|
||||
maj6 &= r7
|
||||
ch6 ^= r5
|
||||
r341 = r3>>>41
|
||||
r3Sigma1 ^= r341
|
||||
r7Sigma0 = r7>>>28
|
||||
maj6 ^= r0andr1
|
||||
r6 += ch6
|
||||
r6 += r3Sigma1
|
||||
r734 = r7>>>34
|
||||
r7Sigma0 ^= r734
|
||||
r2 += r6
|
||||
r6 += maj6
|
||||
r739 = r7>>>39
|
||||
r7Sigma0 ^= r739
|
||||
r6 += r7Sigma0
|
||||
@@ -0,0 +1,49 @@
|
||||
r5 += wc891011[2]
|
||||
r2Sigma1 = r2>>>14
|
||||
ch5 = r4
|
||||
r218 = r2>>>18
|
||||
ch5 ^= r3
|
||||
r241 = r2>>>41
|
||||
r2Sigma1 ^= r218
|
||||
ch5 &= r2
|
||||
r6Sigma0 = r6>>>28
|
||||
r2Sigma1 ^= r241
|
||||
ch5 ^= r4
|
||||
r634 = r6>>>34
|
||||
maj4 = r7
|
||||
maj4 ^= r6
|
||||
r6Sigma0 ^= r634
|
||||
r5 += ch5
|
||||
r6andr7 = r7
|
||||
r639 = r6>>>39
|
||||
r6andr7 &= r6
|
||||
r6Sigma0 ^= r639
|
||||
r5 += r2Sigma1
|
||||
maj5 = r0
|
||||
r4 += wc891011[3]
|
||||
maj5 &= maj4
|
||||
r1 += r5
|
||||
r5 += r6Sigma0
|
||||
ch4 = r3
|
||||
maj5 ^= r6andr7
|
||||
ch4 ^= r2
|
||||
r1Sigma1 = r1>>>14
|
||||
r5 += maj5
|
||||
ch4 &= r1
|
||||
r118 = r1>>>18
|
||||
r1Sigma1 ^= r118
|
||||
maj4 &= r5
|
||||
ch4 ^= r3
|
||||
r141 = r1>>>41
|
||||
r1Sigma1 ^= r141
|
||||
r5Sigma0 = r5>>>28
|
||||
maj4 ^= r6andr7
|
||||
r4 += ch4
|
||||
r4 += r1Sigma1
|
||||
r534 = r5>>>34
|
||||
r5Sigma0 ^= r534
|
||||
r0 += r4
|
||||
r4 += maj4
|
||||
r539 = r5>>>39
|
||||
r5Sigma0 ^= r539
|
||||
r4 += r5Sigma0
|
||||
@@ -0,0 +1,49 @@
|
||||
r3 += wc12131415[0]
|
||||
r0Sigma1 = r0>>>14
|
||||
ch3 = r2
|
||||
r018 = r0>>>18
|
||||
ch3 ^= r1
|
||||
r041 = r0>>>41
|
||||
r0Sigma1 ^= r018
|
||||
ch3 &= r0
|
||||
r4Sigma0 = r4>>>28
|
||||
r0Sigma1 ^= r041
|
||||
ch3 ^= r2
|
||||
r434 = r4>>>34
|
||||
maj2 = r5
|
||||
maj2 ^= r4
|
||||
r4Sigma0 ^= r434
|
||||
r3 += ch3
|
||||
r4andr5 = r5
|
||||
r439 = r4>>>39
|
||||
r4andr5 &= r4
|
||||
r4Sigma0 ^= r439
|
||||
r3 += r0Sigma1
|
||||
maj3 = r6
|
||||
r2 += wc12131415[1]
|
||||
maj3 &= maj2
|
||||
r7 += r3
|
||||
r3 += r4Sigma0
|
||||
ch2 = r1
|
||||
maj3 ^= r4andr5
|
||||
ch2 ^= r0
|
||||
r7Sigma1 = r7>>>14
|
||||
r3 += maj3
|
||||
ch2 &= r7
|
||||
r718 = r7>>>18
|
||||
r7Sigma1 ^= r718
|
||||
maj2 &= r3
|
||||
ch2 ^= r1
|
||||
r741 = r7>>>41
|
||||
r7Sigma1 ^= r741
|
||||
r3Sigma0 = r3>>>28
|
||||
maj2 ^= r4andr5
|
||||
r2 += ch2
|
||||
r2 += r7Sigma1
|
||||
r334 = r3>>>34
|
||||
r3Sigma0 ^= r334
|
||||
r6 += r2
|
||||
r2 += maj2
|
||||
r339 = r3>>>39
|
||||
r3Sigma0 ^= r339
|
||||
r2 += r3Sigma0
|
||||
@@ -0,0 +1,49 @@
|
||||
r1 += wc12131415[2]
|
||||
r6Sigma1 = r6>>>14
|
||||
ch1 = r0
|
||||
r618 = r6>>>18
|
||||
ch1 ^= r7
|
||||
r641 = r6>>>41
|
||||
r6Sigma1 ^= r618
|
||||
ch1 &= r6
|
||||
r2Sigma0 = r2>>>28
|
||||
r6Sigma1 ^= r641
|
||||
ch1 ^= r0
|
||||
r234 = r2>>>34
|
||||
maj0 = r3
|
||||
maj0 ^= r2
|
||||
r2Sigma0 ^= r234
|
||||
r1 += ch1
|
||||
r2andr3 = r3
|
||||
r239 = r2>>>39
|
||||
r2andr3 &= r2
|
||||
r2Sigma0 ^= r239
|
||||
r1 += r6Sigma1
|
||||
maj1 = r4
|
||||
r0 += wc12131415[3]
|
||||
maj1 &= maj0
|
||||
r5 += r1
|
||||
r1 += r2Sigma0
|
||||
ch0 = r7
|
||||
maj1 ^= r2andr3
|
||||
ch0 ^= r6
|
||||
r5Sigma1 = r5>>>14
|
||||
r1 += maj1
|
||||
ch0 &= r5
|
||||
r518 = r5>>>18
|
||||
r5Sigma1 ^= r518
|
||||
maj0 &= r1
|
||||
ch0 ^= r7
|
||||
r541 = r5>>>41
|
||||
r5Sigma1 ^= r541
|
||||
r1Sigma0 = r1>>>28
|
||||
maj0 ^= r2andr3
|
||||
r0 += ch0
|
||||
r0 += r5Sigma1
|
||||
r134 = r1>>>34
|
||||
r1Sigma0 ^= r134
|
||||
r4 += r0
|
||||
r0 += maj0
|
||||
r139 = r1>>>39
|
||||
r1Sigma0 ^= r139
|
||||
r0 += r1Sigma0
|
||||
@@ -0,0 +1,49 @@
|
||||
r5 += wc0123[2]
|
||||
r2Sigma1 = r2>>>14
|
||||
ch5 = r4
|
||||
r218 = r2>>>18
|
||||
ch5 ^= r3
|
||||
r241 = r2>>>41
|
||||
r2Sigma1 ^= r218
|
||||
ch5 &= r2
|
||||
r6Sigma0 = r6>>>28
|
||||
r2Sigma1 ^= r241
|
||||
ch5 ^= r4
|
||||
r634 = r6>>>34
|
||||
maj4 = r7
|
||||
maj4 ^= r6
|
||||
r6Sigma0 ^= r634
|
||||
r5 += ch5
|
||||
r6andr7 = r7
|
||||
r639 = r6>>>39
|
||||
r6andr7 &= r6
|
||||
r6Sigma0 ^= r639
|
||||
r5 += r2Sigma1
|
||||
maj5 = r0
|
||||
r4 += wc0123[3]
|
||||
maj5 &= maj4
|
||||
r1 += r5
|
||||
r5 += r6Sigma0
|
||||
ch4 = r3
|
||||
maj5 ^= r6andr7
|
||||
ch4 ^= r2
|
||||
r1Sigma1 = r1>>>14
|
||||
r5 += maj5
|
||||
ch4 &= r1
|
||||
r118 = r1>>>18
|
||||
r1Sigma1 ^= r118
|
||||
maj4 &= r5
|
||||
ch4 ^= r3
|
||||
r141 = r1>>>41
|
||||
r1Sigma1 ^= r141
|
||||
r5Sigma0 = r5>>>28
|
||||
maj4 ^= r6andr7
|
||||
r4 += ch4
|
||||
r4 += r1Sigma1
|
||||
r534 = r5>>>34
|
||||
r5Sigma0 ^= r534
|
||||
r0 += r4
|
||||
r4 += maj4
|
||||
r539 = r5>>>39
|
||||
r5Sigma0 ^= r539
|
||||
r4 += r5Sigma0
|
||||
@@ -0,0 +1,49 @@
|
||||
r3 += wc4567[0]
|
||||
r0Sigma1 = r0>>>14
|
||||
ch3 = r2
|
||||
r018 = r0>>>18
|
||||
ch3 ^= r1
|
||||
r041 = r0>>>41
|
||||
r0Sigma1 ^= r018
|
||||
ch3 &= r0
|
||||
r4Sigma0 = r4>>>28
|
||||
r0Sigma1 ^= r041
|
||||
ch3 ^= r2
|
||||
r434 = r4>>>34
|
||||
maj2 = r5
|
||||
maj2 ^= r4
|
||||
r4Sigma0 ^= r434
|
||||
r3 += ch3
|
||||
r4andr5 = r5
|
||||
r439 = r4>>>39
|
||||
r4andr5 &= r4
|
||||
r4Sigma0 ^= r439
|
||||
r3 += r0Sigma1
|
||||
maj3 = r6
|
||||
r2 += wc4567[1]
|
||||
maj3 &= maj2
|
||||
r7 += r3
|
||||
r3 += r4Sigma0
|
||||
ch2 = r1
|
||||
maj3 ^= r4andr5
|
||||
ch2 ^= r0
|
||||
r7Sigma1 = r7>>>14
|
||||
r3 += maj3
|
||||
ch2 &= r7
|
||||
r718 = r7>>>18
|
||||
r7Sigma1 ^= r718
|
||||
maj2 &= r3
|
||||
ch2 ^= r1
|
||||
r741 = r7>>>41
|
||||
r7Sigma1 ^= r741
|
||||
r3Sigma0 = r3>>>28
|
||||
maj2 ^= r4andr5
|
||||
r2 += ch2
|
||||
r2 += r7Sigma1
|
||||
r334 = r3>>>34
|
||||
r3Sigma0 ^= r334
|
||||
r6 += r2
|
||||
r2 += maj2
|
||||
r339 = r3>>>39
|
||||
r3Sigma0 ^= r339
|
||||
r2 += r3Sigma0
|
||||
@@ -0,0 +1,49 @@
|
||||
r1 += wc4567[2]
|
||||
r6Sigma1 = r6>>>14
|
||||
ch1 = r0
|
||||
r618 = r6>>>18
|
||||
ch1 ^= r7
|
||||
r641 = r6>>>41
|
||||
r6Sigma1 ^= r618
|
||||
ch1 &= r6
|
||||
r2Sigma0 = r2>>>28
|
||||
r6Sigma1 ^= r641
|
||||
ch1 ^= r0
|
||||
r234 = r2>>>34
|
||||
maj0 = r3
|
||||
maj0 ^= r2
|
||||
r2Sigma0 ^= r234
|
||||
r1 += ch1
|
||||
r2andr3 = r3
|
||||
r239 = r2>>>39
|
||||
r2andr3 &= r2
|
||||
r2Sigma0 ^= r239
|
||||
r1 += r6Sigma1
|
||||
maj1 = r4
|
||||
r0 += wc4567[3]
|
||||
maj1 &= maj0
|
||||
r5 += r1
|
||||
r1 += r2Sigma0
|
||||
ch0 = r7
|
||||
maj1 ^= r2andr3
|
||||
ch0 ^= r6
|
||||
r5Sigma1 = r5>>>14
|
||||
r1 += maj1
|
||||
ch0 &= r5
|
||||
r518 = r5>>>18
|
||||
r5Sigma1 ^= r518
|
||||
maj0 &= r1
|
||||
ch0 ^= r7
|
||||
r541 = r5>>>41
|
||||
r5Sigma1 ^= r541
|
||||
r1Sigma0 = r1>>>28
|
||||
maj0 ^= r2andr3
|
||||
r0 += ch0
|
||||
r0 += r5Sigma1
|
||||
r134 = r1>>>34
|
||||
r1Sigma0 ^= r134
|
||||
r4 += r0
|
||||
r0 += maj0
|
||||
r139 = r1>>>39
|
||||
r1Sigma0 ^= r139
|
||||
r0 += r1Sigma0
|
||||
@@ -0,0 +1,49 @@
|
||||
r7 += wc891011[0]
|
||||
r4Sigma1 = r4>>>14
|
||||
ch7 = r6
|
||||
r418 = r4>>>18
|
||||
ch7 ^= r5
|
||||
r441 = r4>>>41
|
||||
r4Sigma1 ^= r418
|
||||
ch7 &= r4
|
||||
r0Sigma0 = r0>>>28
|
||||
r4Sigma1 ^= r441
|
||||
ch7 ^= r6
|
||||
r034 = r0>>>34
|
||||
maj6 = r1
|
||||
maj6 ^= r0
|
||||
r0Sigma0 ^= r034
|
||||
r7 += ch7
|
||||
r0andr1 = r1
|
||||
r039 = r0>>>39
|
||||
r0andr1 &= r0
|
||||
r0Sigma0 ^= r039
|
||||
r7 += r4Sigma1
|
||||
maj7 = r2
|
||||
r6 += wc891011[1]
|
||||
maj7 &= maj6
|
||||
r3 += r7
|
||||
r7 += r0Sigma0
|
||||
ch6 = r5
|
||||
maj7 ^= r0andr1
|
||||
ch6 ^= r4
|
||||
r3Sigma1 = r3>>>14
|
||||
r7 += maj7
|
||||
ch6 &= r3
|
||||
r318 = r3>>>18
|
||||
r3Sigma1 ^= r318
|
||||
maj6 &= r7
|
||||
ch6 ^= r5
|
||||
r341 = r3>>>41
|
||||
r3Sigma1 ^= r341
|
||||
r7Sigma0 = r7>>>28
|
||||
maj6 ^= r0andr1
|
||||
r6 += ch6
|
||||
r6 += r3Sigma1
|
||||
r734 = r7>>>34
|
||||
r7Sigma0 ^= r734
|
||||
r2 += r6
|
||||
r6 += maj6
|
||||
r739 = r7>>>39
|
||||
r7Sigma0 ^= r739
|
||||
r6 += r7Sigma0
|
||||
@@ -0,0 +1 @@
|
||||
../m3/api.h
|
||||
@@ -0,0 +1,218 @@
|
||||
#include "crypto_hashblocks.h"
|
||||
|
||||
typedef unsigned long long uint64;
|
||||
|
||||
static uint64 load_bigendian(const unsigned char *x)
|
||||
{
|
||||
return
|
||||
(uint64) (x[7]) \
|
||||
| (((uint64) (x[6])) << 8) \
|
||||
| (((uint64) (x[5])) << 16) \
|
||||
| (((uint64) (x[4])) << 24) \
|
||||
| (((uint64) (x[3])) << 32) \
|
||||
| (((uint64) (x[2])) << 40) \
|
||||
| (((uint64) (x[1])) << 48) \
|
||||
| (((uint64) (x[0])) << 56)
|
||||
;
|
||||
}
|
||||
|
||||
static void store_bigendian(unsigned char *x,uint64 u)
|
||||
{
|
||||
x[7] = u; u >>= 8;
|
||||
x[6] = u; u >>= 8;
|
||||
x[5] = u; u >>= 8;
|
||||
x[4] = u; u >>= 8;
|
||||
x[3] = u; u >>= 8;
|
||||
x[2] = u; u >>= 8;
|
||||
x[1] = u; u >>= 8;
|
||||
x[0] = u;
|
||||
}
|
||||
|
||||
#define SHR(x,c) ((x) >> (c))
|
||||
#define ROTR(x,c) (((x) >> (c)) | ((x) << (64 - (c))))
|
||||
|
||||
#define sigma0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x,7))
|
||||
#define sigma1(x) (ROTR(x,19) ^ ROTR(x,61) ^ SHR(x,6))
|
||||
|
||||
#define M(w0,w14,w9,w1) w0 = sigma1(w14) + w9 + sigma0(w1) + w0;
|
||||
|
||||
static void expand(uint64 *w)
|
||||
{
|
||||
M(w[0] ,w[14],w[9] ,w[1] )
|
||||
M(w[1] ,w[15],w[10],w[2] )
|
||||
M(w[2] ,w[0] ,w[11],w[3] )
|
||||
M(w[3] ,w[1] ,w[12],w[4] )
|
||||
M(w[4] ,w[2] ,w[13],w[5] )
|
||||
M(w[5] ,w[3] ,w[14],w[6] )
|
||||
M(w[6] ,w[4] ,w[15],w[7] )
|
||||
M(w[7] ,w[5] ,w[0] ,w[8] )
|
||||
M(w[8] ,w[6] ,w[1] ,w[9] )
|
||||
M(w[9] ,w[7] ,w[2] ,w[10])
|
||||
M(w[10],w[8] ,w[3] ,w[11])
|
||||
M(w[11],w[9] ,w[4] ,w[12])
|
||||
M(w[12],w[10],w[5] ,w[13])
|
||||
M(w[13],w[11],w[6] ,w[14])
|
||||
M(w[14],w[12],w[7] ,w[15])
|
||||
M(w[15],w[13],w[8] ,w[0] )
|
||||
}
|
||||
|
||||
#define Ch(x,y,z) (z ^ (x & (y ^ z)))
|
||||
#define Maj(x,y,z) ((x & (y ^ z)) ^ (y & z))
|
||||
#define Sigma0(x) (ROTR(x,28) ^ ROTR(x,34) ^ ROTR(x,39))
|
||||
#define Sigma1(x) (ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41))
|
||||
|
||||
#define F(r0,r1,r2,r3,r4,r5,r6,r7,w,k) \
|
||||
r7 += Sigma1(r4) + Ch(r4,r5,r6) + k + w; \
|
||||
r3 += r7; \
|
||||
r7 += Sigma0(r0) + Maj(r0,r1,r2);
|
||||
|
||||
static void handle(uint64 *r,const uint64 *w,const uint64 *c)
|
||||
{
|
||||
F(r[0],r[1],r[2],r[3],r[4],r[5],r[6],r[7],w[0] ,c[0])
|
||||
F(r[7],r[0],r[1],r[2],r[3],r[4],r[5],r[6],w[1] ,c[1])
|
||||
F(r[6],r[7],r[0],r[1],r[2],r[3],r[4],r[5],w[2] ,c[2])
|
||||
F(r[5],r[6],r[7],r[0],r[1],r[2],r[3],r[4],w[3] ,c[3])
|
||||
F(r[4],r[5],r[6],r[7],r[0],r[1],r[2],r[3],w[4] ,c[4])
|
||||
F(r[3],r[4],r[5],r[6],r[7],r[0],r[1],r[2],w[5] ,c[5])
|
||||
F(r[2],r[3],r[4],r[5],r[6],r[7],r[0],r[1],w[6] ,c[6])
|
||||
F(r[1],r[2],r[3],r[4],r[5],r[6],r[7],r[0],w[7] ,c[7])
|
||||
F(r[0],r[1],r[2],r[3],r[4],r[5],r[6],r[7],w[8] ,c[8])
|
||||
F(r[7],r[0],r[1],r[2],r[3],r[4],r[5],r[6],w[9] ,c[9])
|
||||
F(r[6],r[7],r[0],r[1],r[2],r[3],r[4],r[5],w[10],c[10])
|
||||
F(r[5],r[6],r[7],r[0],r[1],r[2],r[3],r[4],w[11],c[11])
|
||||
F(r[4],r[5],r[6],r[7],r[0],r[1],r[2],r[3],w[12],c[12])
|
||||
F(r[3],r[4],r[5],r[6],r[7],r[0],r[1],r[2],w[13],c[13])
|
||||
F(r[2],r[3],r[4],r[5],r[6],r[7],r[0],r[1],w[14],c[14])
|
||||
F(r[1],r[2],r[3],r[4],r[5],r[6],r[7],r[0],w[15],c[15])
|
||||
}
|
||||
|
||||
static const uint64 round[80] = {
|
||||
0x428a2f98d728ae22ULL
|
||||
, 0x7137449123ef65cdULL
|
||||
, 0xb5c0fbcfec4d3b2fULL
|
||||
, 0xe9b5dba58189dbbcULL
|
||||
, 0x3956c25bf348b538ULL
|
||||
, 0x59f111f1b605d019ULL
|
||||
, 0x923f82a4af194f9bULL
|
||||
, 0xab1c5ed5da6d8118ULL
|
||||
, 0xd807aa98a3030242ULL
|
||||
, 0x12835b0145706fbeULL
|
||||
, 0x243185be4ee4b28cULL
|
||||
, 0x550c7dc3d5ffb4e2ULL
|
||||
, 0x72be5d74f27b896fULL
|
||||
, 0x80deb1fe3b1696b1ULL
|
||||
, 0x9bdc06a725c71235ULL
|
||||
, 0xc19bf174cf692694ULL
|
||||
, 0xe49b69c19ef14ad2ULL
|
||||
, 0xefbe4786384f25e3ULL
|
||||
, 0x0fc19dc68b8cd5b5ULL
|
||||
, 0x240ca1cc77ac9c65ULL
|
||||
, 0x2de92c6f592b0275ULL
|
||||
, 0x4a7484aa6ea6e483ULL
|
||||
, 0x5cb0a9dcbd41fbd4ULL
|
||||
, 0x76f988da831153b5ULL
|
||||
, 0x983e5152ee66dfabULL
|
||||
, 0xa831c66d2db43210ULL
|
||||
, 0xb00327c898fb213fULL
|
||||
, 0xbf597fc7beef0ee4ULL
|
||||
, 0xc6e00bf33da88fc2ULL
|
||||
, 0xd5a79147930aa725ULL
|
||||
, 0x06ca6351e003826fULL
|
||||
, 0x142929670a0e6e70ULL
|
||||
, 0x27b70a8546d22ffcULL
|
||||
, 0x2e1b21385c26c926ULL
|
||||
, 0x4d2c6dfc5ac42aedULL
|
||||
, 0x53380d139d95b3dfULL
|
||||
, 0x650a73548baf63deULL
|
||||
, 0x766a0abb3c77b2a8ULL
|
||||
, 0x81c2c92e47edaee6ULL
|
||||
, 0x92722c851482353bULL
|
||||
, 0xa2bfe8a14cf10364ULL
|
||||
, 0xa81a664bbc423001ULL
|
||||
, 0xc24b8b70d0f89791ULL
|
||||
, 0xc76c51a30654be30ULL
|
||||
, 0xd192e819d6ef5218ULL
|
||||
, 0xd69906245565a910ULL
|
||||
, 0xf40e35855771202aULL
|
||||
, 0x106aa07032bbd1b8ULL
|
||||
, 0x19a4c116b8d2d0c8ULL
|
||||
, 0x1e376c085141ab53ULL
|
||||
, 0x2748774cdf8eeb99ULL
|
||||
, 0x34b0bcb5e19b48a8ULL
|
||||
, 0x391c0cb3c5c95a63ULL
|
||||
, 0x4ed8aa4ae3418acbULL
|
||||
, 0x5b9cca4f7763e373ULL
|
||||
, 0x682e6ff3d6b2b8a3ULL
|
||||
, 0x748f82ee5defb2fcULL
|
||||
, 0x78a5636f43172f60ULL
|
||||
, 0x84c87814a1f0ab72ULL
|
||||
, 0x8cc702081a6439ecULL
|
||||
, 0x90befffa23631e28ULL
|
||||
, 0xa4506cebde82bde9ULL
|
||||
, 0xbef9a3f7b2c67915ULL
|
||||
, 0xc67178f2e372532bULL
|
||||
, 0xca273eceea26619cULL
|
||||
, 0xd186b8c721c0c207ULL
|
||||
, 0xeada7dd6cde0eb1eULL
|
||||
, 0xf57d4f7fee6ed178ULL
|
||||
, 0x06f067aa72176fbaULL
|
||||
, 0x0a637dc5a2c898a6ULL
|
||||
, 0x113f9804bef90daeULL
|
||||
, 0x1b710b35131c471bULL
|
||||
, 0x28db77f523047d84ULL
|
||||
, 0x32caab7b40c72493ULL
|
||||
, 0x3c9ebe0a15c9bebcULL
|
||||
, 0x431d67c49c100d4cULL
|
||||
, 0x4cc5d4becb3e42b6ULL
|
||||
, 0x597f299cfc657e2aULL
|
||||
, 0x5fcb6fab3ad6faecULL
|
||||
, 0x6c44198c4a475817ULL
|
||||
};
|
||||
|
||||
int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,long long inlen)
|
||||
{
|
||||
uint64 w[16];
|
||||
uint64 state[8];
|
||||
uint64 r[8];
|
||||
int i;
|
||||
|
||||
for (i = 0;i < 8;++i)
|
||||
state[i] = r[i] = load_bigendian(statebytes+8*i);
|
||||
|
||||
while (inlen >= 128) {
|
||||
for (i = 0;i < 16;++i)
|
||||
w[i] = load_bigendian(in+8*i);
|
||||
|
||||
handle(r,w,round+0);
|
||||
|
||||
expand(w);
|
||||
|
||||
handle(r,w,round+16);
|
||||
|
||||
expand(w);
|
||||
|
||||
handle(r,w,round+32);
|
||||
|
||||
expand(w);
|
||||
|
||||
handle(r,w,round+48);
|
||||
|
||||
expand(w);
|
||||
|
||||
handle(r,w,round+64);
|
||||
|
||||
for (i = 0;i < 8;++i) {
|
||||
uint64 x = r[i]+state[i];
|
||||
state[i] = x;
|
||||
r[i] = x;
|
||||
}
|
||||
|
||||
in += 128;
|
||||
inlen -= 128;
|
||||
}
|
||||
|
||||
for (i = 0;i < 8;++i)
|
||||
store_bigendian(statebytes+8*i,state[i]);
|
||||
|
||||
return inlen;
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
../m3/implementors
|
||||
@@ -0,0 +1 @@
|
||||
../m3/api.h
|
||||
@@ -0,0 +1,198 @@
|
||||
#include "crypto_hashblocks.h"
|
||||
|
||||
typedef unsigned long long uint64;
|
||||
|
||||
static uint64 load_bigendian(const unsigned char *x)
|
||||
{
|
||||
return
|
||||
(uint64) (x[7]) \
|
||||
| (((uint64) (x[6])) << 8) \
|
||||
| (((uint64) (x[5])) << 16) \
|
||||
| (((uint64) (x[4])) << 24) \
|
||||
| (((uint64) (x[3])) << 32) \
|
||||
| (((uint64) (x[2])) << 40) \
|
||||
| (((uint64) (x[1])) << 48) \
|
||||
| (((uint64) (x[0])) << 56)
|
||||
;
|
||||
}
|
||||
|
||||
static void store_bigendian(unsigned char *x,uint64 u)
|
||||
{
|
||||
x[7] = u; u >>= 8;
|
||||
x[6] = u; u >>= 8;
|
||||
x[5] = u; u >>= 8;
|
||||
x[4] = u; u >>= 8;
|
||||
x[3] = u; u >>= 8;
|
||||
x[2] = u; u >>= 8;
|
||||
x[1] = u; u >>= 8;
|
||||
x[0] = u;
|
||||
}
|
||||
|
||||
#define SHR(x,c) ((x) >> (c))
|
||||
#define ROTR(x,c) (((x) >> (c)) | ((x) << (64 - (c))))
|
||||
#define sigma0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x,7))
|
||||
#define sigma1(x) (ROTR(x,19) ^ ROTR(x,61) ^ SHR(x,6))
|
||||
|
||||
static void expand(uint64 *w)
|
||||
{
|
||||
int i;
|
||||
for (i = 0;i < 16;++i) {
|
||||
w[i] += w[15&(i+9)];
|
||||
w[i] += sigma1(w[15&(i+14)]);
|
||||
w[i] += sigma0(w[15&(i+1)]);
|
||||
}
|
||||
}
|
||||
|
||||
#define Ch(x,y,z) (z ^ (x & (y ^ z)))
|
||||
#define Maj(x,y,z) ((x & (y ^ z)) ^ (y & z))
|
||||
#define Sigma0(x) (ROTR(x,28) ^ ROTR(x,34) ^ ROTR(x,39))
|
||||
#define Sigma1(x) (ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41))
|
||||
|
||||
static void handle(uint64 *r,const uint64 *w,const uint64 *c)
|
||||
{
|
||||
int i;
|
||||
uint64 x;
|
||||
|
||||
for (i = 0;i < 16;++i) {
|
||||
x = r[7&(7-i)];
|
||||
x += w[i];
|
||||
x += c[i];
|
||||
x += Sigma1(r[7&(4-i)]);
|
||||
x += Ch(r[7&(4-i)],r[7&(5-i)],r[7&(6-i)]);
|
||||
r[7&(3-i)] += x;
|
||||
x += Sigma0(r[7&(0-i)]);
|
||||
x += Maj(r[7&(0-i)],r[7&(1-i)],r[7&(2-i)]);
|
||||
r[7&(7-i)] = x;
|
||||
}
|
||||
}
|
||||
|
||||
static const uint64 round[80] = {
|
||||
0x428a2f98d728ae22ULL
|
||||
, 0x7137449123ef65cdULL
|
||||
, 0xb5c0fbcfec4d3b2fULL
|
||||
, 0xe9b5dba58189dbbcULL
|
||||
, 0x3956c25bf348b538ULL
|
||||
, 0x59f111f1b605d019ULL
|
||||
, 0x923f82a4af194f9bULL
|
||||
, 0xab1c5ed5da6d8118ULL
|
||||
, 0xd807aa98a3030242ULL
|
||||
, 0x12835b0145706fbeULL
|
||||
, 0x243185be4ee4b28cULL
|
||||
, 0x550c7dc3d5ffb4e2ULL
|
||||
, 0x72be5d74f27b896fULL
|
||||
, 0x80deb1fe3b1696b1ULL
|
||||
, 0x9bdc06a725c71235ULL
|
||||
, 0xc19bf174cf692694ULL
|
||||
, 0xe49b69c19ef14ad2ULL
|
||||
, 0xefbe4786384f25e3ULL
|
||||
, 0x0fc19dc68b8cd5b5ULL
|
||||
, 0x240ca1cc77ac9c65ULL
|
||||
, 0x2de92c6f592b0275ULL
|
||||
, 0x4a7484aa6ea6e483ULL
|
||||
, 0x5cb0a9dcbd41fbd4ULL
|
||||
, 0x76f988da831153b5ULL
|
||||
, 0x983e5152ee66dfabULL
|
||||
, 0xa831c66d2db43210ULL
|
||||
, 0xb00327c898fb213fULL
|
||||
, 0xbf597fc7beef0ee4ULL
|
||||
, 0xc6e00bf33da88fc2ULL
|
||||
, 0xd5a79147930aa725ULL
|
||||
, 0x06ca6351e003826fULL
|
||||
, 0x142929670a0e6e70ULL
|
||||
, 0x27b70a8546d22ffcULL
|
||||
, 0x2e1b21385c26c926ULL
|
||||
, 0x4d2c6dfc5ac42aedULL
|
||||
, 0x53380d139d95b3dfULL
|
||||
, 0x650a73548baf63deULL
|
||||
, 0x766a0abb3c77b2a8ULL
|
||||
, 0x81c2c92e47edaee6ULL
|
||||
, 0x92722c851482353bULL
|
||||
, 0xa2bfe8a14cf10364ULL
|
||||
, 0xa81a664bbc423001ULL
|
||||
, 0xc24b8b70d0f89791ULL
|
||||
, 0xc76c51a30654be30ULL
|
||||
, 0xd192e819d6ef5218ULL
|
||||
, 0xd69906245565a910ULL
|
||||
, 0xf40e35855771202aULL
|
||||
, 0x106aa07032bbd1b8ULL
|
||||
, 0x19a4c116b8d2d0c8ULL
|
||||
, 0x1e376c085141ab53ULL
|
||||
, 0x2748774cdf8eeb99ULL
|
||||
, 0x34b0bcb5e19b48a8ULL
|
||||
, 0x391c0cb3c5c95a63ULL
|
||||
, 0x4ed8aa4ae3418acbULL
|
||||
, 0x5b9cca4f7763e373ULL
|
||||
, 0x682e6ff3d6b2b8a3ULL
|
||||
, 0x748f82ee5defb2fcULL
|
||||
, 0x78a5636f43172f60ULL
|
||||
, 0x84c87814a1f0ab72ULL
|
||||
, 0x8cc702081a6439ecULL
|
||||
, 0x90befffa23631e28ULL
|
||||
, 0xa4506cebde82bde9ULL
|
||||
, 0xbef9a3f7b2c67915ULL
|
||||
, 0xc67178f2e372532bULL
|
||||
, 0xca273eceea26619cULL
|
||||
, 0xd186b8c721c0c207ULL
|
||||
, 0xeada7dd6cde0eb1eULL
|
||||
, 0xf57d4f7fee6ed178ULL
|
||||
, 0x06f067aa72176fbaULL
|
||||
, 0x0a637dc5a2c898a6ULL
|
||||
, 0x113f9804bef90daeULL
|
||||
, 0x1b710b35131c471bULL
|
||||
, 0x28db77f523047d84ULL
|
||||
, 0x32caab7b40c72493ULL
|
||||
, 0x3c9ebe0a15c9bebcULL
|
||||
, 0x431d67c49c100d4cULL
|
||||
, 0x4cc5d4becb3e42b6ULL
|
||||
, 0x597f299cfc657e2aULL
|
||||
, 0x5fcb6fab3ad6faecULL
|
||||
, 0x6c44198c4a475817ULL
|
||||
};
|
||||
|
||||
int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,long long inlen)
|
||||
{
|
||||
uint64 w[16];
|
||||
uint64 state[8];
|
||||
uint64 r[8];
|
||||
int i;
|
||||
|
||||
for (i = 0;i < 8;++i)
|
||||
state[i] = r[i] = load_bigendian(statebytes+8*i);
|
||||
|
||||
while (inlen >= 128) {
|
||||
for (i = 0;i < 16;++i)
|
||||
w[i] = load_bigendian(in+8*i);
|
||||
|
||||
handle(r,w,round+0);
|
||||
|
||||
expand(w);
|
||||
|
||||
handle(r,w,round+16);
|
||||
|
||||
expand(w);
|
||||
|
||||
handle(r,w,round+32);
|
||||
|
||||
expand(w);
|
||||
|
||||
handle(r,w,round+48);
|
||||
|
||||
expand(w);
|
||||
|
||||
handle(r,w,round+64);
|
||||
|
||||
for (i = 0;i < 8;++i) {
|
||||
uint64 x = r[i]+state[i];
|
||||
state[i] = x;
|
||||
r[i] = x;
|
||||
}
|
||||
|
||||
in += 128;
|
||||
inlen -= 128;
|
||||
}
|
||||
|
||||
for (i = 0;i < 8;++i)
|
||||
store_bigendian(statebytes+8*i,state[i]);
|
||||
|
||||
return inlen;
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
../m3/implementors
|
||||
@@ -0,0 +1 @@
|
||||
../m3/api.h
|
||||
@@ -0,0 +1,170 @@
|
||||
#include "crypto_hashblocks.h"
|
||||
|
||||
typedef unsigned long long uint64;
|
||||
|
||||
static uint64 load_bigendian(const unsigned char *x)
|
||||
{
|
||||
return
|
||||
(uint64) (x[7]) \
|
||||
| (((uint64) (x[6])) << 8) \
|
||||
| (((uint64) (x[5])) << 16) \
|
||||
| (((uint64) (x[4])) << 24) \
|
||||
| (((uint64) (x[3])) << 32) \
|
||||
| (((uint64) (x[2])) << 40) \
|
||||
| (((uint64) (x[1])) << 48) \
|
||||
| (((uint64) (x[0])) << 56)
|
||||
;
|
||||
}
|
||||
|
||||
static void store_bigendian(unsigned char *x,uint64 u)
|
||||
{
|
||||
x[7] = u; u >>= 8;
|
||||
x[6] = u; u >>= 8;
|
||||
x[5] = u; u >>= 8;
|
||||
x[4] = u; u >>= 8;
|
||||
x[3] = u; u >>= 8;
|
||||
x[2] = u; u >>= 8;
|
||||
x[1] = u; u >>= 8;
|
||||
x[0] = u;
|
||||
}
|
||||
|
||||
#define SHR(x,c) ((x) >> (c))
|
||||
#define ROTR(x,c) (((x) >> (c)) | ((x) << (64 - (c))))
|
||||
#define sigma0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x,7))
|
||||
#define sigma1(x) (ROTR(x,19) ^ ROTR(x,61) ^ SHR(x,6))
|
||||
|
||||
#define Ch(x,y,z) (z ^ (x & (y ^ z)))
|
||||
#define Maj(x,y,z) ((x & (y ^ z)) ^ (y & z))
|
||||
#define Sigma0(x) (ROTR(x,28) ^ ROTR(x,34) ^ ROTR(x,39))
|
||||
#define Sigma1(x) (ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41))
|
||||
|
||||
static const uint64 round[80] = {
|
||||
0x428a2f98d728ae22ULL
|
||||
, 0x7137449123ef65cdULL
|
||||
, 0xb5c0fbcfec4d3b2fULL
|
||||
, 0xe9b5dba58189dbbcULL
|
||||
, 0x3956c25bf348b538ULL
|
||||
, 0x59f111f1b605d019ULL
|
||||
, 0x923f82a4af194f9bULL
|
||||
, 0xab1c5ed5da6d8118ULL
|
||||
, 0xd807aa98a3030242ULL
|
||||
, 0x12835b0145706fbeULL
|
||||
, 0x243185be4ee4b28cULL
|
||||
, 0x550c7dc3d5ffb4e2ULL
|
||||
, 0x72be5d74f27b896fULL
|
||||
, 0x80deb1fe3b1696b1ULL
|
||||
, 0x9bdc06a725c71235ULL
|
||||
, 0xc19bf174cf692694ULL
|
||||
, 0xe49b69c19ef14ad2ULL
|
||||
, 0xefbe4786384f25e3ULL
|
||||
, 0x0fc19dc68b8cd5b5ULL
|
||||
, 0x240ca1cc77ac9c65ULL
|
||||
, 0x2de92c6f592b0275ULL
|
||||
, 0x4a7484aa6ea6e483ULL
|
||||
, 0x5cb0a9dcbd41fbd4ULL
|
||||
, 0x76f988da831153b5ULL
|
||||
, 0x983e5152ee66dfabULL
|
||||
, 0xa831c66d2db43210ULL
|
||||
, 0xb00327c898fb213fULL
|
||||
, 0xbf597fc7beef0ee4ULL
|
||||
, 0xc6e00bf33da88fc2ULL
|
||||
, 0xd5a79147930aa725ULL
|
||||
, 0x06ca6351e003826fULL
|
||||
, 0x142929670a0e6e70ULL
|
||||
, 0x27b70a8546d22ffcULL
|
||||
, 0x2e1b21385c26c926ULL
|
||||
, 0x4d2c6dfc5ac42aedULL
|
||||
, 0x53380d139d95b3dfULL
|
||||
, 0x650a73548baf63deULL
|
||||
, 0x766a0abb3c77b2a8ULL
|
||||
, 0x81c2c92e47edaee6ULL
|
||||
, 0x92722c851482353bULL
|
||||
, 0xa2bfe8a14cf10364ULL
|
||||
, 0xa81a664bbc423001ULL
|
||||
, 0xc24b8b70d0f89791ULL
|
||||
, 0xc76c51a30654be30ULL
|
||||
, 0xd192e819d6ef5218ULL
|
||||
, 0xd69906245565a910ULL
|
||||
, 0xf40e35855771202aULL
|
||||
, 0x106aa07032bbd1b8ULL
|
||||
, 0x19a4c116b8d2d0c8ULL
|
||||
, 0x1e376c085141ab53ULL
|
||||
, 0x2748774cdf8eeb99ULL
|
||||
, 0x34b0bcb5e19b48a8ULL
|
||||
, 0x391c0cb3c5c95a63ULL
|
||||
, 0x4ed8aa4ae3418acbULL
|
||||
, 0x5b9cca4f7763e373ULL
|
||||
, 0x682e6ff3d6b2b8a3ULL
|
||||
, 0x748f82ee5defb2fcULL
|
||||
, 0x78a5636f43172f60ULL
|
||||
, 0x84c87814a1f0ab72ULL
|
||||
, 0x8cc702081a6439ecULL
|
||||
, 0x90befffa23631e28ULL
|
||||
, 0xa4506cebde82bde9ULL
|
||||
, 0xbef9a3f7b2c67915ULL
|
||||
, 0xc67178f2e372532bULL
|
||||
, 0xca273eceea26619cULL
|
||||
, 0xd186b8c721c0c207ULL
|
||||
, 0xeada7dd6cde0eb1eULL
|
||||
, 0xf57d4f7fee6ed178ULL
|
||||
, 0x06f067aa72176fbaULL
|
||||
, 0x0a637dc5a2c898a6ULL
|
||||
, 0x113f9804bef90daeULL
|
||||
, 0x1b710b35131c471bULL
|
||||
, 0x28db77f523047d84ULL
|
||||
, 0x32caab7b40c72493ULL
|
||||
, 0x3c9ebe0a15c9bebcULL
|
||||
, 0x431d67c49c100d4cULL
|
||||
, 0x4cc5d4becb3e42b6ULL
|
||||
, 0x597f299cfc657e2aULL
|
||||
, 0x5fcb6fab3ad6faecULL
|
||||
, 0x6c44198c4a475817ULL
|
||||
};
|
||||
|
||||
int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,long long inlen)
|
||||
{
|
||||
uint64 w[16];
|
||||
uint64 state[8];
|
||||
uint64 r[8];
|
||||
uint64 x;
|
||||
int i;
|
||||
|
||||
for (i = 0;i < 8;++i)
|
||||
state[i] = r[i] = load_bigendian(statebytes+8*i);
|
||||
|
||||
while (inlen >= 128) {
|
||||
for (i = 0;i < 16;++i)
|
||||
w[i] = load_bigendian(in+8*i);
|
||||
|
||||
for (i = 0;i < 80;++i) {
|
||||
x = r[7&(7-i)];
|
||||
x += w[15&i];
|
||||
x += round[i];
|
||||
x += Sigma1(r[7&(4-i)]);
|
||||
x += Ch(r[7&(4-i)],r[7&(5-i)],r[7&(6-i)]);
|
||||
r[7&(3-i)] += x;
|
||||
x += Sigma0(r[7&(0-i)]);
|
||||
x += Maj(r[7&(0-i)],r[7&(1-i)],r[7&(2-i)]);
|
||||
r[7&(7-i)] = x;
|
||||
|
||||
/* not used for i >= 64: */
|
||||
w[15&i] += w[15&(i+9)];
|
||||
w[15&i] += sigma1(w[15&(i+14)]);
|
||||
w[15&i] += sigma0(w[15&(i+1)]);
|
||||
}
|
||||
|
||||
for (i = 0;i < 8;++i) {
|
||||
uint64 x = r[i]+state[i];
|
||||
state[i] = x;
|
||||
r[i] = x;
|
||||
}
|
||||
|
||||
in += 128;
|
||||
inlen -= 128;
|
||||
}
|
||||
|
||||
for (i = 0;i < 8;++i)
|
||||
store_bigendian(statebytes+8*i,state[i]);
|
||||
|
||||
return inlen;
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user