diff --git a/Dockerfile b/Dockerfile
index 4301e05fa..f2f1e64ed 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,17 +5,12 @@
 #
 #
 
-FROM ubuntu:16.04
+FROM ubuntu@sha256:b722e2654241f9681f4719dce7aa16a2f0c35769e17a636f5b39a33967d1aeb8
+
 RUN apt-get update -qq && \
 	apt-get install -qqy automake libcurl4-openssl-dev git make gcc build-essential autotools-dev libtool sudo wget libssl-dev
 
-RUN ls
-RUN git clone https://github.com/likli/sugarmaker
-
-RUN cd sugarmaker && \
-	./autogen.sh && \
-	./configure CFLAGS='-O2 -fomit-frame-pointer' && \
-	make
 
 WORKDIR /sugarmaker
-ENTRYPOINT ["./sugarmaker"]
+#ENTRYPOINT ["./sugarmaker"]
+CMD ["bash", "build.sh"]
\ No newline at end of file
diff --git a/Dockerfile_Arm b/Dockerfile_Arm
new file mode 100644
index 000000000..183f7af5c
--- /dev/null
+++ b/Dockerfile_Arm
@@ -0,0 +1,21 @@
+#
+# Dockerfile for sugarmaker
+# usage: docker run creack/cpuminer --url xxxx --user xxxx --pass xxxx
+# ex: docker run creack/cpuminer --url stratum+tcp://ltc.pool.com:80 --user creack.worker1 --pass abcdef
+#
+#
+
+FROM ubuntu@sha256:b722e2654241f9681f4719dce7aa16a2f0c35769e17a636f5b39a33967d1aeb8
+
+RUN apt-get update -qq && \
+	apt-get install -qqy automake libcurl4-openssl-dev git make gcc build-essential autotools-dev libtool sudo wget libssl-dev
+RUN ls
+RUN git clone https://github.com/likli/sugarmaker
+
+RUN cd sugarmaker && \
+	./autogen.sh && \
+	./configure CFLAGS='-O2 -fomit-frame-pointer' && \
+	make
+
+WORKDIR /sugarmaker
+ENTRYPOINT ["./sugarmaker"]
diff --git a/Dockerfile_amd64 b/Dockerfile_amd64
new file mode 100644
index 000000000..183f7af5c
--- /dev/null
+++ b/Dockerfile_amd64
@@ -0,0 +1,21 @@
+#
+# Dockerfile for sugarmaker
+# usage: docker run creack/cpuminer --url xxxx --user xxxx --pass xxxx
+# ex: docker run creack/cpuminer --url stratum+tcp://ltc.pool.com:80 --user creack.worker1 --pass abcdef
+#
+#
+
+FROM ubuntu@sha256:b722e2654241f9681f4719dce7aa16a2f0c35769e17a636f5b39a33967d1aeb8
+
+RUN apt-get update -qq && \
+	apt-get install -qqy automake libcurl4-openssl-dev git make gcc build-essential autotools-dev libtool sudo wget libssl-dev
+RUN ls
+RUN git clone https://github.com/likli/sugarmaker
+
+RUN cd sugarmaker && \
+	./autogen.sh && \
+	./configure CFLAGS='-O2 -fomit-frame-pointer' && \
+	make
+
+WORKDIR /sugarmaker
+ENTRYPOINT ["./sugarmaker"]
diff --git a/Makefile.am b/Makefile.am
index c5e57ad92..8e9d74b77 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -26,8 +26,6 @@ sugarmaker_SOURCES = elist.h miner.h compat.h \
 	YespowerLitb.c \
 	YespowerIots.c \
 	YespowerItc.c \
-	yespower-1.0.1-power2b/sha256-p2b.c yespower-1.0.1-power2b/yespower-opt-p2b.c yespower-1.0.1-power2b/blake2b.c \
-	YespowerMbc.c \
 	YespowerYtn.c
 
 sugarmaker_LDFLAGS = $(PTHREAD_FLAGS)
diff --git a/YespowerMbc.c b/YespowerMbc.c
deleted file mode 100644
index b5e24e246..000000000
--- a/YespowerMbc.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright 2011 ArtForz, 2011-2014 pooler, 2018 The Resistance developers, 2020 The Sugarchain Yumekawa developers
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * This file is loosly based on a tiny portion of pooler's cpuminer scrypt.c.
- */
-
-#include "cpuminer-config.h"
-#include "miner.h"
-
-#include "yespower-1.0.1-power2b/yespower-p2b.h"
-
-#include
-#include
-#include
-
-int scanhash_mbc_yespower(int thr_id, uint32_t *pdata,
-	const uint32_t *ptarget,
-	uint32_t max_nonce, unsigned long *hashes_done)
-{
-	static const yespower_params_t params = {
-		.version = YESPOWER_1_0_BLAKE2B,
-		.N = 2048,
-		.r = 32,
-		.pers = (const uint8_t *)"Now I am become Death, the destroyer of worlds",
-		.perslen = 46
-	};
-	union {
-		uint8_t u8[8];
-		uint32_t u32[20];
-	} data;
-	union {
-		yespower_binary_t_p2b yb;
-		uint32_t u32[7];
-	} hash;
-	uint32_t n = pdata[19] - 1;
-	const uint32_t Htarg = ptarget[7];
-	int i;
-
-	for (i = 0; i < 19; i++)
-		be32enc(&data.u32[i], pdata[i]);
-
-	do {
-		be32enc(&data.u32[19], ++n);
-
-		if (yespower_tls_p2b(data.u8, 80, &params, &hash.yb))
-			abort();
-
-		if (le32dec(&hash.u32[7]) <= Htarg) {
-			for (i = 0; i < 7; i++)
-				hash.u32[i] = le32dec(&hash.u32[i]);
-			if (fulltest(hash.u32, ptarget)) {
-				*hashes_done = n - pdata[19] + 1;
-				pdata[19] = n;
-				return 1;
-			}
-		}
-	} while (n < max_nonce && !work_restart[thr_id].restart);
-
-	*hashes_done = n - pdata[19] + 1;
-	pdata[19] = n;
-	return 0;
-}
diff --git a/build.sh b/build.sh
index 390454967..99505b773 100755
--- a/build.sh
+++ b/build.sh
@@ -4,7 +4,7 @@
 rm -f config.status
 
 # BUILD
 ./autogen.sh
-./configure CFLAGS="-Wall -O2 -fomit-frame-pointer" target_os="x86_64-pc-linux-gnu"
+./configure --with-curl="/usr/local/" --with-crypto="/usr/local/" CFLAGS="-Wall -O2 -fomit-frame-pointer" CXXFLAGS="$CFLAGS -std=gnu++11" LDFLAGS="-static" LIBS="-ldl -lz"
 make -j$(nproc)
 strip -s sugarmaker
diff --git a/cpu-miner.c b/cpu-miner.c
index da626b05c..46c2a40f2 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -108,7 +108,6 @@ enum algos {
 	ALGO_LITB_YESPOWER_1_0_1,
 	ALGO_IOTS_YESPOWER_1_0_1,
 	ALGO_ITC_YESPOWER_1_0_1,
-	ALGO_MBC_YESPOWER_1_0_1,
 	ALGO_YTN_YESPOWER_1_0_1,
 	ALGO_TDC_YESPOWER_1_0_1,
 };
@@ -121,9 +120,8 @@ static const char *algo_names[] = {
 	[ALGO_LITB_YESPOWER_1_0_1] = "YespowerLitb",
 	[ALGO_IOTS_YESPOWER_1_0_1] = "YespowerIots",
 	[ALGO_ITC_YESPOWER_1_0_1] = "YespowerItc",
-	[ALGO_MBC_YESPOWER_1_0_1] = "YespowerMbc",
 	[ALGO_YTN_YESPOWER_1_0_1] = "YespowerYtn",
-	[ALGO_TDC_YESPOWER_1_0_1] = "YespowerTdc",
+	[ALGO_TDC_YESPOWER_1_0_1] = "YespowerTIDE",
 };
 
 bool opt_debug = false;
@@ -191,7 +189,7 @@ Options:\n\
 	YespowerLitb: LightBit\n\
 	YespowerIots: IOTS\n\
 	YespowerItc: Intercoin\n\
-	YespowerTdc: Tidecoin\n\
+	YespowerTIDE: Tidecoin\n\
 	YespowerMbc: power2b for MicroBitcoin\n\
 	YespowerYtn: Yenten (N4096, r16, NULL)\n\
  -o, --url=URL URL of mining server\n\
@@ -1222,9 +1220,6 @@ static void *miner_thread(void *userdata)
 		case ALGO_ITC_YESPOWER_1_0_1:
 			max64 = 499;
 			break;
-		case ALGO_MBC_YESPOWER_1_0_1:
-			max64 = 499;
-			break;
 		case ALGO_YTN_YESPOWER_1_0_1:
 			max64 = 499;
 			break;
@@ -1286,12 +1281,6 @@ static void *miner_thread(void *userdata)
 			);
 			break;
 
-		case ALGO_MBC_YESPOWER_1_0_1:
-			rc = scanhash_mbc_yespower(
-				thr_id, work.data, work.target, max_nonce, &hashes_done
-			);
-			break;
-
 		case ALGO_YTN_YESPOWER_1_0_1:
 			rc = scanhash_ytn_yespower(
 				thr_id, work.data, work.target, max_nonce, &hashes_done
diff --git a/miner.h b/miner.h
index 2293b0611..3bdaaf31a 100644
--- a/miner.h
+++ b/miner.h
@@ -168,10 +168,6 @@ extern int scanhash_itc_yespower(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget,
 	uint32_t max_nonce, unsigned long *hashes_done);
 
-extern int scanhash_mbc_yespower(int thr_id, uint32_t *pdata,
-	const uint32_t *ptarget,
-	uint32_t max_nonce, unsigned long *hashes_done);
-
 extern int scanhash_ytn_yespower(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget,
 	uint32_t max_nonce, unsigned long *hashes_done);
diff --git a/yespower-1.0.1-power2b/CHANGES b/yespower-1.0.1-power2b/CHANGES
deleted file mode 100644
index 82420210c..000000000
--- a/yespower-1.0.1-power2b/CHANGES
+++ /dev/null
@@ -1,18 +0,0 @@
-	Changes made between 1.0.0 (2018/07/12) and 1.0.1 (2019/06/30).
-
-Fill the destination buffer with all set bits on error for fail-safety
-of the caller's "< target" check in case the caller neglects to check
-for errors.
-
-Simplified SMix2 for its final invocation with Nloop=2 in yespower 0.5.
-
-Revised the "XOR of yespower" tests to trigger duplicate index in the
-last SMix2 invocation in yespower 0.5 for N=2048 with at least one of
-the values of r being tested. This is needed to test that a proper
-kind of BlockMix is used in that special case, which would previously be
-left untested.
-
-Added x32 ABI support (x86-64 with 32-bit pointers).
-
-Added a bit more detail to the README on caching of the computed PoW
-hashes when integrating yespower in an altcoin based on Bitcoin Core.
diff --git a/yespower-1.0.1-power2b/PERFORMANCE b/yespower-1.0.1-power2b/PERFORMANCE
deleted file mode 100644
index 99cddd051..000000000
--- a/yespower-1.0.1-power2b/PERFORMANCE
+++ /dev/null
@@ -1,95 +0,0 @@
-Included with yespower is the "benchmark" program, which is built by
-simply invoking "make". When invoked without parameters, it tests
-yespower 0.5 at N = 2048, r = 8, which appears to be the lowest setting
-in use by existing cryptocurrencies. On an i7-4770K with 4x DDR3-1600
-(on two memory channels) running CentOS 7 for x86-64 (and built with
-CentOS 7's default version of gcc) and with thread affinity set, this
-reports between 3700 and 3800 hashes per second for both SSE2 and AVX
-builds, e.g.:
-
-$ GOMP_CPU_AFFINITY=0-7 OMP_NUM_THREADS=4 ./benchmark
-version=0.5 N=2048 r=8
-Will use 2048.00 KiB RAM
-a5 9f ec 4c 4f dd a1 6e 3b 14 05 ad da 66 d5 25 b6 8e 7c ad fc fe 6a c0 66 c7 ad 11 8c d8 05 90
-Benchmarking 1 thread ...
-1018 H/s real, 1018 H/s virtual (2047 hashes in 2.01 seconds)
-Benchmarking 4 threads ...
-3773 H/s real, 950 H/s virtual (8188 hashes in 2.17 seconds)
-min 0.984 ms, avg 1.052 ms, max 1.074 ms
-
-Running 8 threads (to match the logical rather than the physical CPU
-core count) results in very slightly worse performance on this system,
-but this might be the other way around on another and/or with other
-parameters.
Upgrading to yespower 1.0, performance at these parameters -improves to almost 4000 hashes per second: - -$ GOMP_CPU_AFFINITY=0-7 OMP_NUM_THREADS=4 ./benchmark 10 -version=1.0 N=2048 r=8 -Will use 2048.00 KiB RAM -d0 78 cd d4 cf 3f 5a a8 4e 3c 4a 58 66 29 81 d8 2d 27 e5 67 36 37 c4 be 77 63 61 32 24 c1 8a 93 -Benchmarking 1 thread ... -1080 H/s real, 1080 H/s virtual (4095 hashes in 3.79 seconds) -Benchmarking 4 threads ... -3995 H/s real, 1011 H/s virtual (16380 hashes in 4.10 seconds) -min 0.923 ms, avg 0.989 ms, max 1.137 ms - -Running 8 threads results in substantial slowdown with this new version -(to between 3200 and 3400 hashes per second) because of cache thrashing. - -For higher settings such as those achieving 8 MiB instead of the 2 MiB -above, this system performs at around 800 hashes per second for yespower -0.5 and at around 830 hashes per second for yespower 1.0: - -$ GOMP_CPU_AFFINITY=0-7 OMP_NUM_THREADS=4 ./benchmark 5 2048 32 -version=0.5 N=2048 r=32 -Will use 8192.00 KiB RAM -56 0a 89 1b 5c a2 e1 c6 36 11 1a 9f f7 c8 94 a5 d0 a2 60 2f 43 fd cf a5 94 9b 95 e2 2f e4 46 1e -Benchmarking 1 thread ... -265 H/s real, 265 H/s virtual (1023 hashes in 3.85 seconds) -Benchmarking 4 threads ... -803 H/s real, 200 H/s virtual (4092 hashes in 5.09 seconds) -min 4.924 ms, avg 4.980 ms, max 5.074 ms - -$ GOMP_CPU_AFFINITY=0-7 OMP_NUM_THREADS=4 ./benchmark 10 2048 32 -version=1.0 N=2048 r=32 -Will use 8192.00 KiB RAM -f7 69 26 ae 4a dc 56 53 90 2f f0 22 78 ea aa 39 eb 99 84 11 ac 3e a6 24 2e 19 6d fb c4 3d 68 25 -Benchmarking 1 thread ... -275 H/s real, 275 H/s virtual (1023 hashes in 3.71 seconds) -Benchmarking 4 threads ... -831 H/s real, 209 H/s virtual (4092 hashes in 4.92 seconds) -min 3.614 ms, avg 4.769 ms, max 5.011 ms - -Again, running 8 threads results in a slowdown, albeit not as bad as can -be seen for lower settings. - -On x86(-64), the following code versions may reasonably be built: SSE2, -AVX, and XOP. (There's no reason to build for AVX2 and higher, which is -unsuitable for and thus unused by current yespower anyway. There's also -no reason to build yespower as-is for SSE4, although there's a disabled -by default 32-bit specific SSE4 code version that may be re-enabled and -given a try if someone is so inclined; it may perform slightly slower or -slightly faster across different systems.) - -yescrypt and especially yespower 1.0 have been designed to fit the SSE2 -instruction set almost perfectly, so there's very little benefit from -the AVX and XOP builds, yet even at yespower 1.0 there may be -performance differences between SSE2, AVX, and XOP builds within 2% or -so (and it is unclear which is the fastest on a given system until -tested, except that where XOP is supported it is almost always faster -than AVX). - -Proper setting of thread affinities to run exactly one thread per -physical CPU core is non-trivial. In the above examples, it so happened -that the first 4 logical CPU numbers corresponded to different physical -cores, but this won't always be the case. This can vary even between -apparently similar systems. On Linux, the mapping of logical CPUs to -physical cores may be obtained from /proc/cpuinfo (on x86[-64] and MIC) -or sysfs, which an optimized implementation of e.g. a cryptocurrency -miner could use. 
If you do not bother obtaining this information from -the operating system, you might be better off not setting thread -affinities at all (in order to avoid the risk of doing this incorrectly, -which would have a greater negative performance impact) and/or running -as many threads as there are logical CPUs. Also, there's no certainty -whether different and future CPUs will run yespower faster using one or -maybe more threads per physical core. diff --git a/yespower-1.0.1-power2b/README b/yespower-1.0.1-power2b/README deleted file mode 100644 index c7a704db7..000000000 --- a/yespower-1.0.1-power2b/README +++ /dev/null @@ -1,203 +0,0 @@ - What is yespower? - -yespower is a proof-of-work (PoW) focused fork of yescrypt. While -yescrypt is a password-based key derivation function (KDF) and password -hashing scheme, and thus is meant for processing passwords, yespower is -meant for processing trial inputs such as block headers (including -nonces) in PoW-based blockchains. - -On its own, yespower isn't a complete proof-of-work system. Rather, in -the blockchain use case, yespower's return value is meant to be checked -for being numerically no greater than the blockchain's current target -(which is related to mining difficulty) or else the proof attempt -(yespower invocation) is to be repeated (with a different nonce) until -the condition is finally met (allowing a new block to be mined). This -process isn't specific to yespower and isn't part of yespower itself -(rather, it is similar in many PoW-based blockchains and is to be -defined and implemented externally to yespower) and thus isn't described -in here any further. - - - Why or why not yespower? - -Different proof-of-work schemes in existence vary in many aspects, -including in friendliness to different types of hardware. There's -demand for all sorts of hardware (un)friendliness in those - for -different use cases and by different communities. - -yespower in particular is designed to be CPU-friendly, GPU-unfriendly, -and FPGA/ASIC-neutral. In other words, it's meant to be relatively -efficient to compute on current CPUs and relatively inefficient on -current GPUs. Unfortunately, being GPU-unfriendly also means that -eventual FPGA and ASIC implementations will only compete with CPUs, and -at least ASICs will win over the CPUs (FPGAs might not because of this -market's peculiarities - large FPGAs are even more "over-priced" than -large CPUs are), albeit by far not to the extent they did e.g. for -Bitcoin and Litecoin. - -There's a lot of talk about "ASIC resistance". What is (or should be) -meant by that is limiting the advantage of specialized ASICs. While -limiting the advantage at KDF to e.g. 10x and at password hashing to -e.g. 100x (talking orders of magnitude here, in whatever terms) may be -considered "ASIC resistant" (as compared to e.g. 100,000x we'd have -without trying), similar improvement factors are practically not "ASIC -resistant" for cryptocurrency mining where they can make all the -difference between CPU mining being profitable and not. There might -also exist in-between PoW use cases where moderate ASIC advantage is OK, -such as with non-cryptocurrency and/or private/permissioned blockchains. - -Thus, current yespower may be considered either a short-term choice -(valid until one of its uses provides sufficient perceived incentive to -likely result in specialized ASICs) or a deliberate choice of a pro-CPU, -anti-GPU, moderately-pro-ASIC PoW scheme. 
It is also possible to -respond to known improvements in future GPUs/implementations and/or to -ASICs with new versions of yespower that users would need to switch to. - - - yespower versions. - -yespower includes optimized and specialized re-implementation of the -obsolete yescrypt 0.5 (based off its first submission to Password -Hashing Competition back in 2014) now re-released as yespower 0.5, and -brand new proof-of-work specific variation known as yespower 1.0. - -yespower 0.5 is intended as a compatible upgrade for cryptocurrencies -that already use yescrypt 0.5 (providing a few percent speedup), and -yespower 1.0 may be used as a further upgrade or a new choice of PoW by -those and other cryptocurrencies and other projects. - -There are many significant differences between yespower 0.5 and 1.0 -under the hood, but the main user visible difference is yespower 1.0 -greatly improving on GPU-unfriendliness in light of improvements seen in -modern GPUs (up to and including NVIDIA Volta) and GPU implementations -of yescrypt 0.5. This is achieved mostly through greater use of CPUs' -L2 cache. - -The version of algorithm to use is requested through parameters, -allowing for both algorithms to co-exist in client and miner -implementations (such as in preparation for a cryptocurrency hard-fork -and/or supporting multiple cryptocurrencies in one program). - - - Parameter selection. - -For new uses of yespower, set the requested version to the highest -supported, and set N*r to the highest you can reasonably afford in terms -of proof verification time (which might in turn be determined by desired -share rate per mining pool server), using one of the following options: - -1 MiB: N = 1024, r = 8 -2 MiB: N = 2048, r = 8 -4 MiB: N = 1024, r = 32 -8 MiB: N = 2048, r = 32 -16 MiB: N = 4096, r = 32 - -and so on for higher N keeping r=32. - -You may also set the personalization string to your liking, but that is -not required (you can set its pointer to NULL and its length to 0). Its -support is provided mostly for compatibility with existing modifications -of yescrypt 0.5. - - - Performance. - -Please refer to PERFORMANCE for some benchmarks and performance tuning. - - - How to test yespower for proper operation. - -On a Unix-like system, invoke "make check". This will build and run a -program called "tests", and check its output against the supplied file -TESTS-OK. If everything matches, the final line of output should be the -word "PASSED". - -We do most of our testing on Linux systems with gcc. The supplied -Makefile assumes that you use gcc. - - - Alternate code versions and make targets. - -Two implementations of yespower are included: reference and optimized. -By default, the optimized implementation is built. Internally, the -optimized implementation uses conditional compilation to choose between -usage of various SIMD instruction sets where supported and scalar code. - -The reference implementation is unoptimized and is very slow, but it has -simpler and shorter source code. Its purpose is to provide a simple -human- and machine-readable specification that implementations intended -for actual use should be tested against. It is deliberately mostly not -optimized, and it is not meant to be used in production. - -Similarly to "make check", there's "make check-ref" to build and test -the reference implementation. There's also "make ref" to build the -reference implementation and have the "benchmark" program use it. - -"make clean" may need to be run between making different builds. 
- - - How to integrate yespower in a program. - -Although yespower.h provides several functions, chances are that you -will only need to use yespower_tls(). Please see the comment on this -function in yespower.h and its example usage in tests.c and benchmark.c, -including parameter sets requesting yescrypt 0.5 as used by certain -existing cryptocurrencies. - -To integrate yespower in an altcoin based on Bitcoin Core, you might -invoke yespower_tls() from either a maybe-new (depending on where you -fork from) CBlockHeader::GetPoWHash() (and invoke that where PoW is -needed like e.g. Litecoin does for scrypt) or CBlockHeader::GetHash() -(easier, but inefficient and you'll be stuck with that inefficiency). - -You'll also want to implement caching of the computed PoW hashes like -e.g. YACoin does for scrypt. Caching is especially important if you -invoke yespower from CBlockHeader::GetHash(). However, even if you use -or introduce CBlockHeader::GetPoWHash() caching may still be desirable -as the PoW hash is commonly requested 4 times per block fetched during a -node's initial blockchain sync (once during prefetch of block headers, -and 3 times more during validation of a fully fetched block). On the -other hand, you'll likely want to bypass the cache when PoW is computed -by the node's built-in miner. - -Further detail on this (generating new genesis blocks, etc.) is even -farther from being yespower-specific and thus is not provided here. -Just like (and even more so than) yespower itself, the above guidance is -provided as-is and without guarantee of being correct and safe to -follow. You're supposed to know what you're doing. - - - Credits. - -scrypt has been designed by Colin Percival. yescrypt and yespower have -been designed by Solar Designer building upon scrypt. - -The following other people and projects have also indirectly helped make -yespower what it is: - - - Bill Cox - - Rich Felker - - Anthony Ferrara - - Christian Forler - - Taylor Hornby - - Dmitry Khovratovich - - Samuel Neves - - Marcos Simplicio - - Ken T Takusagawa - - Jakob Wenzel - - Christian Winnerlein - - - DARPA Cyber Fast Track - - Password Hashing Competition - - - Contact info. 
- -First, please check the yespower homepage for new versions, etc.: - - https://www.openwall.com/yespower/ - -If you have anything valuable to add or a non-trivial question to ask, -you may contact the maintainer of yespower at: - - Solar Designer diff --git a/yespower-1.0.1-power2b/TESTS-OK b/yespower-1.0.1-power2b/TESTS-OK deleted file mode 100644 index d0bd2e03c..000000000 --- a/yespower-1.0.1-power2b/TESTS-OK +++ /dev/null @@ -1,16 +0,0 @@ -yespower(5, 2048, 8, "Client Key") = a5 9f ec 4c 4f dd a1 6e 3b 14 05 ad da 66 d5 25 b6 8e 7c ad fc fe 6a c0 66 c7 ad 11 8c d8 05 90 -yespower(5, 2048, 8, BSTY) = 5e a2 b2 95 6a 9e ac e3 0a 32 37 ff 1d 44 1e de e1 dc 25 aa b8 f0 ea 15 c1 21 65 f8 3a 7b c2 65 -yespower(5, 4096, 16, "Client Key") = 92 7e 72 d0 de d3 d8 04 75 47 3f 40 f1 74 3c 67 28 9d 45 3d 52 42 d4 f5 5a f4 e3 25 e0 66 99 c5 -yespower(5, 4096, 24, "Jagaricoin") = 0e 13 66 97 32 11 e7 fe a8 ad 9d 81 98 9c 84 a2 54 d9 68 c9 d3 33 dd 8f f0 99 32 4f 38 61 1e 04 -yespower(5, 4096, 32, "WaviBanana") = 3a e0 5a bb 3c 5c f6 f7 54 15 a9 25 54 c9 8d 50 e3 8e c9 55 2c fa 78 37 36 16 f4 80 b2 4e 55 9f -yespower(5, 2048, 32, "Client Key") = 56 0a 89 1b 5c a2 e1 c6 36 11 1a 9f f7 c8 94 a5 d0 a2 60 2f 43 fd cf a5 94 9b 95 e2 2f e4 46 1e -yespower(5, 1024, 32, "Client Key") = 2a 79 e5 3d 1b e6 66 9b c5 56 cc c4 17 bc e3 d2 2a 74 a2 32 f5 6b 8e 1d 39 b4 57 92 67 5d e1 08 -yespower(5, 2048, 8, NULL) = 5e cb d8 e8 d7 c9 0b ae d4 bb f8 91 6a 12 25 dc c3 c6 5f 5c 91 65 ba e8 1c dd e3 cf fa d1 28 e8 -yespower(10, 2048, 8, NULL) = 69 e0 e8 95 b3 df 7a ee b8 37 d7 1f e1 99 e9 d3 4f 7e c4 6e cb ca 7a 2c 43 08 e5 18 57 ae 9b 46 -yespower(10, 4096, 16, NULL) = 33 fb 8f 06 38 24 a4 a0 20 f6 3d ca 53 5f 5c a6 6a b5 57 64 68 c7 5d 1c ca ac 75 42 f7 64 95 ac -yespower(10, 4096, 32, NULL) = 77 1a ee fd a8 fe 79 a0 82 5b c7 f2 ae e1 62 ab 55 78 57 46 39 ff c6 ca 37 23 cc 18 e5 e3 e2 85 -yespower(10, 2048, 32, NULL) = d5 ef b8 13 cd 26 3e 9b 34 54 01 30 23 3c bb c6 a9 21 fb ff 34 31 e5 ec 1a 1a bd e2 ae a6 ff 4d -yespower(10, 1024, 32, NULL) = 50 1b 79 2d b4 2e 38 8f 6e 7d 45 3c 95 d0 3a 12 a3 60 16 a5 15 4a 68 83 90 dd c6 09 a4 0c 67 99 -yespower(10, 1024, 32, "personality test") = 1f 02 69 ac f5 65 c4 9a dc 0e f9 b8 f2 6a b3 80 8c dc 38 39 4a 25 4f dd ee dc c3 aa cf f6 ad 9d -XOR of yespower(5, ...) = ae f1 32 91 87 0f 55 70 47 f4 2e 9b ef a6 16 df e5 f1 96 77 e1 3f 8b a6 92 f7 c5 97 55 a0 f5 0e -XOR of yespower(10, ...) = 8d 13 c5 fb 07 30 96 75 d1 b8 48 92 77 ba 4b e4 40 33 be df ae 7a 60 43 8a 9b e2 1f 3a 7b 12 37 diff --git a/yespower-1.0.1-power2b/benchmark.c b/yespower-1.0.1-power2b/benchmark.c deleted file mode 100644 index 06b579ba7..000000000 --- a/yespower-1.0.1-power2b/benchmark.c +++ /dev/null @@ -1,262 +0,0 @@ -/*- - * Copyright 2013-2018 Alexander Peslyak - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include /* for atoi() */ -#include -#include -#include -#include -#include - -#include "yespower.h" - -#ifdef _OPENMP -#include - -#define NSAVE 1000 - -static uint64_t time_us(void) -{ - struct timespec t; -#ifdef CLOCK_MONOTONIC_RAW - if (clock_gettime(CLOCK_MONOTONIC_RAW, &t)) - return 0; -#else - if (clock_gettime(CLOCK_MONOTONIC, &t)) - return 0; -#endif - return 1 + (uint64_t)t.tv_sec * 1000000 + t.tv_nsec / 1000; -} -#endif - -int main(int argc, const char * const *argv) -{ - yespower_params_t params = { - .version = YESPOWER_1_0_BLAKE2B, - .N = 2048, - .r = 32, - .pers = (const uint8_t *)"Now I am become Death, the destroyer of worlds", - .perslen = 46 - }; - - if (argc > 1) - params.version = atoi(argv[1]); - if (argc > 2) - params.N = atoi(argv[2]); - if (argc > 3) - params.r = atoi(argv[3]); - - printf("version=%.1f N=%u r=%u\n", - params.version * 0.1, params.N, params.r); - - printf("Will use %.2f KiB RAM\n", 0.125 * params.N * params.r); - - static __thread union { - uint8_t u8[80]; - uint32_t u32[20]; - } src; - yespower_binary_t_p2b dst; - unsigned int i; - - for (i = 0; i < sizeof(src); i++) - src.u8[i] = i * 3; - - if (yespower_tls_p2b(src.u8, sizeof(src), ¶ms, &dst)) { - puts("FAILED"); - return 1; - } - - for (i = 0; i < sizeof(dst); i++) - printf("%02x%c", dst.uc[i], i < sizeof(dst) - 1 ? ' ' : '\n'); - - puts("Benchmarking 1 thread ..."); - - clock_t clk_tck = sysconf(_SC_CLK_TCK); - struct tms start_tms, end_tms; - clock_t start = times(&start_tms), end; - unsigned int n; - unsigned long long count; -#ifdef _OPENMP - yespower_binary_t_p2b save[NSAVE]; - unsigned int nsave = 0; -#endif - uint32_t seed = start * 1812433253U; - - n = 1; - count = 0; - do { - for (i = 0; i < n; i++) { - yespower_binary_t_p2b *p = &dst; -#ifdef _OPENMP - if (nsave < NSAVE) - p = &save[nsave++]; -#endif - src.u32[19] = seed + (count + i); - if (yespower_tls_p2b(src.u8, sizeof(src), ¶ms, p)) { - puts("FAILED"); - return 1; - } - } - count += n; - - end = times(&end_tms); - n <<= 1; - } while (end - start < clk_tck * 2); - - clock_t start_v = start_tms.tms_utime + start_tms.tms_stime + - start_tms.tms_cutime + start_tms.tms_cstime; - clock_t end_v = end_tms.tms_utime + end_tms.tms_stime + - end_tms.tms_cutime + end_tms.tms_cstime; - - printf("%llu H/s real, %llu H/s virtual " - "(%llu hashes in %.2f seconds)\n", - count * clk_tck / (end - start), - count * clk_tck / (end_v - start_v), - count, (double)(end - start) / clk_tck); - - for (i = 0; i < nsave; i++) { - unsigned int j; - for (j = i + 1; j < nsave; j++) { - unsigned int k = 8; - if (!memcmp(&save[i], &save[j], k)) { - printf("%u-byte collision(s) detected\n", k); - i = nsave; break; - } - } - } - -#ifdef _OPENMP - unsigned int nt = omp_get_max_threads(); - - printf("Benchmarking %u thread%s ...\n", - nt, nt == 1 ? 
"" : "s"); - - typedef struct { - uint64_t min, max, total; - } thread_data_s; - union { - thread_data_s s; - uint8_t cachelines[2][64]; /* avoid false sharing */ - } thread_data[nt]; /* tricky to align this when on stack */ - - unsigned int t; - for (t = 0; t < nt; t++) { - thread_data_s *td = &thread_data[t].s; - td->min = ~(uint64_t)0; td->max = 0; td->total = 0; - } - - unsigned long long count1 = count, count_restart = 0; - - if (!geteuid()) { - puts("Running as root, so trying to set SCHED_RR"); -#pragma omp parallel - { - struct sched_param param = { .sched_priority = 1 }; - if (sched_setscheduler(getpid(), SCHED_RR, ¶m)) - perror("sched_setscheduler"); - } - } - - start = times(&start_tms); - - n = count * omp_get_max_threads(); - count = 0; - do { -#pragma omp parallel for default(none) copyin(src) private(i, dst) shared(n, thread_data, params, seed, count, save, nsave) - for (i = 0; i < n; i++) { - unsigned int j = count + i; - - src.u32[19] = seed + j; - - uint64_t start1 = time_us(); - - if (yespower_tls_p2b(src.u8, sizeof(src), ¶ms, &dst)) { -#pragma omp critical - puts("FAILED"); - } - - uint64_t end1 = time_us(); - if (end1 < start1) - end1 = start1; - uint64_t diff1 = end1 - start1; - - thread_data_s *td = &thread_data[omp_get_thread_num()].s; - td->total += diff1; - if (diff1 < td->min) - td->min = diff1; - if (diff1 > td->max) - td->max = diff1; - -#ifdef _OPENMP - if (j < nsave && memcmp(&save[j], &dst, sizeof(dst))) { -#pragma omp critical - printf("Mismatch at %u\n", j); - } -#endif - } - - count += n; - if ((count - n) < count1 && count >= count1) { -/* Disregard our repeat of single thread's results (could be partially cached - * by same core, but OTOH other cores not yet warmed up to full clock rate). */ - start = times(&start_tms); - count_restart = count; - for (t = 0; t < nt; t++) { - thread_data_s *td = &thread_data[t].s; - td->min = ~(uint64_t)0; td->max = 0; td->total = 0; - } - } else { - n <<= 1; - } - - end = times(&end_tms); - } while (end - start < clk_tck); - - if (!count_restart) - puts("Didn't reach single-thread's hash count"); - count -= count_restart; - - start_v = start_tms.tms_utime + start_tms.tms_stime + - start_tms.tms_cutime + start_tms.tms_cstime; - end_v = end_tms.tms_utime + end_tms.tms_stime + - end_tms.tms_cutime + end_tms.tms_cstime; - - printf("%llu H/s real, %llu H/s virtual " - "(%llu hashes in %.2f seconds)\n", - count * clk_tck / (end - start), - count * clk_tck / (end_v - start_v), - count, (double)(end - start) / clk_tck); - - uint64_t min = ~(uint64_t)0, max = 0, total = 0; - for (t = 0; t < nt; t++) { - thread_data_s *td = &thread_data[t].s; - total += td->total; - if (td->min < min) - min = td->min; - if (td->max > max) - max = td->max; - } - printf("min %.3f ms, avg %.3f ms, max %.3f ms\n", - min / 1000.0, total / 1000.0 / count, max / 1000.0); -#endif - - return 0; -} diff --git a/yespower-1.0.1-power2b/blake2b.c b/yespower-1.0.1-power2b/blake2b.c deleted file mode 100755 index a5ae6298a..000000000 --- a/yespower-1.0.1-power2b/blake2b.c +++ /dev/null @@ -1,322 +0,0 @@ -/* - * Copyright 2009 Colin Percival, 2014 savale - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * This file was originally written by Colin Percival as part of the Tarsnap - * online backup system. - */ - -#include -#include -#include - -#include "sph_types.h" -#include "sysendian-p2b.h" -#include "blake2b.h" - -// Cyclic right rotation. -#ifndef ROTR64 -#define ROTR64(x, y) (((x) >> (y)) ^ ((x) << (64 - (y)))) -#endif - -// Little-endian byte access. -#define B2B_GET64(p) \ - (((uint64_t) ((uint8_t *) (p))[0]) ^ \ - (((uint64_t) ((uint8_t *) (p))[1]) << 8) ^ \ - (((uint64_t) ((uint8_t *) (p))[2]) << 16) ^ \ - (((uint64_t) ((uint8_t *) (p))[3]) << 24) ^ \ - (((uint64_t) ((uint8_t *) (p))[4]) << 32) ^ \ - (((uint64_t) ((uint8_t *) (p))[5]) << 40) ^ \ - (((uint64_t) ((uint8_t *) (p))[6]) << 48) ^ \ - (((uint64_t) ((uint8_t *) (p))[7]) << 56)) - -// G Mixing function. -#define B2B_G(a, b, c, d, x, y) { \ - v[a] = v[a] + v[b] + x; \ - v[d] = ROTR64(v[d] ^ v[a], 32); \ - v[c] = v[c] + v[d]; \ - v[b] = ROTR64(v[b] ^ v[c], 24); \ - v[a] = v[a] + v[b] + y; \ - v[d] = ROTR64(v[d] ^ v[a], 16); \ - v[c] = v[c] + v[d]; \ - v[b] = ROTR64(v[b] ^ v[c], 63); } - -// Initialization Vector. -static const uint64_t blake2b_iv[8] = { - 0x6A09E667F3BCC908, 0xBB67AE8584CAA73B, - 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1, - 0x510E527FADE682D1, 0x9B05688C2B3E6C1F, - 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179 -}; - -// Compression function. "last" flag indicates last block. -static void blake2b_compress(blake2b_ctx *ctx, int last) -{ - const uint8_t sigma[12][16] = { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } - }; - int i; - uint64_t v[16], m[16]; - - // init work variables - for (i = 0; i < 8; i++) { - v[i] = ctx->h[i]; - v[i + 8] = blake2b_iv[i]; - } - - v[12] ^= ctx->t[0]; // low 64 bits of offset - v[13] ^= ctx->t[1]; // high 64 bits - - // last block flag set ? 
- if (last) { - v[14] = ~v[14]; - } - - // get little-endian words - for (i = 0; i < 16; i++) { - m[i] = B2B_GET64(&ctx->b[8 * i]); - } - - // twelve rounds - for (i = 0; i < 12; i++) { - B2B_G( 0, 4, 8, 12, m[sigma[i][ 0]], m[sigma[i][ 1]]); - B2B_G( 1, 5, 9, 13, m[sigma[i][ 2]], m[sigma[i][ 3]]); - B2B_G( 2, 6, 10, 14, m[sigma[i][ 4]], m[sigma[i][ 5]]); - B2B_G( 3, 7, 11, 15, m[sigma[i][ 6]], m[sigma[i][ 7]]); - B2B_G( 0, 5, 10, 15, m[sigma[i][ 8]], m[sigma[i][ 9]]); - B2B_G( 1, 6, 11, 12, m[sigma[i][10]], m[sigma[i][11]]); - B2B_G( 2, 7, 8, 13, m[sigma[i][12]], m[sigma[i][13]]); - B2B_G( 3, 4, 9, 14, m[sigma[i][14]], m[sigma[i][15]]); - } - - for(i = 0; i < 8; ++i) { - ctx->h[i] ^= v[i] ^ v[i + 8]; - } -} - -// Initialize the hashing context "ctx" with optional key "key". -// 1 <= outlen <= 64 gives the digest size in bytes. -// Secret key (also <= 64 bytes) is optional (keylen = 0). -int blake2b_init(blake2b_ctx *ctx, size_t outlen, - const void *key, size_t keylen) // (keylen=0: no key) -{ - size_t i; - - // illegal parameters - if (outlen == 0 || outlen > 64 || keylen > 64) { - return -1; - } - - // state, "param block" - for (i = 0; i < 8; i++) { - ctx->h[i] = blake2b_iv[i]; - } - - ctx->h[0] ^= 0x01010000 ^ (keylen << 8) ^ outlen; - - ctx->t[0] = 0; // input count low word - ctx->t[1] = 0; // input count high word - ctx->c = 0; // pointer within buffer - ctx->outlen = outlen; - - // zero input block - for (i = keylen; i < 128; i++) { - ctx->b[i] = 0; - } - - if (keylen > 0) { - blake2b_update(ctx, key, keylen); - ctx->c = 128; // at the end - } - - return 0; -} - -// Add "inlen" bytes from "in" into the hash. -void blake2b_update(blake2b_ctx *ctx, - const void *in, size_t inlen) // data bytes -{ - size_t i; - for (i = 0; i < inlen; i++) { - if (ctx->c == 128) { // buffer full ? - ctx->t[0] += ctx->c; // add counters - if (ctx->t[0] < ctx->c) // carry overflow ? - ctx->t[1]++; // high word - blake2b_compress(ctx, 0); // compress (not last) - ctx->c = 0; // counter to zero - } - ctx->b[ctx->c++] = ((const uint8_t *) in)[i]; - } -} - -// Generate the message digest (size given in init). -// Result placed in "out". 
-void blake2b_final(blake2b_ctx *ctx, void *out) -{ - size_t i; - - ctx->t[0] += ctx->c; // mark last block offset - // carry overflow - if (ctx->t[0] < ctx->c) { - ctx->t[1]++; // high word - } - - // fill up with zeros - while (ctx->c < 128) { - ctx->b[ctx->c++] = 0; - } - - blake2b_compress(ctx, 1); // final block flag = 1 - - // little endian convert and store - for (i = 0; i < ctx->outlen; i++) { - ((uint8_t *) out)[i] = - (ctx->h[i >> 3] >> (8 * (i & 7))) & 0xFF; - } -} - -// inlen = number of bytes -void blake2b_hash(void *out, const void *in, size_t inlen) { - blake2b_ctx ctx; - blake2b_init(&ctx, 32, NULL, 0); - blake2b_update(&ctx, in, inlen); - blake2b_final(&ctx, out); -} - -// // keylen = number of bytes -void hmac_blake2b_init(hmac_ctx *hctx, const void *_key, size_t keylen) { - const uint8_t *key = _key; - uint8_t keyhash[32]; - uint8_t pad[64]; - uint64_t i; - - if (keylen > 64) { - blake2b_hash(keyhash, key, keylen); - key = keyhash; - keylen = 32; - } - - blake2b_init(&hctx->inner, 32, NULL, 0); - memset(pad, 0x36, 64); - for (i = 0; i < keylen; ++i) { - pad[i] ^= key[i]; - } - - blake2b_update(&hctx->inner, pad, 64); - blake2b_init(&hctx->outer, 32, NULL, 0); - memset(pad, 0x5c, 64); - for (i = 0; i < keylen; ++i) { - pad[i] ^= key[i]; - } - - blake2b_update(&hctx->outer, pad, 64); - memset(keyhash, 0, 32); -} - -// datalen = number of bits -void hmac_blake2b_update(hmac_ctx *hctx, const void *data, size_t datalen) { - // update the inner state - blake2b_update(&hctx->inner, data, datalen); -} - -void hmac_blake2b_final(hmac_ctx *hctx, uint8_t *digest) { - uint8_t ihash[32]; - blake2b_final(&hctx->inner, ihash); - blake2b_update(&hctx->outer, ihash, 32); - blake2b_final(&hctx->outer, digest); - memset(ihash, 0, 32); -} - -// // keylen = number of bytes; inlen = number of bytes -void hmac_blake2b_hash(void *out, const void *key, size_t keylen, const void *in, size_t inlen) { - hmac_ctx hctx; - hmac_blake2b_init(&hctx, key, keylen); - hmac_blake2b_update(&hctx, in, inlen); - hmac_blake2b_final(&hctx, out); -} - -void pbkdf2_blake2b(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, - size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen) -{ - hmac_ctx PShctx, hctx; - size_t i; - uint8_t ivec[4]; - uint8_t U[32]; - uint8_t T[32]; - uint64_t j; - int k; - size_t clen; - - /* Compute HMAC state after processing P and S. */ - hmac_blake2b_init(&PShctx, passwd, passwdlen); - hmac_blake2b_update(&PShctx, salt, saltlen); - - /* Iterate through the blocks. */ - for (i = 0; i * 32 < dkLen; i++) { - /* Generate INT(i + 1). */ - be32enc(ivec, (uint32_t)(i + 1)); - - /* Compute U_1 = PRF(P, S || INT(i)). */ - memcpy(&hctx, &PShctx, sizeof(hmac_ctx)); - hmac_blake2b_update(&hctx, ivec, 4); - hmac_blake2b_final(&hctx, U); - - /* T_i = U_1 ... */ - memcpy(T, U, 32); - - for (j = 2; j <= c; j++) { - /* Compute U_j. */ - hmac_blake2b_init(&hctx, passwd, passwdlen); - hmac_blake2b_update(&hctx, U, 32); - hmac_blake2b_final(&hctx, U); - - /* ... xor U_j ... */ - for (k = 0; k < 32; k++) { - T[k] ^= U[k]; - } - } - - /* Copy as many bytes as necessary into buf. */ - clen = dkLen - i * 32; - if (clen > 32) { - clen = 32; - } - - memcpy(&buf[i * 32], T, clen); - } - - /* Clean PShctx, since we never called _Final on it. 
*/ - memset(&PShctx, 0, sizeof(hmac_ctx)); -} diff --git a/yespower-1.0.1-power2b/blake2b.h b/yespower-1.0.1-power2b/blake2b.h deleted file mode 100755 index f890a6384..000000000 --- a/yespower-1.0.1-power2b/blake2b.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once -#ifndef __BLAKE2B_H__ -#define __BLAKE2B_H__ - -#include -#include - -#if defined(_MSC_VER) || defined(__x86_64__) || defined(__x86__) -#define NATIVE_LITTLE_ENDIAN -#endif - -// state context -typedef struct { - uint8_t b[128]; // input buffer - uint64_t h[8]; // chained state - uint64_t t[2]; // total number of bytes - size_t c; // pointer for b[] - size_t outlen; // digest size -} blake2b_ctx; - -typedef struct { - blake2b_ctx inner; - blake2b_ctx outer; -} hmac_ctx; - -#if defined(__cplusplus) -extern "C" { -#endif - -int blake2b_init(blake2b_ctx *ctx, size_t outlen, const void *key, size_t keylen); -void blake2b_update(blake2b_ctx *ctx, const void *in, size_t inlen); -void blake2b_final(blake2b_ctx *ctx, void *out); -void blake2b_hash(void *out, const void *in, size_t inlen); -void hmac_blake2b_hash(void *out, const void *key, size_t keylen, const void *in, size_t inlen); -void pbkdf2_blake2b(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, - size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen); - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/yespower-1.0.1-power2b/insecure_memzero-p2b.h b/yespower-1.0.1-power2b/insecure_memzero-p2b.h deleted file mode 100644 index 5a0ba75c4..000000000 --- a/yespower-1.0.1-power2b/insecure_memzero-p2b.h +++ /dev/null @@ -1 +0,0 @@ -#define insecure_memzero(buf, len) /* empty */ diff --git a/yespower-1.0.1-power2b/sha256-p2b.c b/yespower-1.0.1-power2b/sha256-p2b.c deleted file mode 100644 index 0a032c6c4..000000000 --- a/yespower-1.0.1-power2b/sha256-p2b.c +++ /dev/null @@ -1,646 +0,0 @@ -/*- - * Copyright 2005-2016 Colin Percival - * Copyright 2016-2018 Alexander Peslyak - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include -#include -#include - -#include "insecure_memzero-p2b.h" -#include "sysendian-p2b.h" - -#include "sha256-p2b.h" - -#ifdef __ICC -/* Miscompile with icc 14.0.0 (at least), so don't use restrict there */ -#define restrict -#elif __STDC_VERSION__ >= 199901L -/* Have restrict */ -#elif defined(__GNUC__) -#define restrict __restrict -#else -#define restrict -#endif - -/* - * Encode a length len*2 vector of (uint32_t) into a length len*8 vector of - * (uint8_t) in big-endian form. - */ -static void -be32enc_vect(uint8_t * dst, const uint32_t * src, size_t len) -{ - - /* Encode vector, two words at a time. */ - do { - be32enc(&dst[0], src[0]); - be32enc(&dst[4], src[1]); - src += 2; - dst += 8; - } while (--len); -} - -/* - * Decode a big-endian length len*8 vector of (uint8_t) into a length - * len*2 vector of (uint32_t). - */ -static void -be32dec_vect(uint32_t * dst, const uint8_t * src, size_t len) -{ - - /* Decode vector, two words at a time. */ - do { - dst[0] = be32dec(&src[0]); - dst[1] = be32dec(&src[4]); - src += 8; - dst += 2; - } while (--len); -} - -/* SHA256 round constants. */ -static const uint32_t Krnd[64] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, - 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - -/* Elementary functions used by SHA256 */ -#define Ch(x, y, z) ((x & (y ^ z)) ^ z) -#define Maj(x, y, z) ((x & (y | z)) | (y & z)) -#define SHR(x, n) (x >> n) -#define ROTR(x, n) ((x >> n) | (x << (32 - n))) -#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) -#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) -#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3)) -#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10)) - -/* SHA256 round function */ -#define RND(a, b, c, d, e, f, g, h, k) \ - h += S1(e) + Ch(e, f, g) + k; \ - d += h; \ - h += S0(a) + Maj(a, b, c); - -/* Adjusted round function for rotating state */ -#define RNDr(S, W, i, ii) \ - RND(S[(64 - i) % 8], S[(65 - i) % 8], \ - S[(66 - i) % 8], S[(67 - i) % 8], \ - S[(68 - i) % 8], S[(69 - i) % 8], \ - S[(70 - i) % 8], S[(71 - i) % 8], \ - W[i + ii] + Krnd[i + ii]) - -/* Message schedule computation */ -#define MSCH(W, ii, i) \ - W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + s0(W[i + ii + 1]) + W[i + ii] - -/* - * SHA256 block compression function. The 256-bit state is transformed via - * the 512-bit input block to produce a new state. - */ -static void -SHA256_Transform(uint32_t state[static restrict 8], - const uint8_t block[static restrict 64], - uint32_t W[static restrict 64], uint32_t S[static restrict 8]) -{ - int i; - - /* 1. Prepare the first part of the message schedule W. */ - be32dec_vect(W, block, 8); - - /* 2. Initialize working variables. */ - memcpy(S, state, 32); - - /* 3. Mix. 
*/ - for (i = 0; i < 64; i += 16) { - RNDr(S, W, 0, i); - RNDr(S, W, 1, i); - RNDr(S, W, 2, i); - RNDr(S, W, 3, i); - RNDr(S, W, 4, i); - RNDr(S, W, 5, i); - RNDr(S, W, 6, i); - RNDr(S, W, 7, i); - RNDr(S, W, 8, i); - RNDr(S, W, 9, i); - RNDr(S, W, 10, i); - RNDr(S, W, 11, i); - RNDr(S, W, 12, i); - RNDr(S, W, 13, i); - RNDr(S, W, 14, i); - RNDr(S, W, 15, i); - - if (i == 48) - break; - MSCH(W, 0, i); - MSCH(W, 1, i); - MSCH(W, 2, i); - MSCH(W, 3, i); - MSCH(W, 4, i); - MSCH(W, 5, i); - MSCH(W, 6, i); - MSCH(W, 7, i); - MSCH(W, 8, i); - MSCH(W, 9, i); - MSCH(W, 10, i); - MSCH(W, 11, i); - MSCH(W, 12, i); - MSCH(W, 13, i); - MSCH(W, 14, i); - MSCH(W, 15, i); - } - - /* 4. Mix local working variables into global state. */ - state[0] += S[0]; - state[1] += S[1]; - state[2] += S[2]; - state[3] += S[3]; - state[4] += S[4]; - state[5] += S[5]; - state[6] += S[6]; - state[7] += S[7]; -} - -static const uint8_t PAD[64] = { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* Add padding and terminating bit-count. */ -static void -SHA256_Pad(SHA256_CTX * ctx, uint32_t tmp32[static restrict 72]) -{ - size_t r; - - /* Figure out how many bytes we have buffered. */ - r = (ctx->count >> 3) & 0x3f; - - /* Pad to 56 mod 64, transforming if we finish a block en route. */ - if (r < 56) { - /* Pad to 56 mod 64. */ - memcpy(&ctx->buf[r], PAD, 56 - r); - } else { - /* Finish the current block and mix. */ - memcpy(&ctx->buf[r], PAD, 64 - r); - SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]); - - /* The start of the final block is all zeroes. */ - memset(&ctx->buf[0], 0, 56); - } - - /* Add the terminating bit-count. */ - be64enc(&ctx->buf[56], ctx->count); - - /* Mix in the final block. */ - SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]); -} - -/* Magic initialization constants. */ -static const uint32_t initial_state[8] = { - 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, - 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 -}; - -/** - * SHA256_Init(ctx): - * Initialize the SHA256 context ${ctx}. - */ -void -SHA256_Init(SHA256_CTX * ctx) -{ - - /* Zero bits processed so far. */ - ctx->count = 0; - - /* Initialize state. */ - memcpy(ctx->state, initial_state, sizeof(initial_state)); -} - -/** - * SHA256_Update(ctx, in, len): - * Input ${len} bytes from ${in} into the SHA256 context ${ctx}. - */ -static void -_SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len, - uint32_t tmp32[static restrict 72]) -{ - uint32_t r; - const uint8_t * src = in; - - /* Return immediately if we have nothing to do. */ - if (len == 0) - return; - - /* Number of bytes left in the buffer from previous updates. */ - r = (ctx->count >> 3) & 0x3f; - - /* Update number of bits. */ - ctx->count += (uint64_t)(len) << 3; - - /* Handle the case where we don't need to perform any transforms. */ - if (len < 64 - r) { - memcpy(&ctx->buf[r], src, len); - return; - } - - /* Finish the current block. */ - memcpy(&ctx->buf[r], src, 64 - r); - SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]); - src += 64 - r; - len -= 64 - r; - - /* Perform complete blocks. */ - while (len >= 64) { - SHA256_Transform(ctx->state, src, &tmp32[0], &tmp32[64]); - src += 64; - len -= 64; - } - - /* Copy left over data into buffer. */ - memcpy(ctx->buf, src, len); -} - -/* Wrapper function for intermediate-values sanitization. 
*/ -void -SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len) -{ - uint32_t tmp32[72]; - - /* Call the real function. */ - _SHA256_Update(ctx, in, len, tmp32); - - /* Clean the stack. */ - insecure_memzero(tmp32, 288); -} - -/** - * SHA256_Final(digest, ctx): - * Output the SHA256 hash of the data input to the context ${ctx} into the - * buffer ${digest}. - */ -static void -_SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx, - uint32_t tmp32[static restrict 72]) -{ - - /* Add padding. */ - SHA256_Pad(ctx, tmp32); - - /* Write the hash. */ - be32enc_vect(digest, ctx->state, 4); -} - -/* Wrapper function for intermediate-values sanitization. */ -void -SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx) -{ - uint32_t tmp32[72]; - - /* Call the real function. */ - _SHA256_Final(digest, ctx, tmp32); - - /* Clear the context state. */ - insecure_memzero(ctx, sizeof(SHA256_CTX)); - - /* Clean the stack. */ - insecure_memzero(tmp32, 288); -} - -/** - * SHA256_Buf(in, len, digest): - * Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}. - */ -void -SHA256_Buf(const void * in, size_t len, uint8_t digest[32]) -{ - SHA256_CTX ctx; - uint32_t tmp32[72]; - - SHA256_Init(&ctx); - _SHA256_Update(&ctx, in, len, tmp32); - _SHA256_Final(digest, &ctx, tmp32); - - /* Clean the stack. */ - insecure_memzero(&ctx, sizeof(SHA256_CTX)); - insecure_memzero(tmp32, 288); -} - -/** - * HMAC_SHA256_Init(ctx, K, Klen): - * Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from - * ${K}. - */ -static void -_HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen, - uint32_t tmp32[static restrict 72], uint8_t pad[static restrict 64], - uint8_t khash[static restrict 32]) -{ - const uint8_t * K = _K; - size_t i; - - /* If Klen > 64, the key is really SHA256(K). */ - if (Klen > 64) { - SHA256_Init(&ctx->ictx); - _SHA256_Update(&ctx->ictx, K, Klen, tmp32); - _SHA256_Final(khash, &ctx->ictx, tmp32); - K = khash; - Klen = 32; - } - - /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */ - SHA256_Init(&ctx->ictx); - memset(pad, 0x36, 64); - for (i = 0; i < Klen; i++) - pad[i] ^= K[i]; - _SHA256_Update(&ctx->ictx, pad, 64, tmp32); - - /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */ - SHA256_Init(&ctx->octx); - memset(pad, 0x5c, 64); - for (i = 0; i < Klen; i++) - pad[i] ^= K[i]; - _SHA256_Update(&ctx->octx, pad, 64, tmp32); -} - -/* Wrapper function for intermediate-values sanitization. */ -void -HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen) -{ - uint32_t tmp32[72]; - uint8_t pad[64]; - uint8_t khash[32]; - - /* Call the real function. */ - _HMAC_SHA256_Init(ctx, _K, Klen, tmp32, pad, khash); - - /* Clean the stack. */ - insecure_memzero(tmp32, 288); - insecure_memzero(khash, 32); - insecure_memzero(pad, 64); -} - -/** - * HMAC_SHA256_Update(ctx, in, len): - * Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}. - */ -static void -_HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len, - uint32_t tmp32[static restrict 72]) -{ - - /* Feed data to the inner SHA256 operation. */ - _SHA256_Update(&ctx->ictx, in, len, tmp32); -} - -/* Wrapper function for intermediate-values sanitization. */ -void -HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len) -{ - uint32_t tmp32[72]; - - /* Call the real function. */ - _HMAC_SHA256_Update(ctx, in, len, tmp32); - - /* Clean the stack. 
*/ - insecure_memzero(tmp32, 288); -} - -/** - * HMAC_SHA256_Final(digest, ctx): - * Output the HMAC-SHA256 of the data input to the context ${ctx} into the - * buffer ${digest}. - */ -static void -_HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx, - uint32_t tmp32[static restrict 72], uint8_t ihash[static restrict 32]) -{ - - /* Finish the inner SHA256 operation. */ - _SHA256_Final(ihash, &ctx->ictx, tmp32); - - /* Feed the inner hash to the outer SHA256 operation. */ - _SHA256_Update(&ctx->octx, ihash, 32, tmp32); - - /* Finish the outer SHA256 operation. */ - _SHA256_Final(digest, &ctx->octx, tmp32); -} - -/* Wrapper function for intermediate-values sanitization. */ -void -HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx) -{ - uint32_t tmp32[72]; - uint8_t ihash[32]; - - /* Call the real function. */ - _HMAC_SHA256_Final(digest, ctx, tmp32, ihash); - - /* Clean the stack. */ - insecure_memzero(tmp32, 288); - insecure_memzero(ihash, 32); -} - -/** - * HMAC_SHA256_Buf_P2b(K, Klen, in, len, digest): - * Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of - * length ${Klen}, and write the result to ${digest}. - */ -void -HMAC_SHA256_Buf_P2b(const void * K, size_t Klen, const void * in, size_t len, - uint8_t digest[32]) -{ - HMAC_SHA256_CTX ctx; - uint32_t tmp32[72]; - uint8_t tmp8[96]; - - _HMAC_SHA256_Init(&ctx, K, Klen, tmp32, &tmp8[0], &tmp8[64]); - _HMAC_SHA256_Update(&ctx, in, len, tmp32); - _HMAC_SHA256_Final(digest, &ctx, tmp32, &tmp8[0]); - - /* Clean the stack. */ - insecure_memzero(&ctx, sizeof(HMAC_SHA256_CTX)); - insecure_memzero(tmp32, 288); - insecure_memzero(tmp8, 96); -} - -/* Add padding and terminating bit-count, but don't invoke Transform yet. */ -static int -SHA256_Pad_Almost(SHA256_CTX * ctx, uint8_t len[static restrict 8], - uint32_t tmp32[static restrict 72]) -{ - uint32_t r; - - r = (ctx->count >> 3) & 0x3f; - if (r >= 56) - return -1; - - /* - * Convert length to a vector of bytes -- we do this now rather - * than later because the length will change after we pad. - */ - be64enc(len, ctx->count); - - /* Add 1--56 bytes so that the resulting length is 56 mod 64. */ - _SHA256_Update(ctx, PAD, 56 - r, tmp32); - - /* Add the terminating bit-count. */ - ctx->buf[63] = len[7]; - _SHA256_Update(ctx, len, 7, tmp32); - - return 0; -} - -/** - * PBKDF2_SHA256_P2B(passwd, passwdlen, salt, saltlen, c, buf, dkLen): - * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and - * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). - */ -void -PBKDF2_SHA256_P2B(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, - size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen) -{ - HMAC_SHA256_CTX Phctx, PShctx, hctx; - uint32_t tmp32[72]; - union { - uint8_t tmp8[96]; - uint32_t state[8]; - } u; - size_t i; - uint8_t ivec[4]; - uint8_t U[32]; - uint8_t T[32]; - uint64_t j; - int k; - size_t clen; - - /* Sanity-check. */ - assert(dkLen <= 32 * (size_t)(UINT32_MAX)); - - if (c == 1 && (dkLen & 31) == 0 && (saltlen & 63) <= 51) { - uint32_t oldcount; - uint8_t * ivecp; - - /* Compute HMAC state after processing P and S. */ - _HMAC_SHA256_Init(&hctx, passwd, passwdlen, - tmp32, &u.tmp8[0], &u.tmp8[64]); - _HMAC_SHA256_Update(&hctx, salt, saltlen, tmp32); - - /* Prepare ictx padding. 
*/ - oldcount = hctx.ictx.count & (0x3f << 3); - _HMAC_SHA256_Update(&hctx, "\0\0\0", 4, tmp32); - if ((hctx.ictx.count & (0x3f << 3)) < oldcount || - SHA256_Pad_Almost(&hctx.ictx, u.tmp8, tmp32)) - goto generic; /* Can't happen due to saltlen check */ - ivecp = hctx.ictx.buf + (oldcount >> 3); - - /* Prepare octx padding. */ - hctx.octx.count += 32 << 3; - SHA256_Pad_Almost(&hctx.octx, u.tmp8, tmp32); - - /* Iterate through the blocks. */ - for (i = 0; i * 32 < dkLen; i++) { - /* Generate INT(i + 1). */ - be32enc(ivecp, (uint32_t)(i + 1)); - - /* Compute U_1 = PRF(P, S || INT(i)). */ - memcpy(u.state, hctx.ictx.state, sizeof(u.state)); - SHA256_Transform(u.state, hctx.ictx.buf, - &tmp32[0], &tmp32[64]); - be32enc_vect(hctx.octx.buf, u.state, 4); - memcpy(u.state, hctx.octx.state, sizeof(u.state)); - SHA256_Transform(u.state, hctx.octx.buf, - &tmp32[0], &tmp32[64]); - be32enc_vect(&buf[i * 32], u.state, 4); - } - - goto cleanup; - } - -generic: - /* Compute HMAC state after processing P. */ - _HMAC_SHA256_Init(&Phctx, passwd, passwdlen, - tmp32, &u.tmp8[0], &u.tmp8[64]); - - /* Compute HMAC state after processing P and S. */ - memcpy(&PShctx, &Phctx, sizeof(HMAC_SHA256_CTX)); - _HMAC_SHA256_Update(&PShctx, salt, saltlen, tmp32); - - /* Iterate through the blocks. */ - for (i = 0; i * 32 < dkLen; i++) { - /* Generate INT(i + 1). */ - be32enc(ivec, (uint32_t)(i + 1)); - - /* Compute U_1 = PRF(P, S || INT(i)). */ - memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX)); - _HMAC_SHA256_Update(&hctx, ivec, 4, tmp32); - _HMAC_SHA256_Final(T, &hctx, tmp32, u.tmp8); - - if (c > 1) { - /* T_i = U_1 ... */ - memcpy(U, T, 32); - - for (j = 2; j <= c; j++) { - /* Compute U_j. */ - memcpy(&hctx, &Phctx, sizeof(HMAC_SHA256_CTX)); - _HMAC_SHA256_Update(&hctx, U, 32, tmp32); - _HMAC_SHA256_Final(U, &hctx, tmp32, u.tmp8); - - /* ... xor U_j ... */ - for (k = 0; k < 32; k++) - T[k] ^= U[k]; - } - } - - /* Copy as many bytes as necessary into buf. */ - clen = dkLen - i * 32; - if (clen > 32) - clen = 32; - memcpy(&buf[i * 32], T, clen); - } - - /* Clean the stack. */ - insecure_memzero(&Phctx, sizeof(HMAC_SHA256_CTX)); - insecure_memzero(&PShctx, sizeof(HMAC_SHA256_CTX)); - insecure_memzero(U, 32); - insecure_memzero(T, 32); - -cleanup: - insecure_memzero(&hctx, sizeof(HMAC_SHA256_CTX)); - insecure_memzero(tmp32, 288); - insecure_memzero(&u, sizeof(u)); -} diff --git a/yespower-1.0.1-power2b/sha256-p2b.h b/yespower-1.0.1-power2b/sha256-p2b.h deleted file mode 100644 index 655e36a77..000000000 --- a/yespower-1.0.1-power2b/sha256-p2b.h +++ /dev/null @@ -1,129 +0,0 @@ -/*- - * Copyright 2005-2016 Colin Percival - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
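PBKDF2_SHA256_P2B() above is RFC 2898 PBKDF2 with HMAC-SHA256 as the PRF: block T_i is U_1 xor ... xor U_c, where U_1 = HMAC(P, S || INT(i)) and U_j = HMAC(P, U_{j-1}); the c == 1 branch at the top just reuses one precomputed HMAC state per output block. A minimal caller sketch, assuming the sha256-p2b.c/.h pair that this patch deletes is still built in, and that the include path below matches the tree layout:

#include <stdint.h>
#include <stdio.h>

#include "yespower-1.0.1-power2b/sha256-p2b.h"

int main(void)
{
	const uint8_t passwd[] = "password";
	const uint8_t salt[] = "salt";
	uint8_t dk[32];
	size_t i;

	/* One iteration, 32 output bytes -- roughly the shape of the
	 * call the yespower core makes internally. */
	PBKDF2_SHA256_P2B(passwd, sizeof(passwd) - 1, salt, sizeof(salt) - 1,
	    1, dk, sizeof(dk));

	for (i = 0; i < sizeof(dk); i++)
		printf("%02x", dk[i]);
	putchar('\n');
	return 0;
}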
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _SHA256_H_ -#define _SHA256_H_ - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Use #defines in order to avoid namespace collisions with anyone else's - * SHA256 code (e.g., the code in OpenSSL). - */ -#define SHA256_Init libcperciva_SHA256_Init_P2B -#define SHA256_Update libcperciva_SHA256_Update_P2B -#define SHA256_Final libcperciva_SHA256_Final_P2B -#define SHA256_Buf libcperciva_SHA256_Buf_P2B -#define SHA256_CTX libcperciva_SHA256_CTX_P2B -#define HMAC_SHA256_Init libcperciva_HMAC_SHA256_Init_P2B -#define HMAC_SHA256_Update libcperciva_HMAC_SHA256_Update_P2B -#define HMAC_SHA256_Final libcperciva_HMAC_SHA256_Final_P2B -#define HMAC_SHA256_Buf_P2b libcperciva_HMAC_SHA256_Buf_P2B -#define HMAC_SHA256_CTX libcperciva_HMAC_SHA256_CTX_P2B - -/* Context structure for SHA256 operations. */ -typedef struct { - uint32_t state[8]; - uint64_t count; - uint8_t buf[64]; -} SHA256_CTX; - -/** - * SHA256_Init(ctx): - * Initialize the SHA256 context ${ctx}. - */ -void SHA256_Init(SHA256_CTX *); - -/** - * SHA256_Update(ctx, in, len): - * Input ${len} bytes from ${in} into the SHA256 context ${ctx}. - */ -void SHA256_Update(SHA256_CTX *, const void *, size_t); - -/** - * SHA256_Final(digest, ctx): - * Output the SHA256 hash of the data input to the context ${ctx} into the - * buffer ${digest}. - */ -void SHA256_Final(uint8_t[32], SHA256_CTX *); - -/** - * SHA256_Buf(in, len, digest): - * Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}. - */ -void SHA256_Buf(const void *, size_t, uint8_t[32]); - -/* Context structure for HMAC-SHA256 operations. */ -typedef struct { - SHA256_CTX ictx; - SHA256_CTX octx; -} HMAC_SHA256_CTX; - -/** - * HMAC_SHA256_Init(ctx, K, Klen): - * Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from - * ${K}. - */ -void HMAC_SHA256_Init(HMAC_SHA256_CTX *, const void *, size_t); - -/** - * HMAC_SHA256_Update(ctx, in, len): - * Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}. - */ -void HMAC_SHA256_Update(HMAC_SHA256_CTX *, const void *, size_t); - -/** - * HMAC_SHA256_Final(digest, ctx): - * Output the HMAC-SHA256 of the data input to the context ${ctx} into the - * buffer ${digest}. - */ -void HMAC_SHA256_Final(uint8_t[32], HMAC_SHA256_CTX *); - -/** - * HMAC_SHA256_Buf_P2b(K, Klen, in, len, digest): - * Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of - * length ${Klen}, and write the result to ${digest}. - */ -void HMAC_SHA256_Buf_P2b(const void *, size_t, const void *, size_t, uint8_t[32]); - -/** - * PBKDF2_SHA256_P2B(passwd, passwdlen, salt, saltlen, c, buf, dkLen): - * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and - * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). 
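The #define block in this header is compile-time symbol prefixing: callers keep writing SHA256_Init(), but the symbol actually emitted is libcperciva_SHA256_Init_P2B, so the object file can be linked alongside OpenSSL (or the other yespower variants in this tree) without collisions. A stripped-down sketch of the same pattern with made-up names:

/* mini_hash.h -- hypothetical header using the same renaming trick. */
#ifndef MINI_HASH_H
#define MINI_HASH_H

#include <stddef.h>
#include <stdint.h>

/* Public spellings are macros over library-prefixed symbols, so they
 * cannot collide with another library's hash_init at link time. */
#define hash_init   minilib_hash_init_v1
#define hash_update minilib_hash_update_v1

typedef struct { uint32_t state[8]; uint64_t count; } hash_ctx;

void hash_init(hash_ctx *);
void hash_update(hash_ctx *, const void *, size_t);

#endif /* MINI_HASH_H */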
- */ -void PBKDF2_SHA256_P2B(const uint8_t *, size_t, const uint8_t *, size_t, - uint64_t, uint8_t *, size_t); - -#ifdef __cplusplus -} -#endif - -#endif /* !_SHA256_H_ */ diff --git a/yespower-1.0.1-power2b/sph_types.h b/yespower-1.0.1-power2b/sph_types.h deleted file mode 100755 index cef79bde1..000000000 --- a/yespower-1.0.1-power2b/sph_types.h +++ /dev/null @@ -1,1976 +0,0 @@ -/* $Id: sph_types.h 260 2011-07-21 01:02:38Z tp $ */ -/** - * Basic type definitions. - * - * This header file defines the generic integer types that will be used - * for the implementation of hash functions; it also contains helper - * functions which encode and decode multi-byte integer values, using - * either little-endian or big-endian conventions. - * - * This file contains a compile-time test on the size of a byte - * (the unsigned char C type). If bytes are not octets, - * i.e. if they do not have a size of exactly 8 bits, then compilation - * is aborted. Architectures where bytes are not octets are relatively - * rare, even in the embedded devices market. We forbid non-octet bytes - * because there is no clear convention on how octet streams are encoded - * on such systems. - * - * ==========================(LICENSE BEGIN)============================ - * - * Copyright (c) 2007-2010 Projet RNRT SAPHIR - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * ===========================(LICENSE END)============================= - * - * @file sph_types.h - * @author Thomas Pornin - */ - -#ifndef SPH_TYPES_H__ -#define SPH_TYPES_H__ - -#include - -/* - * All our I/O functions are defined over octet streams. We do not know - * how to handle input data if bytes are not octets. - */ -#if CHAR_BIT != 8 -#error This code requires 8-bit bytes -#endif - -/* ============= BEGIN documentation block for Doxygen ============ */ - -#ifdef DOXYGEN_IGNORE - -/** @mainpage sphlib C code documentation - * - * @section overview Overview - * - * sphlib is a library which contains implementations of - * various cryptographic hash functions. These pages have been generated - * with doxygen and - * document the API for the C implementations. - * - * The API is described in appropriate header files, which are available - * in the "Files" section. Each hash function family has its own header, - * whose name begins with "sph_" and contains the family - * name. For instance, the API for the RIPEMD hash functions is available - * in the header file sph_ripemd.h. 
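The CHAR_BIT test near the top of sph_types.h aborts compilation on non-octet platforms before any I/O helper is instantiated. On C11 compilers the same guard can be written as a static assertion; this is shown only as an alternative, not what the header actually does:

#include <limits.h>

/* Fails to compile, with a readable message, wherever bytes are not octets. */
_Static_assert(CHAR_BIT == 8, "this code assumes 8-bit bytes");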
- * - * @section principles API structure and conventions - * - * @subsection io Input/output conventions - * - * In all generality, hash functions operate over strings of bits. - * Individual bits are rarely encountered in C programming or actual - * communication protocols; most protocols converge on the ubiquitous - * "octet" which is a group of eight bits. Data is thus expressed as a - * stream of octets. The C programming language contains the notion of a - * "byte", which is a data unit managed under the type "unsigned - * char". The C standard prescribes that a byte should hold at - * least eight bits, but possibly more. Most modern architectures, even - * in the embedded world, feature eight-bit bytes, i.e. map bytes to - * octets. - * - * Nevertheless, for some of the implemented hash functions, an extra - * API has been added, which allows the input of arbitrary sequences of - * bits: when the computation is about to be closed, 1 to 7 extra bits - * can be added. The functions for which this API is implemented include - * the SHA-2 functions and all SHA-3 candidates. - * - * sphlib defines hash function which may hash octet streams, - * i.e. streams of bits where the number of bits is a multiple of eight. - * The data input functions in the sphlib API expect data - * as anonymous pointers ("const void *") with a length - * (of type "size_t") which gives the input data chunk length - * in bytes. A byte is assumed to be an octet; the sph_types.h - * header contains a compile-time test which prevents compilation on - * architectures where this property is not met. - * - * The hash function output is also converted into bytes. All currently - * implemented hash functions have an output width which is a multiple of - * eight, and this is likely to remain true for new designs. - * - * Most hash functions internally convert input data into 32-bit of 64-bit - * words, using either little-endian or big-endian conversion. The hash - * output also often consists of such words, which are encoded into output - * bytes with a similar endianness convention. Some hash functions have - * been only loosely specified on that subject; when necessary, - * sphlib has been tested against published "reference" - * implementations in order to use the same conventions. - * - * @subsection shortname Function short name - * - * Each implemented hash function has a "short name" which is used - * internally to derive the identifiers for the functions and context - * structures which the function uses. For instance, MD5 has the short - * name "md5". Short names are listed in the next section, - * for the implemented hash functions. In subsequent sections, the - * short name will be assumed to be "XXX": replace with the - * actual hash function name to get the C identifier. - * - * Note: some functions within the same family share the same core - * elements, such as update function or context structure. Correspondingly, - * some of the defined types or functions may actually be macros which - * transparently evaluate to another type or function name. - * - * @subsection context Context structure - * - * Each implemented hash fonction has its own context structure, available - * under the type name "sph_XXX_context" for the hash function - * with short name "XXX". This structure holds all needed - * state for a running hash computation. - * - * The contents of these structures are meant to be opaque, and private - * to the implementation. 
However, these contents are specified in the - * header files so that application code which uses sphlib - * may access the size of those structures. - * - * The caller is responsible for allocating the context structure, - * whether by dynamic allocation (malloc() or equivalent), - * static allocation (a global permanent variable), as an automatic - * variable ("on the stack"), or by any other mean which ensures proper - * structure alignment. sphlib code performs no dynamic - * allocation by itself. - * - * The context must be initialized before use, using the - * sph_XXX_init() function. This function sets the context - * state to proper initial values for hashing. - * - * Since all state data is contained within the context structure, - * sphlib is thread-safe and reentrant: several hash - * computations may be performed in parallel, provided that they do not - * operate on the same context. Moreover, a running computation can be - * cloned by copying the context (with a simple memcpy()): - * the context and its clone are then independant and may be updated - * with new data and/or closed without interfering with each other. - * Similarly, a context structure can be moved in memory at will: - * context structures contain no pointer, in particular no pointer to - * themselves. - * - * @subsection dataio Data input - * - * Hashed data is input with the sph_XXX() fonction, which - * takes as parameters a pointer to the context, a pointer to the data - * to hash, and the number of data bytes to hash. The context is updated - * with the new data. - * - * Data can be input in one or several calls, with arbitrary input lengths. - * However, it is best, performance wise, to input data by relatively big - * chunks (say a few kilobytes), because this allows sphlib to - * optimize things and avoid internal copying. - * - * When all data has been input, the context can be closed with - * sph_XXX_close(). The hash output is computed and written - * into the provided buffer. The caller must take care to provide a - * buffer of appropriate length; e.g., when using SHA-1, the output is - * a 20-byte word, therefore the output buffer must be at least 20-byte - * long. - * - * For some hash functions, the sph_XXX_addbits_and_close() - * function can be used instead of sph_XXX_close(). This - * function can take a few extra bits to be added at - * the end of the input message. This allows hashing messages with a - * bit length which is not a multiple of 8. The extra bits are provided - * as an unsigned integer value, and a bit count. The bit count must be - * between 0 and 7, inclusive. The extra bits are provided as bits 7 to - * 0 (bits of numerical value 128, 64, 32... downto 0), in that order. - * For instance, to add three bits of value 1, 1 and 0, the unsigned - * integer will have value 192 (1*128 + 1*64 + 0*32) and the bit count - * will be 3. - * - * The SPH_SIZE_XXX macro is defined for each hash function; - * it evaluates to the function output size, expressed in bits. For instance, - * SPH_SIZE_sha1 evaluates to 160. - * - * When closed, the context is automatically reinitialized and can be - * immediately used for another computation. It is not necessary to call - * sph_XXX_init() after a close. Note that - * sph_XXX_init() can still be called to "reset" a context, - * i.e. forget previously input data, and get back to the initial state. 
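Concretely, the init/update/close cycle described above looks like this for the short name "sha1". sph_sha1.h comes from sphlib itself and is not shipped in this repository, so treat the include as an assumption:

#include <stdio.h>

#include "sph_sha1.h"   /* sphlib header; not part of this tree */

int main(void)
{
	sph_sha1_context cc;
	unsigned char out[SPH_SIZE_sha1 / 8];   /* 160 bits -> 20 bytes */
	size_t i;

	sph_sha1_init(&cc);
	sph_sha1(&cc, "abc", 3);      /* data may arrive in several chunks */
	sph_sha1_close(&cc, out);     /* writes the digest, re-inits cc    */

	for (i = 0; i < sizeof(out); i++)
		printf("%02x", out[i]);
	putchar('\n');
	return 0;
}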
- * - * @subsection alignment Data alignment - * - * "Alignment" is a property of data, which is said to be "properly - * aligned" when its emplacement in memory is such that the data can - * be optimally read by full words. This depends on the type of access; - * basically, some hash functions will read data by 32-bit or 64-bit - * words. sphlib does not mandate such alignment for input - * data, but using aligned data can substantially improve performance. - * - * As a rule, it is best to input data by chunks whose length (in bytes) - * is a multiple of eight, and which begins at "generally aligned" - * addresses, such as the base address returned by a call to - * malloc(). - * - * @section functions Implemented functions - * - * We give here the list of implemented functions. They are grouped by - * family; to each family corresponds a specific header file. Each - * individual function has its associated "short name". Please refer to - * the documentation for that header file to get details on the hash - * function denomination and provenance. - * - * Note: the functions marked with a '(64)' in the list below are - * available only if the C compiler provides an integer type of length - * 64 bits or more. Such a type is mandatory in the latest C standard - * (ISO 9899:1999, aka "C99") and is present in several older compilers - * as well, so chances are that such a type is available. - * - * - HAVAL family: file sph_haval.h - * - HAVAL-128/3 (128-bit, 3 passes): short name: haval128_3 - * - HAVAL-128/4 (128-bit, 4 passes): short name: haval128_4 - * - HAVAL-128/5 (128-bit, 5 passes): short name: haval128_5 - * - HAVAL-160/3 (160-bit, 3 passes): short name: haval160_3 - * - HAVAL-160/4 (160-bit, 4 passes): short name: haval160_4 - * - HAVAL-160/5 (160-bit, 5 passes): short name: haval160_5 - * - HAVAL-192/3 (192-bit, 3 passes): short name: haval192_3 - * - HAVAL-192/4 (192-bit, 4 passes): short name: haval192_4 - * - HAVAL-192/5 (192-bit, 5 passes): short name: haval192_5 - * - HAVAL-224/3 (224-bit, 3 passes): short name: haval224_3 - * - HAVAL-224/4 (224-bit, 4 passes): short name: haval224_4 - * - HAVAL-224/5 (224-bit, 5 passes): short name: haval224_5 - * - HAVAL-256/3 (256-bit, 3 passes): short name: haval256_3 - * - HAVAL-256/4 (256-bit, 4 passes): short name: haval256_4 - * - HAVAL-256/5 (256-bit, 5 passes): short name: haval256_5 - * - MD2: file sph_md2.h, short name: md2 - * - MD4: file sph_md4.h, short name: md4 - * - MD5: file sph_md5.h, short name: md5 - * - PANAMA: file sph_panama.h, short name: panama - * - RadioGatun family: file sph_radiogatun.h - * - RadioGatun[32]: short name: radiogatun32 - * - RadioGatun[64]: short name: radiogatun64 (64) - * - RIPEMD family: file sph_ripemd.h - * - RIPEMD: short name: ripemd - * - RIPEMD-128: short name: ripemd128 - * - RIPEMD-160: short name: ripemd160 - * - SHA-0: file sph_sha0.h, short name: sha0 - * - SHA-1: file sph_sha1.h, short name: sha1 - * - SHA-2 family, 32-bit hashes: file sph_sha2.h - * - SHA-224: short name: sha224 - * - SHA-256: short name: sha256 - * - SHA-384: short name: sha384 (64) - * - SHA-512: short name: sha512 (64) - * - Tiger family: file sph_tiger.h - * - Tiger: short name: tiger (64) - * - Tiger2: short name: tiger2 (64) - * - WHIRLPOOL family: file sph_whirlpool.h - * - WHIRLPOOL-0: short name: whirlpool0 (64) - * - WHIRLPOOL-1: short name: whirlpool1 (64) - * - WHIRLPOOL: short name: whirlpool (64) - * - * The fourteen second-round SHA-3 candidates are also implemented; - * when applicable, the 
implementations follow the "final" specifications - * as published for the third round of the SHA-3 competition (BLAKE, - * Groestl, JH, Keccak and Skein have been tweaked for third round). - * - * - BLAKE family: file sph_blake.h - * - BLAKE-224: short name: blake224 - * - BLAKE-256: short name: blake256 - * - BLAKE-384: short name: blake384 - * - BLAKE-512: short name: blake512 - * - BMW (Blue Midnight Wish) family: file sph_bmw.h - * - BMW-224: short name: bmw224 - * - BMW-256: short name: bmw256 - * - BMW-384: short name: bmw384 (64) - * - BMW-512: short name: bmw512 (64) - * - CubeHash family: file sph_cubehash.h (specified as - * CubeHash16/32 in the CubeHash specification) - * - CubeHash-224: short name: cubehash224 - * - CubeHash-256: short name: cubehash256 - * - CubeHash-384: short name: cubehash384 - * - CubeHash-512: short name: cubehash512 - * - ECHO family: file sph_echo.h - * - ECHO-224: short name: echo224 - * - ECHO-256: short name: echo256 - * - ECHO-384: short name: echo384 - * - ECHO-512: short name: echo512 - * - Fugue family: file sph_fugue.h - * - Fugue-224: short name: fugue224 - * - Fugue-256: short name: fugue256 - * - Fugue-384: short name: fugue384 - * - Fugue-512: short name: fugue512 - * - Groestl family: file sph_groestl.h - * - Groestl-224: short name: groestl224 - * - Groestl-256: short name: groestl256 - * - Groestl-384: short name: groestl384 - * - Groestl-512: short name: groestl512 - * - Hamsi family: file sph_hamsi.h - * - Hamsi-224: short name: hamsi224 - * - Hamsi-256: short name: hamsi256 - * - Hamsi-384: short name: hamsi384 - * - Hamsi-512: short name: hamsi512 - * - JH family: file sph_jh.h - * - JH-224: short name: jh224 - * - JH-256: short name: jh256 - * - JH-384: short name: jh384 - * - JH-512: short name: jh512 - * - Keccak family: file sph_keccak.h - * - Keccak-224: short name: keccak224 - * - Keccak-256: short name: keccak256 - * - Keccak-384: short name: keccak384 - * - Keccak-512: short name: keccak512 - * - Luffa family: file sph_luffa.h - * - Luffa-224: short name: luffa224 - * - Luffa-256: short name: luffa256 - * - Luffa-384: short name: luffa384 - * - Luffa-512: short name: luffa512 - * - Shabal family: file sph_shabal.h - * - Shabal-192: short name: shabal192 - * - Shabal-224: short name: shabal224 - * - Shabal-256: short name: shabal256 - * - Shabal-384: short name: shabal384 - * - Shabal-512: short name: shabal512 - * - SHAvite-3 family: file sph_shavite.h - * - SHAvite-224 (nominally "SHAvite-3 with 224-bit output"): - * short name: shabal224 - * - SHAvite-256 (nominally "SHAvite-3 with 256-bit output"): - * short name: shabal256 - * - SHAvite-384 (nominally "SHAvite-3 with 384-bit output"): - * short name: shabal384 - * - SHAvite-512 (nominally "SHAvite-3 with 512-bit output"): - * short name: shabal512 - * - SIMD family: file sph_simd.h - * - SIMD-224: short name: simd224 - * - SIMD-256: short name: simd256 - * - SIMD-384: short name: simd384 - * - SIMD-512: short name: simd512 - * - Skein family: file sph_skein.h - * - Skein-224 (nominally specified as Skein-512-224): short name: - * skein224 (64) - * - Skein-256 (nominally specified as Skein-512-256): short name: - * skein256 (64) - * - Skein-384 (nominally specified as Skein-512-384): short name: - * skein384 (64) - * - Skein-512 (nominally specified as Skein-512-512): short name: - * skein512 (64) - * - * For the second-round SHA-3 candidates, the functions are as specified - * for round 2, i.e. with the "tweaks" that some candidates added - * between round 1 and round 2. 
Also, some of the submitted packages for - * round 2 contained errors, in the specification, reference code, or - * both. sphlib implements the corrected versions. - */ - -/** @hideinitializer - * Unsigned integer type whose length is at least 32 bits; on most - * architectures, it will have a width of exactly 32 bits. Unsigned C - * types implement arithmetics modulo a power of 2; use the - * SPH_T32() macro to ensure that the value is truncated - * to exactly 32 bits. Unless otherwise specified, all macros and - * functions which accept sph_u32 values assume that these - * values fit on 32 bits, i.e. do not exceed 2^32-1, even on architectures - * where sph_u32 is larger than that. - */ -typedef __arch_dependant__ sph_u32; - -/** @hideinitializer - * Signed integer type corresponding to sph_u32; it has - * width 32 bits or more. - */ -typedef __arch_dependant__ sph_s32; - -/** @hideinitializer - * Unsigned integer type whose length is at least 64 bits; on most - * architectures which feature such a type, it will have a width of - * exactly 64 bits. C99-compliant platform will have this type; it - * is also defined when the GNU compiler (gcc) is used, and on - * platforms where unsigned long is large enough. If this - * type is not available, then some hash functions which depends on - * a 64-bit type will not be available (most notably SHA-384, SHA-512, - * Tiger and WHIRLPOOL). - */ -typedef __arch_dependant__ sph_u64; - -/** @hideinitializer - * Signed integer type corresponding to sph_u64; it has - * width 64 bits or more. - */ -typedef __arch_dependant__ sph_s64; - -/** - * This macro expands the token x into a suitable - * constant expression of type sph_u32. Depending on - * how this type is defined, a suffix such as UL may - * be appended to the argument. - * - * @param x the token to expand into a suitable constant expression - */ -#define SPH_C32(x) - -/** - * Truncate a 32-bit value to exactly 32 bits. On most systems, this is - * a no-op, recognized as such by the compiler. - * - * @param x the value to truncate (of type sph_u32) - */ -#define SPH_T32(x) - -/** - * Rotate a 32-bit value by a number of bits to the left. The rotate - * count must reside between 1 and 31. This macro assumes that its - * first argument fits in 32 bits (no extra bit allowed on machines where - * sph_u32 is wider); both arguments may be evaluated - * several times. - * - * @param x the value to rotate (of type sph_u32) - * @param n the rotation count (between 1 and 31, inclusive) - */ -#define SPH_ROTL32(x, n) - -/** - * Rotate a 32-bit value by a number of bits to the left. The rotate - * count must reside between 1 and 31. This macro assumes that its - * first argument fits in 32 bits (no extra bit allowed on machines where - * sph_u32 is wider); both arguments may be evaluated - * several times. - * - * @param x the value to rotate (of type sph_u32) - * @param n the rotation count (between 1 and 31, inclusive) - */ -#define SPH_ROTR32(x, n) - -/** - * This macro is defined on systems for which a 64-bit type has been - * detected, and is used for sph_u64. - */ -#define SPH_64 - -/** - * This macro is defined on systems for the "native" integer size is - * 64 bits (64-bit values fit in one register). - */ -#define SPH_64_TRUE - -/** - * This macro expands the token x into a suitable - * constant expression of type sph_u64. Depending on - * how this type is defined, a suffix such as ULL may - * be appended to the argument. 
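The "suffix such as UL may be appended" remark is literal: on pre-C99 systems this header builds SPH_C32 with token pasting so that a 32-bit constant is always unsigned, even where a plain int literal would overflow. A tiny standalone illustration of that trick, with my own macro name but the same mechanism:

#include <stdio.h>

/* Paste a UL suffix onto the literal, as the pre-C99 branch of
 * SPH_C32 does, so 0xFFFFFFFF stays unsigned on 32-bit 'long'. */
#define C32(x) ((unsigned long)(x ## UL))

int main(void)
{
	printf("%lu\n", C32(0xFFFFFFFF));   /* prints 4294967295 */
	return 0;
}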
This macro is defined only if a - * 64-bit type was detected and used for sph_u64. - * - * @param x the token to expand into a suitable constant expression - */ -#define SPH_C64(x) - -/** - * Truncate a 64-bit value to exactly 64 bits. On most systems, this is - * a no-op, recognized as such by the compiler. This macro is defined only - * if a 64-bit type was detected and used for sph_u64. - * - * @param x the value to truncate (of type sph_u64) - */ -#define SPH_T64(x) - -/** - * Rotate a 64-bit value by a number of bits to the left. The rotate - * count must reside between 1 and 63. This macro assumes that its - * first argument fits in 64 bits (no extra bit allowed on machines where - * sph_u64 is wider); both arguments may be evaluated - * several times. This macro is defined only if a 64-bit type was detected - * and used for sph_u64. - * - * @param x the value to rotate (of type sph_u64) - * @param n the rotation count (between 1 and 63, inclusive) - */ -#define SPH_ROTL64(x, n) - -/** - * Rotate a 64-bit value by a number of bits to the left. The rotate - * count must reside between 1 and 63. This macro assumes that its - * first argument fits in 64 bits (no extra bit allowed on machines where - * sph_u64 is wider); both arguments may be evaluated - * several times. This macro is defined only if a 64-bit type was detected - * and used for sph_u64. - * - * @param x the value to rotate (of type sph_u64) - * @param n the rotation count (between 1 and 63, inclusive) - */ -#define SPH_ROTR64(x, n) - -/** - * This macro evaluates to inline or an equivalent construction, - * if available on the compilation platform, or to nothing otherwise. This - * is used to declare inline functions, for which the compiler should - * endeavour to include the code directly in the caller. Inline functions - * are typically defined in header files as replacement for macros. - */ -#define SPH_INLINE - -/** - * This macro is defined if the platform has been detected as using - * little-endian convention. This implies that the sph_u32 - * type (and the sph_u64 type also, if it is defined) has - * an exact width (i.e. exactly 32-bit, respectively 64-bit). - */ -#define SPH_LITTLE_ENDIAN - -/** - * This macro is defined if the platform has been detected as using - * big-endian convention. This implies that the sph_u32 - * type (and the sph_u64 type also, if it is defined) has - * an exact width (i.e. exactly 32-bit, respectively 64-bit). - */ -#define SPH_BIG_ENDIAN - -/** - * This macro is defined if 32-bit words (and 64-bit words, if defined) - * can be read from and written to memory efficiently in little-endian - * convention. This is the case for little-endian platforms, and also - * for the big-endian platforms which have special little-endian access - * opcodes (e.g. Ultrasparc). - */ -#define SPH_LITTLE_FAST - -/** - * This macro is defined if 32-bit words (and 64-bit words, if defined) - * can be read from and written to memory efficiently in big-endian - * convention. This is the case for little-endian platforms, and also - * for the little-endian platforms which have special big-endian access - * opcodes. - */ -#define SPH_BIG_FAST - -/** - * On some platforms, this macro is defined to an unsigned integer type - * into which pointer values may be cast. The resulting value can then - * be tested for being a multiple of 2, 4 or 8, indicating an aligned - * pointer for, respectively, 16-bit, 32-bit or 64-bit memory accesses. 
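SPH_UPTR is used later in this header exactly as described: the pointer is cast to it and the low bits decide between a single word access and a byte-by-byte fallback. The same test with the standard uintptr_t, as a sketch rather than the SPH_ machinery:

#include <stdint.h>
#include <stdio.h>

/* Non-zero when p can be used for a naturally aligned 32-bit access. */
static int aligned4(const void *p)
{
	return ((uintptr_t)p & 3) == 0;
}

int main(void)
{
	uint32_t word[2];
	const unsigned char *bytes = (const unsigned char *)word;

	printf("%d %d\n", aligned4(bytes), aligned4(bytes + 1));   /* 1 0 */
	return 0;
}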
- */ -#define SPH_UPTR - -/** - * When defined, this macro indicates that unaligned memory accesses - * are possible with only a minor penalty, and thus should be prefered - * over strategies which first copy data to an aligned buffer. - */ -#define SPH_UNALIGNED - -/** - * Byte-swap a 32-bit word (i.e. 0x12345678 becomes - * 0x78563412). This is an inline function which resorts - * to inline assembly on some platforms, for better performance. - * - * @param x the 32-bit value to byte-swap - * @return the byte-swapped value - */ -static inline sph_u32 sph_bswap32(sph_u32 x); - -/** - * Byte-swap a 64-bit word. This is an inline function which resorts - * to inline assembly on some platforms, for better performance. This - * function is defined only if a suitable 64-bit type was found for - * sph_u64 - * - * @param x the 64-bit value to byte-swap - * @return the byte-swapped value - */ -static inline sph_u64 sph_bswap64(sph_u64 x); - -/** - * Decode a 16-bit unsigned value from memory, in little-endian convention - * (least significant byte comes first). - * - * @param src the source address - * @return the decoded value - */ -static inline unsigned sph_dec16le(const void *src); - -/** - * Encode a 16-bit unsigned value into memory, in little-endian convention - * (least significant byte comes first). - * - * @param dst the destination buffer - * @param val the value to encode - */ -static inline void sph_enc16le(void *dst, unsigned val); - -/** - * Decode a 16-bit unsigned value from memory, in big-endian convention - * (most significant byte comes first). - * - * @param src the source address - * @return the decoded value - */ -static inline unsigned sph_dec16be(const void *src); - -/** - * Encode a 16-bit unsigned value into memory, in big-endian convention - * (most significant byte comes first). - * - * @param dst the destination buffer - * @param val the value to encode - */ -static inline void sph_enc16be(void *dst, unsigned val); - -/** - * Decode a 32-bit unsigned value from memory, in little-endian convention - * (least significant byte comes first). - * - * @param src the source address - * @return the decoded value - */ -static inline sph_u32 sph_dec32le(const void *src); - -/** - * Decode a 32-bit unsigned value from memory, in little-endian convention - * (least significant byte comes first). This function assumes that the - * source address is suitably aligned for a direct access, if the platform - * supports such things; it can thus be marginally faster than the generic - * sph_dec32le() function. - * - * @param src the source address - * @return the decoded value - */ -static inline sph_u32 sph_dec32le_aligned(const void *src); - -/** - * Encode a 32-bit unsigned value into memory, in little-endian convention - * (least significant byte comes first). - * - * @param dst the destination buffer - * @param val the value to encode - */ -static inline void sph_enc32le(void *dst, sph_u32 val); - -/** - * Encode a 32-bit unsigned value into memory, in little-endian convention - * (least significant byte comes first). This function assumes that the - * destination address is suitably aligned for a direct access, if the - * platform supports such things; it can thus be marginally faster than - * the generic sph_enc32le() function. 
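All of the encode/decode declarations above boil down to the byte arithmetic below; the *_aligned variants merely skip the pointer test when the caller can guarantee alignment. A plain-C little-endian round trip, independent of the sph_* helpers:

#include <assert.h>
#include <stdint.h>

static void enc32le(unsigned char *dst, uint32_t v)
{
	dst[0] = (unsigned char)v;
	dst[1] = (unsigned char)(v >> 8);
	dst[2] = (unsigned char)(v >> 16);
	dst[3] = (unsigned char)(v >> 24);
}

static uint32_t dec32le(const unsigned char *src)
{
	return (uint32_t)src[0]
	    | ((uint32_t)src[1] << 8)
	    | ((uint32_t)src[2] << 16)
	    | ((uint32_t)src[3] << 24);
}

int main(void)
{
	unsigned char buf[4];

	enc32le(buf, 0x12345678u);
	assert(buf[0] == 0x78 && buf[3] == 0x12);   /* LSB stored first */
	assert(dec32le(buf) == 0x12345678u);
	return 0;
}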
- * - * @param dst the destination buffer - * @param val the value to encode - */ -static inline void sph_enc32le_aligned(void *dst, sph_u32 val); - -/** - * Decode a 32-bit unsigned value from memory, in big-endian convention - * (most significant byte comes first). - * - * @param src the source address - * @return the decoded value - */ -static inline sph_u32 sph_dec32be(const void *src); - -/** - * Decode a 32-bit unsigned value from memory, in big-endian convention - * (most significant byte comes first). This function assumes that the - * source address is suitably aligned for a direct access, if the platform - * supports such things; it can thus be marginally faster than the generic - * sph_dec32be() function. - * - * @param src the source address - * @return the decoded value - */ -static inline sph_u32 sph_dec32be_aligned(const void *src); - -/** - * Encode a 32-bit unsigned value into memory, in big-endian convention - * (most significant byte comes first). - * - * @param dst the destination buffer - * @param val the value to encode - */ -static inline void sph_enc32be(void *dst, sph_u32 val); - -/** - * Encode a 32-bit unsigned value into memory, in big-endian convention - * (most significant byte comes first). This function assumes that the - * destination address is suitably aligned for a direct access, if the - * platform supports such things; it can thus be marginally faster than - * the generic sph_enc32be() function. - * - * @param dst the destination buffer - * @param val the value to encode - */ -static inline void sph_enc32be_aligned(void *dst, sph_u32 val); - -/** - * Decode a 64-bit unsigned value from memory, in little-endian convention - * (least significant byte comes first). This function is defined only - * if a suitable 64-bit type was detected and used for sph_u64. - * - * @param src the source address - * @return the decoded value - */ -static inline sph_u64 sph_dec64le(const void *src); - -/** - * Decode a 64-bit unsigned value from memory, in little-endian convention - * (least significant byte comes first). This function assumes that the - * source address is suitably aligned for a direct access, if the platform - * supports such things; it can thus be marginally faster than the generic - * sph_dec64le() function. This function is defined only - * if a suitable 64-bit type was detected and used for sph_u64. - * - * @param src the source address - * @return the decoded value - */ -static inline sph_u64 sph_dec64le_aligned(const void *src); - -/** - * Encode a 64-bit unsigned value into memory, in little-endian convention - * (least significant byte comes first). This function is defined only - * if a suitable 64-bit type was detected and used for sph_u64. - * - * @param dst the destination buffer - * @param val the value to encode - */ -static inline void sph_enc64le(void *dst, sph_u64 val); - -/** - * Encode a 64-bit unsigned value into memory, in little-endian convention - * (least significant byte comes first). This function assumes that the - * destination address is suitably aligned for a direct access, if the - * platform supports such things; it can thus be marginally faster than - * the generic sph_enc64le() function. This function is defined - * only if a suitable 64-bit type was detected and used for - * sph_u64. 
- * - * @param dst the destination buffer - * @param val the value to encode - */ -static inline void sph_enc64le_aligned(void *dst, sph_u64 val); - -/** - * Decode a 64-bit unsigned value from memory, in big-endian convention - * (most significant byte comes first). This function is defined only - * if a suitable 64-bit type was detected and used for sph_u64. - * - * @param src the source address - * @return the decoded value - */ -static inline sph_u64 sph_dec64be(const void *src); - -/** - * Decode a 64-bit unsigned value from memory, in big-endian convention - * (most significant byte comes first). This function assumes that the - * source address is suitably aligned for a direct access, if the platform - * supports such things; it can thus be marginally faster than the generic - * sph_dec64be() function. This function is defined only - * if a suitable 64-bit type was detected and used for sph_u64. - * - * @param src the source address - * @return the decoded value - */ -static inline sph_u64 sph_dec64be_aligned(const void *src); - -/** - * Encode a 64-bit unsigned value into memory, in big-endian convention - * (most significant byte comes first). This function is defined only - * if a suitable 64-bit type was detected and used for sph_u64. - * - * @param dst the destination buffer - * @param val the value to encode - */ -static inline void sph_enc64be(void *dst, sph_u64 val); - -/** - * Encode a 64-bit unsigned value into memory, in big-endian convention - * (most significant byte comes first). This function assumes that the - * destination address is suitably aligned for a direct access, if the - * platform supports such things; it can thus be marginally faster than - * the generic sph_enc64be() function. This function is defined - * only if a suitable 64-bit type was detected and used for - * sph_u64. - * - * @param dst the destination buffer - * @param val the value to encode - */ -static inline void sph_enc64be_aligned(void *dst, sph_u64 val); - -#endif - -/* ============== END documentation block for Doxygen ============= */ - -#ifndef DOXYGEN_IGNORE - -/* - * We want to define the types "sph_u32" and "sph_u64" which hold - * unsigned values of at least, respectively, 32 and 64 bits. These - * tests should select appropriate types for most platforms. The - * macro "SPH_64" is defined if the 64-bit is supported. - */ - -#undef SPH_64 -#undef SPH_64_TRUE - -#if defined __STDC__ && __STDC_VERSION__ >= 199901L - -/* - * On C99 implementations, we can use to get an exact 64-bit - * type, if any, or otherwise use a wider type (which must exist, for - * C99 conformance). - */ - -#include - -#ifdef UINT32_MAX -typedef uint32_t sph_u32; -typedef int32_t sph_s32; -#else -typedef uint_fast32_t sph_u32; -typedef int_fast32_t sph_s32; -#endif -#if !SPH_NO_64 -#ifdef UINT64_MAX -typedef uint64_t sph_u64; -typedef int64_t sph_s64; -#else -typedef uint_fast64_t sph_u64; -typedef int_fast64_t sph_s64; -#endif -#endif - -#define SPH_C32(x) ((sph_u32)(x)) -#if !SPH_NO_64 -#define SPH_C64(x) ((sph_u64)(x)) -#define SPH_64 1 -#endif - -#else - -/* - * On non-C99 systems, we use "unsigned int" if it is wide enough, - * "unsigned long" otherwise. This supports all "reasonable" architectures. - * We have to be cautious: pre-C99 preprocessors handle constants - * differently in '#if' expressions. Hence the shifts to test UINT_MAX. 
- */ - -#if ((UINT_MAX >> 11) >> 11) >= 0x3FF - -typedef unsigned int sph_u32; -typedef int sph_s32; - -#define SPH_C32(x) ((sph_u32)(x ## U)) - -#else - -typedef unsigned long sph_u32; -typedef long sph_s32; - -#define SPH_C32(x) ((sph_u32)(x ## UL)) - -#endif - -#if !SPH_NO_64 - -/* - * We want a 64-bit type. We use "unsigned long" if it is wide enough (as - * is common on 64-bit architectures such as AMD64, Alpha or Sparcv9), - * "unsigned long long" otherwise, if available. We use ULLONG_MAX to - * test whether "unsigned long long" is available; we also know that - * gcc features this type, even if the libc header do not know it. - */ - -#if ((ULONG_MAX >> 31) >> 31) >= 3 - -typedef unsigned long sph_u64; -typedef long sph_s64; - -#define SPH_C64(x) ((sph_u64)(x ## UL)) - -#define SPH_64 1 - -#elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__ - -typedef unsigned long long sph_u64; -typedef long long sph_s64; - -#define SPH_C64(x) ((sph_u64)(x ## ULL)) - -#define SPH_64 1 - -#else - -/* - * No 64-bit type... - */ - -#endif - -#endif - -#endif - -/* - * If the "unsigned long" type has length 64 bits or more, then this is - * a "true" 64-bit architectures. This is also true with Visual C on - * amd64, even though the "long" type is limited to 32 bits. - */ -#if SPH_64 && (((ULONG_MAX >> 31) >> 31) >= 3 || defined _M_X64) -#define SPH_64_TRUE 1 -#endif - -/* - * Implementation note: some processors have specific opcodes to perform - * a rotation. Recent versions of gcc recognize the expression above and - * use the relevant opcodes, when appropriate. - */ - -#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) -#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) -#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) - -#if SPH_64 - -#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) -#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) -#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) - -#endif - -#ifndef DOXYGEN_IGNORE -/* - * Define SPH_INLINE to be an "inline" qualifier, if available. We define - * some small macro-like functions which benefit greatly from being inlined. - */ -#if (defined __STDC__ && __STDC_VERSION__ >= 199901L) || defined __GNUC__ -#define SPH_INLINE inline -#elif defined _MSC_VER -#define SPH_INLINE __inline -#else -#define SPH_INLINE -#endif -#endif - -/* - * We define some macros which qualify the architecture. These macros - * may be explicit set externally (e.g. as compiler parameters). The - * code below sets those macros if they are not already defined. - * - * Most macros are boolean, thus evaluate to either zero or non-zero. - * The SPH_UPTR macro is special, in that it evaluates to a C type, - * or is not defined. - * - * SPH_UPTR if defined: unsigned type to cast pointers into - * - * SPH_UNALIGNED non-zero if unaligned accesses are efficient - * SPH_LITTLE_ENDIAN non-zero if architecture is known to be little-endian - * SPH_BIG_ENDIAN non-zero if architecture is known to be big-endian - * SPH_LITTLE_FAST non-zero if little-endian decoding is fast - * SPH_BIG_FAST non-zero if big-endian decoding is fast - * - * If SPH_UPTR is defined, then encoding and decoding of 32-bit and 64-bit - * values will try to be "smart". Either SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN - * _must_ be non-zero in those situations. The 32-bit and 64-bit types - * _must_ also have an exact width. 
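A quick standalone check of the 32-bit truncation and rotation macros; the definitions are the ones given above (C99 branch for sph_u32 and SPH_C32), only the main() harness is added here:

#include <assert.h>
#include <stdint.h>

typedef uint32_t sph_u32;

#define SPH_C32(x)       ((sph_u32)(x))
#define SPH_T32(x)       ((x) & SPH_C32(0xFFFFFFFF))
#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))

int main(void)
{
	sph_u32 x = SPH_C32(0xDEADBEEF);

	/* A left rotate by 4 moves the top nibble down to the low bits. */
	assert(SPH_ROTL32(SPH_C32(0xF0000001), 4) == SPH_C32(0x0000001F));
	/* Rotating left then right by the same count is the identity. */
	assert(SPH_ROTR32(SPH_ROTL32(x, 7), 7) == x);
	return 0;
}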
- * - * SPH_SPARCV9_GCC_32 UltraSPARC-compatible with gcc, 32-bit mode - * SPH_SPARCV9_GCC_64 UltraSPARC-compatible with gcc, 64-bit mode - * SPH_SPARCV9_GCC UltraSPARC-compatible with gcc - * SPH_I386_GCC x86-compatible (32-bit) with gcc - * SPH_I386_MSVC x86-compatible (32-bit) with Microsoft Visual C - * SPH_AMD64_GCC x86-compatible (64-bit) with gcc - * SPH_AMD64_MSVC x86-compatible (64-bit) with Microsoft Visual C - * SPH_PPC32_GCC PowerPC, 32-bit, with gcc - * SPH_PPC64_GCC PowerPC, 64-bit, with gcc - * - * TODO: enhance automatic detection, for more architectures and compilers. - * Endianness is the most important. SPH_UNALIGNED and SPH_UPTR help with - * some very fast functions (e.g. MD4) when using unaligned input data. - * The CPU-specific-with-GCC macros are useful only for inline assembly, - * normally restrained to this header file. - */ - -/* - * 32-bit x86, aka "i386 compatible". - */ -#if defined __i386__ || defined _M_IX86 - -#define SPH_DETECT_UNALIGNED 1 -#define SPH_DETECT_LITTLE_ENDIAN 1 -#define SPH_DETECT_UPTR sph_u32 -#ifdef __GNUC__ -#define SPH_DETECT_I386_GCC 1 -#endif -#ifdef _MSC_VER -#define SPH_DETECT_I386_MSVC 1 -#endif - -/* - * 64-bit x86, hereafter known as "amd64". - */ -#elif defined __x86_64 || defined _M_X64 - -#define SPH_DETECT_UNALIGNED 1 -#define SPH_DETECT_LITTLE_ENDIAN 1 -#define SPH_DETECT_UPTR sph_u64 -#ifdef __GNUC__ -#define SPH_DETECT_AMD64_GCC 1 -#endif -#ifdef _MSC_VER -#define SPH_DETECT_AMD64_MSVC 1 -#endif - -/* - * 64-bit Sparc architecture (implies v9). - */ -#elif ((defined __sparc__ || defined __sparc) && defined __arch64__) \ - || defined __sparcv9 - -#define SPH_DETECT_BIG_ENDIAN 1 -#define SPH_DETECT_UPTR sph_u64 -#ifdef __GNUC__ -#define SPH_DETECT_SPARCV9_GCC_64 1 -#define SPH_DETECT_LITTLE_FAST 1 -#endif - -/* - * 32-bit Sparc. - */ -#elif (defined __sparc__ || defined __sparc) \ - && !(defined __sparcv9 || defined __arch64__) - -#define SPH_DETECT_BIG_ENDIAN 1 -#define SPH_DETECT_UPTR sph_u32 -#if defined __GNUC__ && defined __sparc_v9__ -#define SPH_DETECT_SPARCV9_GCC_32 1 -#define SPH_DETECT_LITTLE_FAST 1 -#endif - -/* - * ARM, little-endian. - */ -#elif defined __arm__ && __ARMEL__ - -#define SPH_DETECT_LITTLE_ENDIAN 1 - -/* - * MIPS, little-endian. - */ -#elif MIPSEL || _MIPSEL || __MIPSEL || __MIPSEL__ - -#define SPH_DETECT_LITTLE_ENDIAN 1 - -/* - * MIPS, big-endian. - */ -#elif MIPSEB || _MIPSEB || __MIPSEB || __MIPSEB__ - -#define SPH_DETECT_BIG_ENDIAN 1 - -/* - * PowerPC. - */ -#elif defined __powerpc__ || defined __POWERPC__ || defined __ppc__ \ - || defined _ARCH_PPC - -/* - * Note: we do not declare cross-endian access to be "fast": even if - * using inline assembly, implementation should still assume that - * keeping the decoded word in a temporary is faster than decoding - * it again. - */ -#if defined __GNUC__ -#if SPH_64_TRUE -#define SPH_DETECT_PPC64_GCC 1 -#else -#define SPH_DETECT_PPC32_GCC 1 -#endif -#endif - -#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN -#define SPH_DETECT_BIG_ENDIAN 1 -#elif defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN -#define SPH_DETECT_LITTLE_ENDIAN 1 -#endif - -/* - * Itanium, 64-bit. 
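Everything in this block only sets SPH_DETECT_* values; the mapping onto the public SPH_* names below honours any macro already defined on the compiler command line, so detection can always be overridden per build. A tiny standalone probe in the same spirit, using only the compilers' own predefined macros rather than the SPH_ ones:

#include <stdio.h>

int main(void)
{
#if defined(__x86_64__) || defined(_M_X64)
	puts("amd64: little-endian, unaligned access is cheap");
#elif defined(__i386__) || defined(_M_IX86)
	puts("i386: little-endian, unaligned access is cheap");
#elif defined(__aarch64__)
	puts("ARM 64-bit");
#elif defined(__arm__)
	puts("ARM 32-bit");
#else
	puts("architecture not recognized by this sketch");
#endif
	return 0;
}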
- */ -#elif defined __ia64 || defined __ia64__ \ - || defined __itanium__ || defined _M_IA64 - -#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN -#define SPH_DETECT_BIG_ENDIAN 1 -#else -#define SPH_DETECT_LITTLE_ENDIAN 1 -#endif -#if defined __LP64__ || defined _LP64 -#define SPH_DETECT_UPTR sph_u64 -#else -#define SPH_DETECT_UPTR sph_u32 -#endif - -#endif - -#if defined SPH_DETECT_SPARCV9_GCC_32 || defined SPH_DETECT_SPARCV9_GCC_64 -#define SPH_DETECT_SPARCV9_GCC 1 -#endif - -#if defined SPH_DETECT_UNALIGNED && !defined SPH_UNALIGNED -#define SPH_UNALIGNED SPH_DETECT_UNALIGNED -#endif -#if defined SPH_DETECT_UPTR && !defined SPH_UPTR -#define SPH_UPTR SPH_DETECT_UPTR -#endif -#if defined SPH_DETECT_LITTLE_ENDIAN && !defined SPH_LITTLE_ENDIAN -#define SPH_LITTLE_ENDIAN SPH_DETECT_LITTLE_ENDIAN -#endif -#if defined SPH_DETECT_BIG_ENDIAN && !defined SPH_BIG_ENDIAN -#define SPH_BIG_ENDIAN SPH_DETECT_BIG_ENDIAN -#endif -#if defined SPH_DETECT_LITTLE_FAST && !defined SPH_LITTLE_FAST -#define SPH_LITTLE_FAST SPH_DETECT_LITTLE_FAST -#endif -#if defined SPH_DETECT_BIG_FAST && !defined SPH_BIG_FAST -#define SPH_BIG_FAST SPH_DETECT_BIG_FAST -#endif -#if defined SPH_DETECT_SPARCV9_GCC_32 && !defined SPH_SPARCV9_GCC_32 -#define SPH_SPARCV9_GCC_32 SPH_DETECT_SPARCV9_GCC_32 -#endif -#if defined SPH_DETECT_SPARCV9_GCC_64 && !defined SPH_SPARCV9_GCC_64 -#define SPH_SPARCV9_GCC_64 SPH_DETECT_SPARCV9_GCC_64 -#endif -#if defined SPH_DETECT_SPARCV9_GCC && !defined SPH_SPARCV9_GCC -#define SPH_SPARCV9_GCC SPH_DETECT_SPARCV9_GCC -#endif -#if defined SPH_DETECT_I386_GCC && !defined SPH_I386_GCC -#define SPH_I386_GCC SPH_DETECT_I386_GCC -#endif -#if defined SPH_DETECT_I386_MSVC && !defined SPH_I386_MSVC -#define SPH_I386_MSVC SPH_DETECT_I386_MSVC -#endif -#if defined SPH_DETECT_AMD64_GCC && !defined SPH_AMD64_GCC -#define SPH_AMD64_GCC SPH_DETECT_AMD64_GCC -#endif -#if defined SPH_DETECT_AMD64_MSVC && !defined SPH_AMD64_MSVC -#define SPH_AMD64_MSVC SPH_DETECT_AMD64_MSVC -#endif -#if defined SPH_DETECT_PPC32_GCC && !defined SPH_PPC32_GCC -#define SPH_PPC32_GCC SPH_DETECT_PPC32_GCC -#endif -#if defined SPH_DETECT_PPC64_GCC && !defined SPH_PPC64_GCC -#define SPH_PPC64_GCC SPH_DETECT_PPC64_GCC -#endif - -#if SPH_LITTLE_ENDIAN && !defined SPH_LITTLE_FAST -#define SPH_LITTLE_FAST 1 -#endif -#if SPH_BIG_ENDIAN && !defined SPH_BIG_FAST -#define SPH_BIG_FAST 1 -#endif - -#if defined SPH_UPTR && !(SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN) -#error SPH_UPTR defined, but endianness is not known. -#endif - -#if SPH_I386_GCC && !SPH_NO_ASM - -/* - * On x86 32-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit - * values. - */ - -static SPH_INLINE sph_u32 -sph_bswap32(sph_u32 x) -{ - __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x)); - return x; -} - -#if SPH_64 - -static SPH_INLINE sph_u64 -sph_bswap64(sph_u64 x) -{ - return ((sph_u64)sph_bswap32((sph_u32)x) << 32) - | (sph_u64)sph_bswap32((sph_u32)(x >> 32)); -} - -#endif - -#elif SPH_AMD64_GCC && !SPH_NO_ASM - -/* - * On x86 64-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit - * and 64-bit values. - */ - -static SPH_INLINE sph_u32 -sph_bswap32(sph_u32 x) -{ - __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x)); - return x; -} - -#if SPH_64 - -static SPH_INLINE sph_u64 -sph_bswap64(sph_u64 x) -{ - __asm__ __volatile__ ("bswapq %0" : "=r" (x) : "0" (x)); - return x; -} - -#endif - -/* - * Disabled code. Apparently, Microsoft Visual C 2005 is smart enough - * to generate proper opcodes for endianness swapping with the pure C - * implementation below. 
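The inline-assembly bswapl/bswapq paths above predate compiler intrinsics; current GCC and Clang expose the same operation as __builtin_bswap32()/__builtin_bswap64(), which typically lower to those very opcodes. A one-line check, shown only as an alternative for comparison, not something this header uses:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* Same transformation documented for sph_bswap32():
	 * 0x12345678 becomes 0x78563412. */
	assert(__builtin_bswap32(UINT32_C(0x12345678)) == UINT32_C(0x78563412));
	return 0;
}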
- * - -#elif SPH_I386_MSVC && !SPH_NO_ASM - -static __inline sph_u32 __declspec(naked) __fastcall -sph_bswap32(sph_u32 x) -{ - __asm { - bswap ecx - mov eax,ecx - ret - } -} - -#if SPH_64 - -static SPH_INLINE sph_u64 -sph_bswap64(sph_u64 x) -{ - return ((sph_u64)sph_bswap32((sph_u32)x) << 32) - | (sph_u64)sph_bswap32((sph_u32)(x >> 32)); -} - -#endif - - * - * [end of disabled code] - */ - -#else - -static SPH_INLINE sph_u32 -sph_bswap32(sph_u32 x) -{ - x = SPH_T32((x << 16) | (x >> 16)); - x = ((x & SPH_C32(0xFF00FF00)) >> 8) - | ((x & SPH_C32(0x00FF00FF)) << 8); - return x; -} - -#if SPH_64 - -/** - * Byte-swap a 64-bit value. - * - * @param x the input value - * @return the byte-swapped value - */ -static SPH_INLINE sph_u64 -sph_bswap64(sph_u64 x) -{ - x = SPH_T64((x << 32) | (x >> 32)); - x = ((x & SPH_C64(0xFFFF0000FFFF0000)) >> 16) - | ((x & SPH_C64(0x0000FFFF0000FFFF)) << 16); - x = ((x & SPH_C64(0xFF00FF00FF00FF00)) >> 8) - | ((x & SPH_C64(0x00FF00FF00FF00FF)) << 8); - return x; -} - -#endif - -#endif - -#if SPH_SPARCV9_GCC && !SPH_NO_ASM - -/* - * On UltraSPARC systems, native ordering is big-endian, but it is - * possible to perform little-endian read accesses by specifying the - * address space 0x88 (ASI_PRIMARY_LITTLE). Basically, either we use - * the opcode "lda [%reg]0x88,%dst", where %reg is the register which - * contains the source address and %dst is the destination register, - * or we use "lda [%reg+imm]%asi,%dst", which uses the %asi register - * to get the address space name. The latter format is better since it - * combines an addition and the actual access in a single opcode; but - * it requires the setting (and subsequent resetting) of %asi, which is - * slow. Some operations (i.e. MD5 compression function) combine many - * successive little-endian read accesses, which may share the same - * %asi setting. The macros below contain the appropriate inline - * assembly. - */ - -#define SPH_SPARCV9_SET_ASI \ - sph_u32 sph_sparcv9_asi; \ - __asm__ __volatile__ ( \ - "rd %%asi,%0\n\twr %%g0,0x88,%%asi" : "=r" (sph_sparcv9_asi)); - -#define SPH_SPARCV9_RESET_ASI \ - __asm__ __volatile__ ("wr %%g0,%0,%%asi" : : "r" (sph_sparcv9_asi)); - -#define SPH_SPARCV9_DEC32LE(base, idx) ({ \ - sph_u32 sph_sparcv9_tmp; \ - __asm__ __volatile__ ("lda [%1+" #idx "*4]%%asi,%0" \ - : "=r" (sph_sparcv9_tmp) : "r" (base)); \ - sph_sparcv9_tmp; \ - }) - -#endif - -static SPH_INLINE void -sph_enc16be(void *dst, unsigned val) -{ - ((unsigned char *)dst)[0] = (val >> 8); - ((unsigned char *)dst)[1] = val; -} - -static SPH_INLINE unsigned -sph_dec16be(const void *src) -{ - return ((unsigned)(((const unsigned char *)src)[0]) << 8) - | (unsigned)(((const unsigned char *)src)[1]); -} - -static SPH_INLINE void -sph_enc16le(void *dst, unsigned val) -{ - ((unsigned char *)dst)[0] = val; - ((unsigned char *)dst)[1] = val >> 8; -} - -static SPH_INLINE unsigned -sph_dec16le(const void *src) -{ - return (unsigned)(((const unsigned char *)src)[0]) - | ((unsigned)(((const unsigned char *)src)[1]) << 8); -} - -/** - * Encode a 32-bit value into the provided buffer (big endian convention). 
- * - * @param dst the destination buffer - * @param val the 32-bit value to encode - */ -static SPH_INLINE void -sph_enc32be(void *dst, sph_u32 val) -{ -#if defined SPH_UPTR -#if SPH_UNALIGNED -#if SPH_LITTLE_ENDIAN - val = sph_bswap32(val); -#endif - *(sph_u32 *)dst = val; -#else - if (((SPH_UPTR)dst & 3) == 0) { -#if SPH_LITTLE_ENDIAN - val = sph_bswap32(val); -#endif - *(sph_u32 *)dst = val; - } else { - ((unsigned char *)dst)[0] = (val >> 24); - ((unsigned char *)dst)[1] = (val >> 16); - ((unsigned char *)dst)[2] = (val >> 8); - ((unsigned char *)dst)[3] = val; - } -#endif -#else - ((unsigned char *)dst)[0] = (val >> 24); - ((unsigned char *)dst)[1] = (val >> 16); - ((unsigned char *)dst)[2] = (val >> 8); - ((unsigned char *)dst)[3] = val; -#endif -} - -/** - * Encode a 32-bit value into the provided buffer (big endian convention). - * The destination buffer must be properly aligned. - * - * @param dst the destination buffer (32-bit aligned) - * @param val the value to encode - */ -static SPH_INLINE void -sph_enc32be_aligned(void *dst, sph_u32 val) -{ -#if SPH_LITTLE_ENDIAN - *(sph_u32 *)dst = sph_bswap32(val); -#elif SPH_BIG_ENDIAN - *(sph_u32 *)dst = val; -#else - ((unsigned char *)dst)[0] = (val >> 24); - ((unsigned char *)dst)[1] = (val >> 16); - ((unsigned char *)dst)[2] = (val >> 8); - ((unsigned char *)dst)[3] = val; -#endif -} - -/** - * Decode a 32-bit value from the provided buffer (big endian convention). - * - * @param src the source buffer - * @return the decoded value - */ -static SPH_INLINE sph_u32 -sph_dec32be(const void *src) -{ -#if defined SPH_UPTR -#if SPH_UNALIGNED -#if SPH_LITTLE_ENDIAN - return sph_bswap32(*(const sph_u32 *)src); -#else - return *(const sph_u32 *)src; -#endif -#else - if (((SPH_UPTR)src & 3) == 0) { -#if SPH_LITTLE_ENDIAN - return sph_bswap32(*(const sph_u32 *)src); -#else - return *(const sph_u32 *)src; -#endif - } else { - return ((sph_u32)(((const unsigned char *)src)[0]) << 24) - | ((sph_u32)(((const unsigned char *)src)[1]) << 16) - | ((sph_u32)(((const unsigned char *)src)[2]) << 8) - | (sph_u32)(((const unsigned char *)src)[3]); - } -#endif -#else - return ((sph_u32)(((const unsigned char *)src)[0]) << 24) - | ((sph_u32)(((const unsigned char *)src)[1]) << 16) - | ((sph_u32)(((const unsigned char *)src)[2]) << 8) - | (sph_u32)(((const unsigned char *)src)[3]); -#endif -} - -/** - * Decode a 32-bit value from the provided buffer (big endian convention). - * The source buffer must be properly aligned. - * - * @param src the source buffer (32-bit aligned) - * @return the decoded value - */ -static SPH_INLINE sph_u32 -sph_dec32be_aligned(const void *src) -{ -#if SPH_LITTLE_ENDIAN - return sph_bswap32(*(const sph_u32 *)src); -#elif SPH_BIG_ENDIAN - return *(const sph_u32 *)src; -#else - return ((sph_u32)(((const unsigned char *)src)[0]) << 24) - | ((sph_u32)(((const unsigned char *)src)[1]) << 16) - | ((sph_u32)(((const unsigned char *)src)[2]) << 8) - | (sph_u32)(((const unsigned char *)src)[3]); -#endif -} - -/** - * Encode a 32-bit value into the provided buffer (little endian convention). 
- * - * @param dst the destination buffer - * @param val the 32-bit value to encode - */ -static SPH_INLINE void -sph_enc32le(void *dst, sph_u32 val) -{ -#if defined SPH_UPTR -#if SPH_UNALIGNED -#if SPH_BIG_ENDIAN - val = sph_bswap32(val); -#endif - *(sph_u32 *)dst = val; -#else - if (((SPH_UPTR)dst & 3) == 0) { -#if SPH_BIG_ENDIAN - val = sph_bswap32(val); -#endif - *(sph_u32 *)dst = val; - } else { - ((unsigned char *)dst)[0] = val; - ((unsigned char *)dst)[1] = (val >> 8); - ((unsigned char *)dst)[2] = (val >> 16); - ((unsigned char *)dst)[3] = (val >> 24); - } -#endif -#else - ((unsigned char *)dst)[0] = val; - ((unsigned char *)dst)[1] = (val >> 8); - ((unsigned char *)dst)[2] = (val >> 16); - ((unsigned char *)dst)[3] = (val >> 24); -#endif -} - -/** - * Encode a 32-bit value into the provided buffer (little endian convention). - * The destination buffer must be properly aligned. - * - * @param dst the destination buffer (32-bit aligned) - * @param val the value to encode - */ -static SPH_INLINE void -sph_enc32le_aligned(void *dst, sph_u32 val) -{ -#if SPH_LITTLE_ENDIAN - *(sph_u32 *)dst = val; -#elif SPH_BIG_ENDIAN - *(sph_u32 *)dst = sph_bswap32(val); -#else - ((unsigned char *)dst)[0] = val; - ((unsigned char *)dst)[1] = (val >> 8); - ((unsigned char *)dst)[2] = (val >> 16); - ((unsigned char *)dst)[3] = (val >> 24); -#endif -} - -/** - * Decode a 32-bit value from the provided buffer (little endian convention). - * - * @param src the source buffer - * @return the decoded value - */ -static SPH_INLINE sph_u32 -sph_dec32le(const void *src) -{ -#if defined SPH_UPTR -#if SPH_UNALIGNED -#if SPH_BIG_ENDIAN - return sph_bswap32(*(const sph_u32 *)src); -#else - return *(const sph_u32 *)src; -#endif -#else - if (((SPH_UPTR)src & 3) == 0) { -#if SPH_BIG_ENDIAN -#if SPH_SPARCV9_GCC && !SPH_NO_ASM - sph_u32 tmp; - - /* - * "__volatile__" is needed here because without it, - * gcc-3.4.3 miscompiles the code and performs the - * access before the test on the address, thus triggering - * a bus error... - */ - __asm__ __volatile__ ( - "lda [%1]0x88,%0" : "=r" (tmp) : "r" (src)); - return tmp; -/* - * On PowerPC, this turns out not to be worth the effort: the inline - * assembly makes GCC optimizer uncomfortable, which tends to nullify - * the decoding gains. - * - * For most hash functions, using this inline assembly trick changes - * hashing speed by less than 5% and often _reduces_ it. The biggest - * gains are for MD4 (+11%) and CubeHash (+30%). For all others, it is - * less then 10%. The speed gain on CubeHash is probably due to the - * chronic shortage of registers that CubeHash endures; for the other - * functions, the generic code appears to be efficient enough already. - * -#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM - sph_u32 tmp; - - __asm__ __volatile__ ( - "lwbrx %0,0,%1" : "=r" (tmp) : "r" (src)); - return tmp; - */ -#else - return sph_bswap32(*(const sph_u32 *)src); -#endif -#else - return *(const sph_u32 *)src; -#endif - } else { - return (sph_u32)(((const unsigned char *)src)[0]) - | ((sph_u32)(((const unsigned char *)src)[1]) << 8) - | ((sph_u32)(((const unsigned char *)src)[2]) << 16) - | ((sph_u32)(((const unsigned char *)src)[3]) << 24); - } -#endif -#else - return (sph_u32)(((const unsigned char *)src)[0]) - | ((sph_u32)(((const unsigned char *)src)[1]) << 8) - | ((sph_u32)(((const unsigned char *)src)[2]) << 16) - | ((sph_u32)(((const unsigned char *)src)[3]) << 24); -#endif -} - -/** - * Decode a 32-bit value from the provided buffer (little endian convention). 
- * The source buffer must be properly aligned. - * - * @param src the source buffer (32-bit aligned) - * @return the decoded value - */ -static SPH_INLINE sph_u32 -sph_dec32le_aligned(const void *src) -{ -#if SPH_LITTLE_ENDIAN - return *(const sph_u32 *)src; -#elif SPH_BIG_ENDIAN -#if SPH_SPARCV9_GCC && !SPH_NO_ASM - sph_u32 tmp; - - __asm__ __volatile__ ("lda [%1]0x88,%0" : "=r" (tmp) : "r" (src)); - return tmp; -/* - * Not worth it generally. - * -#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM - sph_u32 tmp; - - __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (tmp) : "r" (src)); - return tmp; - */ -#else - return sph_bswap32(*(const sph_u32 *)src); -#endif -#else - return (sph_u32)(((const unsigned char *)src)[0]) - | ((sph_u32)(((const unsigned char *)src)[1]) << 8) - | ((sph_u32)(((const unsigned char *)src)[2]) << 16) - | ((sph_u32)(((const unsigned char *)src)[3]) << 24); -#endif -} - -#if SPH_64 - -/** - * Encode a 64-bit value into the provided buffer (big endian convention). - * - * @param dst the destination buffer - * @param val the 64-bit value to encode - */ -static SPH_INLINE void -sph_enc64be(void *dst, sph_u64 val) -{ -#if defined SPH_UPTR -#if SPH_UNALIGNED -#if SPH_LITTLE_ENDIAN - val = sph_bswap64(val); -#endif - *(sph_u64 *)dst = val; -#else - if (((SPH_UPTR)dst & 7) == 0) { -#if SPH_LITTLE_ENDIAN - val = sph_bswap64(val); -#endif - *(sph_u64 *)dst = val; - } else { - ((unsigned char *)dst)[0] = (val >> 56); - ((unsigned char *)dst)[1] = (val >> 48); - ((unsigned char *)dst)[2] = (val >> 40); - ((unsigned char *)dst)[3] = (val >> 32); - ((unsigned char *)dst)[4] = (val >> 24); - ((unsigned char *)dst)[5] = (val >> 16); - ((unsigned char *)dst)[6] = (val >> 8); - ((unsigned char *)dst)[7] = val; - } -#endif -#else - ((unsigned char *)dst)[0] = (val >> 56); - ((unsigned char *)dst)[1] = (val >> 48); - ((unsigned char *)dst)[2] = (val >> 40); - ((unsigned char *)dst)[3] = (val >> 32); - ((unsigned char *)dst)[4] = (val >> 24); - ((unsigned char *)dst)[5] = (val >> 16); - ((unsigned char *)dst)[6] = (val >> 8); - ((unsigned char *)dst)[7] = val; -#endif -} - -/** - * Encode a 64-bit value into the provided buffer (big endian convention). - * The destination buffer must be properly aligned. - * - * @param dst the destination buffer (64-bit aligned) - * @param val the value to encode - */ -static SPH_INLINE void -sph_enc64be_aligned(void *dst, sph_u64 val) -{ -#if SPH_LITTLE_ENDIAN - *(sph_u64 *)dst = sph_bswap64(val); -#elif SPH_BIG_ENDIAN - *(sph_u64 *)dst = val; -#else - ((unsigned char *)dst)[0] = (val >> 56); - ((unsigned char *)dst)[1] = (val >> 48); - ((unsigned char *)dst)[2] = (val >> 40); - ((unsigned char *)dst)[3] = (val >> 32); - ((unsigned char *)dst)[4] = (val >> 24); - ((unsigned char *)dst)[5] = (val >> 16); - ((unsigned char *)dst)[6] = (val >> 8); - ((unsigned char *)dst)[7] = val; -#endif -} - -/** - * Decode a 64-bit value from the provided buffer (big endian convention). 
- * - * @param src the source buffer - * @return the decoded value - */ -static SPH_INLINE sph_u64 -sph_dec64be(const void *src) -{ -#if defined SPH_UPTR -#if SPH_UNALIGNED -#if SPH_LITTLE_ENDIAN - return sph_bswap64(*(const sph_u64 *)src); -#else - return *(const sph_u64 *)src; -#endif -#else - if (((SPH_UPTR)src & 7) == 0) { -#if SPH_LITTLE_ENDIAN - return sph_bswap64(*(const sph_u64 *)src); -#else - return *(const sph_u64 *)src; -#endif - } else { - return ((sph_u64)(((const unsigned char *)src)[0]) << 56) - | ((sph_u64)(((const unsigned char *)src)[1]) << 48) - | ((sph_u64)(((const unsigned char *)src)[2]) << 40) - | ((sph_u64)(((const unsigned char *)src)[3]) << 32) - | ((sph_u64)(((const unsigned char *)src)[4]) << 24) - | ((sph_u64)(((const unsigned char *)src)[5]) << 16) - | ((sph_u64)(((const unsigned char *)src)[6]) << 8) - | (sph_u64)(((const unsigned char *)src)[7]); - } -#endif -#else - return ((sph_u64)(((const unsigned char *)src)[0]) << 56) - | ((sph_u64)(((const unsigned char *)src)[1]) << 48) - | ((sph_u64)(((const unsigned char *)src)[2]) << 40) - | ((sph_u64)(((const unsigned char *)src)[3]) << 32) - | ((sph_u64)(((const unsigned char *)src)[4]) << 24) - | ((sph_u64)(((const unsigned char *)src)[5]) << 16) - | ((sph_u64)(((const unsigned char *)src)[6]) << 8) - | (sph_u64)(((const unsigned char *)src)[7]); -#endif -} - -/** - * Decode a 64-bit value from the provided buffer (big endian convention). - * The source buffer must be properly aligned. - * - * @param src the source buffer (64-bit aligned) - * @return the decoded value - */ -static SPH_INLINE sph_u64 -sph_dec64be_aligned(const void *src) -{ -#if SPH_LITTLE_ENDIAN - return sph_bswap64(*(const sph_u64 *)src); -#elif SPH_BIG_ENDIAN - return *(const sph_u64 *)src; -#else - return ((sph_u64)(((const unsigned char *)src)[0]) << 56) - | ((sph_u64)(((const unsigned char *)src)[1]) << 48) - | ((sph_u64)(((const unsigned char *)src)[2]) << 40) - | ((sph_u64)(((const unsigned char *)src)[3]) << 32) - | ((sph_u64)(((const unsigned char *)src)[4]) << 24) - | ((sph_u64)(((const unsigned char *)src)[5]) << 16) - | ((sph_u64)(((const unsigned char *)src)[6]) << 8) - | (sph_u64)(((const unsigned char *)src)[7]); -#endif -} - -/** - * Encode a 64-bit value into the provided buffer (little endian convention). - * - * @param dst the destination buffer - * @param val the 64-bit value to encode - */ -static SPH_INLINE void -sph_enc64le(void *dst, sph_u64 val) -{ -#if defined SPH_UPTR -#if SPH_UNALIGNED -#if SPH_BIG_ENDIAN - val = sph_bswap64(val); -#endif - *(sph_u64 *)dst = val; -#else - if (((SPH_UPTR)dst & 7) == 0) { -#if SPH_BIG_ENDIAN - val = sph_bswap64(val); -#endif - *(sph_u64 *)dst = val; - } else { - ((unsigned char *)dst)[0] = val; - ((unsigned char *)dst)[1] = (val >> 8); - ((unsigned char *)dst)[2] = (val >> 16); - ((unsigned char *)dst)[3] = (val >> 24); - ((unsigned char *)dst)[4] = (val >> 32); - ((unsigned char *)dst)[5] = (val >> 40); - ((unsigned char *)dst)[6] = (val >> 48); - ((unsigned char *)dst)[7] = (val >> 56); - } -#endif -#else - ((unsigned char *)dst)[0] = val; - ((unsigned char *)dst)[1] = (val >> 8); - ((unsigned char *)dst)[2] = (val >> 16); - ((unsigned char *)dst)[3] = (val >> 24); - ((unsigned char *)dst)[4] = (val >> 32); - ((unsigned char *)dst)[5] = (val >> 40); - ((unsigned char *)dst)[6] = (val >> 48); - ((unsigned char *)dst)[7] = (val >> 56); -#endif -} - -/** - * Encode a 64-bit value into the provided buffer (little endian convention). - * The destination buffer must be properly aligned. 
- * - * @param dst the destination buffer (64-bit aligned) - * @param val the value to encode - */ -static SPH_INLINE void -sph_enc64le_aligned(void *dst, sph_u64 val) -{ -#if SPH_LITTLE_ENDIAN - *(sph_u64 *)dst = val; -#elif SPH_BIG_ENDIAN - *(sph_u64 *)dst = sph_bswap64(val); -#else - ((unsigned char *)dst)[0] = val; - ((unsigned char *)dst)[1] = (val >> 8); - ((unsigned char *)dst)[2] = (val >> 16); - ((unsigned char *)dst)[3] = (val >> 24); - ((unsigned char *)dst)[4] = (val >> 32); - ((unsigned char *)dst)[5] = (val >> 40); - ((unsigned char *)dst)[6] = (val >> 48); - ((unsigned char *)dst)[7] = (val >> 56); -#endif -} - -/** - * Decode a 64-bit value from the provided buffer (little endian convention). - * - * @param src the source buffer - * @return the decoded value - */ -static SPH_INLINE sph_u64 -sph_dec64le(const void *src) -{ -#if defined SPH_UPTR -#if SPH_UNALIGNED -#if SPH_BIG_ENDIAN - return sph_bswap64(*(const sph_u64 *)src); -#else - return *(const sph_u64 *)src; -#endif -#else - if (((SPH_UPTR)src & 7) == 0) { -#if SPH_BIG_ENDIAN -#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM - sph_u64 tmp; - - __asm__ __volatile__ ( - "ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src)); - return tmp; -/* - * Not worth it generally. - * -#elif SPH_PPC32_GCC && !SPH_NO_ASM - return (sph_u64)sph_dec32le_aligned(src) - | ((sph_u64)sph_dec32le_aligned( - (const char *)src + 4) << 32); -#elif SPH_PPC64_GCC && !SPH_NO_ASM - sph_u64 tmp; - - __asm__ __volatile__ ( - "ldbrx %0,0,%1" : "=r" (tmp) : "r" (src)); - return tmp; - */ -#else - return sph_bswap64(*(const sph_u64 *)src); -#endif -#else - return *(const sph_u64 *)src; -#endif - } else { - return (sph_u64)(((const unsigned char *)src)[0]) - | ((sph_u64)(((const unsigned char *)src)[1]) << 8) - | ((sph_u64)(((const unsigned char *)src)[2]) << 16) - | ((sph_u64)(((const unsigned char *)src)[3]) << 24) - | ((sph_u64)(((const unsigned char *)src)[4]) << 32) - | ((sph_u64)(((const unsigned char *)src)[5]) << 40) - | ((sph_u64)(((const unsigned char *)src)[6]) << 48) - | ((sph_u64)(((const unsigned char *)src)[7]) << 56); - } -#endif -#else - return (sph_u64)(((const unsigned char *)src)[0]) - | ((sph_u64)(((const unsigned char *)src)[1]) << 8) - | ((sph_u64)(((const unsigned char *)src)[2]) << 16) - | ((sph_u64)(((const unsigned char *)src)[3]) << 24) - | ((sph_u64)(((const unsigned char *)src)[4]) << 32) - | ((sph_u64)(((const unsigned char *)src)[5]) << 40) - | ((sph_u64)(((const unsigned char *)src)[6]) << 48) - | ((sph_u64)(((const unsigned char *)src)[7]) << 56); -#endif -} - -/** - * Decode a 64-bit value from the provided buffer (little endian convention). - * The source buffer must be properly aligned. - * - * @param src the source buffer (64-bit aligned) - * @return the decoded value - */ -static SPH_INLINE sph_u64 -sph_dec64le_aligned(const void *src) -{ -#if SPH_LITTLE_ENDIAN - return *(const sph_u64 *)src; -#elif SPH_BIG_ENDIAN -#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM - sph_u64 tmp; - - __asm__ __volatile__ ("ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src)); - return tmp; -/* - * Not worth it generally. 
- * -#elif SPH_PPC32_GCC && !SPH_NO_ASM - return (sph_u64)sph_dec32le_aligned(src) - | ((sph_u64)sph_dec32le_aligned((const char *)src + 4) << 32); -#elif SPH_PPC64_GCC && !SPH_NO_ASM - sph_u64 tmp; - - __asm__ __volatile__ ("ldbrx %0,0,%1" : "=r" (tmp) : "r" (src)); - return tmp; - */ -#else - return sph_bswap64(*(const sph_u64 *)src); -#endif -#else - return (sph_u64)(((const unsigned char *)src)[0]) - | ((sph_u64)(((const unsigned char *)src)[1]) << 8) - | ((sph_u64)(((const unsigned char *)src)[2]) << 16) - | ((sph_u64)(((const unsigned char *)src)[3]) << 24) - | ((sph_u64)(((const unsigned char *)src)[4]) << 32) - | ((sph_u64)(((const unsigned char *)src)[5]) << 40) - | ((sph_u64)(((const unsigned char *)src)[6]) << 48) - | ((sph_u64)(((const unsigned char *)src)[7]) << 56); -#endif -} - -#endif - -#endif /* Doxygen excluded block */ - -#endif diff --git a/yespower-1.0.1-power2b/sysendian-p2b.h b/yespower-1.0.1-power2b/sysendian-p2b.h deleted file mode 100644 index 52c1fe73b..000000000 --- a/yespower-1.0.1-power2b/sysendian-p2b.h +++ /dev/null @@ -1,94 +0,0 @@ -/*- - * Copyright 2007-2014 Colin Percival - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _SYSENDIAN_H_ -#define _SYSENDIAN_H_ - -#include - -/* Avoid namespace collisions with BSD . 
*/ -#define be32dec libcperciva_be32dec -#define be32enc libcperciva_be32enc -#define be64enc libcperciva_be64enc -#define le32dec libcperciva_le32dec -#define le32enc libcperciva_le32enc - -static inline uint32_t -be32dec(const void * pp) -{ - const uint8_t * p = (uint8_t const *)pp; - - return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) + - ((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24)); -} - -static inline void -be32enc(void * pp, uint32_t x) -{ - uint8_t * p = (uint8_t *)pp; - - p[3] = x & 0xff; - p[2] = (x >> 8) & 0xff; - p[1] = (x >> 16) & 0xff; - p[0] = (x >> 24) & 0xff; -} - -static inline void -be64enc(void * pp, uint64_t x) -{ - uint8_t * p = (uint8_t *)pp; - - p[7] = x & 0xff; - p[6] = (x >> 8) & 0xff; - p[5] = (x >> 16) & 0xff; - p[4] = (x >> 24) & 0xff; - p[3] = (x >> 32) & 0xff; - p[2] = (x >> 40) & 0xff; - p[1] = (x >> 48) & 0xff; - p[0] = (x >> 56) & 0xff; -} - -static inline uint32_t -le32dec(const void * pp) -{ - const uint8_t * p = (uint8_t const *)pp; - - return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) + - ((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24)); -} - -static inline void -le32enc(void * pp, uint32_t x) -{ - uint8_t * p = (uint8_t *)pp; - - p[0] = x & 0xff; - p[1] = (x >> 8) & 0xff; - p[2] = (x >> 16) & 0xff; - p[3] = (x >> 24) & 0xff; -} - -#endif /* !_SYSENDIAN_H_ */ diff --git a/yespower-1.0.1-power2b/tests.c b/yespower-1.0.1-power2b/tests.c deleted file mode 100644 index 907892de3..000000000 --- a/yespower-1.0.1-power2b/tests.c +++ /dev/null @@ -1,194 +0,0 @@ -/*- - * Copyright 2013-2018 Alexander Peslyak - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include - -#include "yespower.h" - -#undef TEST_PBKDF2_SHA256_P2B - -#ifdef TEST_PBKDF2_SHA256_P2B -#include - -#include "sha256.h" - -static void print_PBKDF2_SHA256_P2B_raw(const char *passwd, size_t passwdlen, - const char *salt, size_t saltlen, uint64_t c, size_t dkLen) -{ - uint8_t dk[64]; - size_t i; - - assert(dkLen <= sizeof(dk)); - - /* XXX This prints the strings truncated at first NUL */ - printf("PBKDF2_SHA256_P2B(\"%s\", \"%s\", %llu, %llu) = ", - passwd, salt, (unsigned long long)c, (unsigned long long)dkLen); - - PBKDF2_SHA256_P2B((const uint8_t *) passwd, passwdlen, - (const uint8_t *) salt, saltlen, c, dk, dkLen); - - for (i = 0; i < dkLen; i++) - printf("%02x%c", dk[i], i < dkLen - 1 ? 
' ' : '\n'); -} - -static void print_PBKDF2_SHA256_P2B(const char *passwd, - const char *salt, uint64_t c, size_t dkLen) -{ - print_PBKDF2_SHA256_P2B_raw(passwd, strlen(passwd), salt, strlen(salt), c, - dkLen); -} -#endif - -static const char *pers_bsty_magic = "BSTY"; - -static void print_yespower(yespower_version_t version, uint32_t N, uint32_t r, - const char *pers) -{ - yespower_params_t params = { - .version = version, - .N = N, - .r = r, - .pers = (const uint8_t *)pers, - .perslen = pers ? strlen(pers) : 0 - }; - uint8_t src[80]; - yespower_binary_t_p2b dst; - size_t i; - - const char *q = (pers && pers != pers_bsty_magic) ? "\"": ""; - printf("yespower(%u, %u, %u, %s%s%s) = ", (unsigned int)version, N, r, - q, pers ? pers : "NULL", q); - - for (i = 0; i < sizeof(src); i++) - src[i] = i * 3; - - if (pers == pers_bsty_magic) { - params.pers = src; - params.perslen = sizeof(src); - } - - if (yespower_tls_p2b(src, sizeof(src), ¶ms, &dst)) { - puts("FAILED"); - return; - } - - for (i = 0; i < sizeof(dst); i++) - printf("%02x%c", dst.uc[i], i < sizeof(dst) - 1 ? ' ' : '\n'); -} - -static void print_yespower_loop(yespower_version_t version, const char *pers) -{ - uint32_t N, r; - uint8_t src[80]; - yespower_binary_t_p2b dst, xor = {{0}}; - size_t i; - - printf("XOR of yespower(%u, ...) = ", (unsigned int)version); - - /* - * This value of src is chosen to trigger duplicate index in the last - * SMix2 invocation in yespower 0.5 for N=2048 with at least one of the - * values of r below. This is needed to test that a non-save version - * of BlockMix is used in that special case. Most other values of src - * would leave this untested. - */ - src[0] = 43; - for (i = 1; i < sizeof(src); i++) - src[i] = i * 3; - - for (N = 1024; N <= 4096; N <<= 1) { - for (r = 8; r <= 32; r++) { - yespower_params_t params = { - .version = version, - .N = N, - .r = r, - .pers = (const uint8_t *)pers, - .perslen = pers ? strlen(pers) : 0 - }; - if (yespower_tls_p2b(src, sizeof(src), ¶ms, &dst)) { - puts("FAILED"); - return; - } - for (i = 0; i < sizeof(xor); i++) - xor.uc[i] ^= dst.uc[i]; - } - } - - for (i = 0; i < sizeof(xor); i++) - printf("%02x%c", xor.uc[i], i < sizeof(xor) - 1 ? 
' ' : '\n'); -} - -int main(void) -{ - setvbuf(stdout, NULL, _IOLBF, 0); - -#ifdef TEST_PBKDF2_SHA256_P2B - print_PBKDF2_SHA256_P2B("password", "salt", 1, 20); - print_PBKDF2_SHA256_P2B("password", "salt", 2, 20); - print_PBKDF2_SHA256_P2B("password", "salt", 4096, 20); - print_PBKDF2_SHA256_P2B("password", "salt", 16777216, 20); - print_PBKDF2_SHA256_P2B("passwordPASSWORDpassword", - "saltSALTsaltSALTsaltSALTsaltSALTsalt", 4096, 25); - print_PBKDF2_SHA256_P2B_raw("pass\0word", 9, "sa\0lt", 5, 4096, 16); -#if 0 - print_PBKDF2_SHA256_P2B("password", "salt", 1, 32); - print_PBKDF2_SHA256_P2B("password", "salt", 2, 32); - print_PBKDF2_SHA256_P2B("password", "salt", 4096, 32); - print_PBKDF2_SHA256_P2B("password", "salt", 16777216, 32); - print_PBKDF2_SHA256_P2B("passwordPASSWORDpassword", - "saltSALTsaltSALTsaltSALTsaltSALTsalt", 4096, 40); - print_PBKDF2_SHA256_P2B("password", "salt", 4096, 16); - print_PBKDF2_SHA256_P2B("password", "salt", 1, 20); - print_PBKDF2_SHA256_P2B("password", "salt", 2, 20); - print_PBKDF2_SHA256_P2B("password", "salt", 4096, 20); - print_PBKDF2_SHA256_P2B("password", "salt", 16777216, 20); - print_PBKDF2_SHA256_P2B("password", "salt", 4096, 25); - print_PBKDF2_SHA256_P2B("password", "salt", 4096, 16); -#endif -#endif - - print_yespower(YESPOWER_0_5, 2048, 8, "Client Key"); /* yescrypt 0.5 */ - print_yespower(YESPOWER_0_5, 2048, 8, pers_bsty_magic); /* BSTY */ - print_yespower(YESPOWER_0_5, 4096, 16, "Client Key"); /* Cryply */ - print_yespower(YESPOWER_0_5, 4096, 24, "Jagaricoin"); - print_yespower(YESPOWER_0_5, 4096, 32, "WaviBanana"); - print_yespower(YESPOWER_0_5, 2048, 32, "Client Key"); - print_yespower(YESPOWER_0_5, 1024, 32, "Client Key"); - - print_yespower(YESPOWER_0_5, 2048, 8, NULL); /* no personality */ - - print_yespower(YESPOWER_1_0, 2048, 8, NULL); - print_yespower(YESPOWER_1_0, 4096, 16, NULL); - print_yespower(YESPOWER_1_0, 4096, 32, NULL); - print_yespower(YESPOWER_1_0, 2048, 32, NULL); - print_yespower(YESPOWER_1_0, 1024, 32, NULL); - - print_yespower(YESPOWER_1_0, 1024, 32, "personality test"); - - print_yespower_loop(YESPOWER_0_5, "Client Key"); - print_yespower_loop(YESPOWER_1_0, NULL); - - print_yespower(YESPOWER_1_0, 2048, 32, "Satoshi Nakamoto 31/Oct/2008 Proof-of-work is essentially one-CPU-one-vote"); // SUGAR - print_yespower(YESPOWER_1_0_BLAKE2B, 2048, 32, "Satoshi Nakamoto 31/Oct/2008 Proof-of-work is essentially one-CPU-one-vote"); // TEST - print_yespower(YESPOWER_1_0_BLAKE2B, 2048, 32, "Now I am become Death, the destroyer of worlds"); // MBC - - return 0; -} diff --git a/yespower-1.0.1-power2b/yespower-opt-p2b.c b/yespower-1.0.1-power2b/yespower-opt-p2b.c deleted file mode 100644 index 6fcf11881..000000000 --- a/yespower-1.0.1-power2b/yespower-opt-p2b.c +++ /dev/null @@ -1,1176 +0,0 @@ -/*- - * Copyright 2009 Colin Percival - * Copyright 2012-2019 Alexander Peslyak - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * This file was originally written by Colin Percival as part of the Tarsnap - * online backup system. - * - * This is a proof-of-work focused fork of yescrypt, including optimized and - * cut-down implementation of the obsolete yescrypt 0.5 (based off its first - * submission to PHC back in 2014) and a new proof-of-work specific variation - * known as yespower 1.0. The former is intended as an upgrade for - * cryptocurrencies that already use yescrypt 0.5 and the latter may be used - * as a further upgrade (hard fork) by those and other cryptocurrencies. The - * version of algorithm to use is requested through parameters, allowing for - * both algorithms to co-exist in client and miner implementations (such as in - * preparation for a hard-fork). - */ - -#ifndef _YESPOWER_OPT_C_PASS_ -#define _YESPOWER_OPT_C_PASS_ 1 -#endif - -#if _YESPOWER_OPT_C_PASS_ == 1 -/* - * AVX and especially XOP speed up Salsa20 a lot, but needlessly result in - * extra instruction prefixes for pwxform (which we make more use of). While - * no slowdown from the prefixes is generally observed on AMD CPUs supporting - * XOP, some slowdown is sometimes observed on Intel CPUs with AVX. - */ -#ifdef __XOP__ -#warning "Note: XOP is enabled. That's great." -#elif defined(__AVX__) -#warning "Note: AVX is enabled. That's OK." -#elif defined(__SSE2__) -#warning "Note: AVX and XOP are not enabled. That's OK." -#elif defined(__x86_64__) || defined(__i386__) -#warning "SSE2 not enabled. Expect poor performance." -#else -#warning "Note: building generic code for non-x86. That's OK." -#endif - -/* - * The SSE4 code version has fewer instructions than the generic SSE2 version, - * but all of the instructions are SIMD, thereby wasting the scalar execution - * units. Thus, the generic SSE2 version below actually runs faster on some - * CPUs due to its balanced mix of SIMD and scalar instructions. - */ -#undef USE_SSE4_FOR_32BIT - -#ifdef __SSE2__ -/* - * GCC before 4.9 would by default unnecessarily use store/load (without - * SSE4.1) or (V)PEXTR (with SSE4.1 or AVX) instead of simply (V)MOV. - * This was tracked as GCC bug 54349. - * "-mtune=corei7" works around this, but is only supported for GCC 4.6+. - * We use inline asm for pre-4.6 GCC, further down this file. 
- */ -#if __GNUC__ == 4 && __GNUC_MINOR__ >= 6 && __GNUC_MINOR__ < 9 && \ - !defined(__clang__) && !defined(__ICC) -#pragma GCC target ("tune=corei7") -#endif -#include -#ifdef __XOP__ -#include -#endif -#elif defined(__SSE__) -#include -#endif - -#include -#include -#include -#include - -#include "insecure_memzero-p2b.h" -#include "sha256-p2b.h" -#include "sysendian-p2b.h" - -#include "yespower-p2b.h" - -#include "yespower-platform-p2b.c" - -#if __STDC_VERSION__ >= 199901L -/* Have restrict */ -#elif defined(__GNUC__) -#define restrict __restrict -#else -#define restrict -#endif - -#ifdef __GNUC__ -#define unlikely(exp) __builtin_expect(exp, 0) -#else -#define unlikely(exp) (exp) -#endif - -#ifdef __SSE__ -#define PREFETCH(x, hint) _mm_prefetch((const char *)(x), (hint)); -#else -#undef PREFETCH -#endif - -typedef union { - uint32_t w[16]; - uint64_t d[8]; -#ifdef __SSE2__ - __m128i q[4]; -#endif -} salsa20_blk_t; - -static inline void salsa20_simd_shuffle(const salsa20_blk_t *Bin, - salsa20_blk_t *Bout) -{ -#define COMBINE(out, in1, in2) \ - Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32); - COMBINE(0, 0, 2) - COMBINE(1, 5, 7) - COMBINE(2, 2, 4) - COMBINE(3, 7, 1) - COMBINE(4, 4, 6) - COMBINE(5, 1, 3) - COMBINE(6, 6, 0) - COMBINE(7, 3, 5) -#undef COMBINE -} - -static inline void salsa20_simd_unshuffle(const salsa20_blk_t *Bin, - salsa20_blk_t *Bout) -{ -#define UNCOMBINE(out, in1, in2) \ - Bout->w[out * 2] = Bin->d[in1]; \ - Bout->w[out * 2 + 1] = Bin->d[in2] >> 32; - UNCOMBINE(0, 0, 6) - UNCOMBINE(1, 5, 3) - UNCOMBINE(2, 2, 0) - UNCOMBINE(3, 7, 5) - UNCOMBINE(4, 4, 2) - UNCOMBINE(5, 1, 7) - UNCOMBINE(6, 6, 4) - UNCOMBINE(7, 3, 1) -#undef UNCOMBINE -} - -#ifdef __SSE2__ -#define DECL_X \ - __m128i X0, X1, X2, X3; -#define DECL_Y \ - __m128i Y0, Y1, Y2, Y3; -#define READ_X(in) \ - X0 = (in).q[0]; X1 = (in).q[1]; X2 = (in).q[2]; X3 = (in).q[3]; -#define WRITE_X(out) \ - (out).q[0] = X0; (out).q[1] = X1; (out).q[2] = X2; (out).q[3] = X3; - -#ifdef __XOP__ -#define ARX(out, in1, in2, s) \ - out = _mm_xor_si128(out, _mm_roti_epi32(_mm_add_epi32(in1, in2), s)); -#else -#define ARX(out, in1, in2, s) { \ - __m128i tmp = _mm_add_epi32(in1, in2); \ - out = _mm_xor_si128(out, _mm_slli_epi32(tmp, s)); \ - out = _mm_xor_si128(out, _mm_srli_epi32(tmp, 32 - s)); \ -} -#endif - -#define SALSA20_2ROUNDS \ - /* Operate on "columns" */ \ - ARX(X1, X0, X3, 7) \ - ARX(X2, X1, X0, 9) \ - ARX(X3, X2, X1, 13) \ - ARX(X0, X3, X2, 18) \ - /* Rearrange data */ \ - X1 = _mm_shuffle_epi32(X1, 0x93); \ - X2 = _mm_shuffle_epi32(X2, 0x4E); \ - X3 = _mm_shuffle_epi32(X3, 0x39); \ - /* Operate on "rows" */ \ - ARX(X3, X0, X1, 7) \ - ARX(X2, X3, X0, 9) \ - ARX(X1, X2, X3, 13) \ - ARX(X0, X1, X2, 18) \ - /* Rearrange data */ \ - X1 = _mm_shuffle_epi32(X1, 0x39); \ - X2 = _mm_shuffle_epi32(X2, 0x4E); \ - X3 = _mm_shuffle_epi32(X3, 0x93); - -/** - * Apply the Salsa20 core to the block provided in (X0 ... X3). - */ -#define SALSA20_wrapper(out, rounds) { \ - __m128i Z0 = X0, Z1 = X1, Z2 = X2, Z3 = X3; \ - rounds \ - (out).q[0] = X0 = _mm_add_epi32(X0, Z0); \ - (out).q[1] = X1 = _mm_add_epi32(X1, Z1); \ - (out).q[2] = X2 = _mm_add_epi32(X2, Z2); \ - (out).q[3] = X3 = _mm_add_epi32(X3, Z3); \ -} - -/** - * Apply the Salsa20/2 core to the block provided in X. - */ -#define SALSA20_2(out) \ - SALSA20_wrapper(out, SALSA20_2ROUNDS) - -#define SALSA20_8ROUNDS \ - SALSA20_2ROUNDS SALSA20_2ROUNDS SALSA20_2ROUNDS SALSA20_2ROUNDS - -/** - * Apply the Salsa20/8 core to the block provided in X. 
- */ -#define SALSA20_8(out) \ - SALSA20_wrapper(out, SALSA20_8ROUNDS) - -#define XOR_X(in) \ - X0 = _mm_xor_si128(X0, (in).q[0]); \ - X1 = _mm_xor_si128(X1, (in).q[1]); \ - X2 = _mm_xor_si128(X2, (in).q[2]); \ - X3 = _mm_xor_si128(X3, (in).q[3]); - -#define XOR_X_2(in1, in2) \ - X0 = _mm_xor_si128((in1).q[0], (in2).q[0]); \ - X1 = _mm_xor_si128((in1).q[1], (in2).q[1]); \ - X2 = _mm_xor_si128((in1).q[2], (in2).q[2]); \ - X3 = _mm_xor_si128((in1).q[3], (in2).q[3]); - -#define XOR_X_WRITE_XOR_Y_2(out, in) \ - (out).q[0] = Y0 = _mm_xor_si128((out).q[0], (in).q[0]); \ - (out).q[1] = Y1 = _mm_xor_si128((out).q[1], (in).q[1]); \ - (out).q[2] = Y2 = _mm_xor_si128((out).q[2], (in).q[2]); \ - (out).q[3] = Y3 = _mm_xor_si128((out).q[3], (in).q[3]); \ - X0 = _mm_xor_si128(X0, Y0); \ - X1 = _mm_xor_si128(X1, Y1); \ - X2 = _mm_xor_si128(X2, Y2); \ - X3 = _mm_xor_si128(X3, Y3); - -#define INTEGERIFY _mm_cvtsi128_si32(X0) - -#else /* !defined(__SSE2__) */ - -#define DECL_X \ - salsa20_blk_t X; -#define DECL_Y \ - salsa20_blk_t Y; - -#define COPY(out, in) \ - (out).d[0] = (in).d[0]; \ - (out).d[1] = (in).d[1]; \ - (out).d[2] = (in).d[2]; \ - (out).d[3] = (in).d[3]; \ - (out).d[4] = (in).d[4]; \ - (out).d[5] = (in).d[5]; \ - (out).d[6] = (in).d[6]; \ - (out).d[7] = (in).d[7]; - -#define READ_X(in) COPY(X, in) -#define WRITE_X(out) COPY(out, X) - -/** - * salsa20(B): - * Apply the Salsa20 core to the provided block. - */ -static inline void salsa20(salsa20_blk_t *restrict B, - salsa20_blk_t *restrict Bout, uint32_t doublerounds) -{ - salsa20_blk_t X; -#define x X.w - - salsa20_simd_unshuffle(B, &X); - - do { -#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b)))) - /* Operate on columns */ - x[ 4] ^= R(x[ 0]+x[12], 7); x[ 8] ^= R(x[ 4]+x[ 0], 9); - x[12] ^= R(x[ 8]+x[ 4],13); x[ 0] ^= R(x[12]+x[ 8],18); - - x[ 9] ^= R(x[ 5]+x[ 1], 7); x[13] ^= R(x[ 9]+x[ 5], 9); - x[ 1] ^= R(x[13]+x[ 9],13); x[ 5] ^= R(x[ 1]+x[13],18); - - x[14] ^= R(x[10]+x[ 6], 7); x[ 2] ^= R(x[14]+x[10], 9); - x[ 6] ^= R(x[ 2]+x[14],13); x[10] ^= R(x[ 6]+x[ 2],18); - - x[ 3] ^= R(x[15]+x[11], 7); x[ 7] ^= R(x[ 3]+x[15], 9); - x[11] ^= R(x[ 7]+x[ 3],13); x[15] ^= R(x[11]+x[ 7],18); - - /* Operate on rows */ - x[ 1] ^= R(x[ 0]+x[ 3], 7); x[ 2] ^= R(x[ 1]+x[ 0], 9); - x[ 3] ^= R(x[ 2]+x[ 1],13); x[ 0] ^= R(x[ 3]+x[ 2],18); - - x[ 6] ^= R(x[ 5]+x[ 4], 7); x[ 7] ^= R(x[ 6]+x[ 5], 9); - x[ 4] ^= R(x[ 7]+x[ 6],13); x[ 5] ^= R(x[ 4]+x[ 7],18); - - x[11] ^= R(x[10]+x[ 9], 7); x[ 8] ^= R(x[11]+x[10], 9); - x[ 9] ^= R(x[ 8]+x[11],13); x[10] ^= R(x[ 9]+x[ 8],18); - - x[12] ^= R(x[15]+x[14], 7); x[13] ^= R(x[12]+x[15], 9); - x[14] ^= R(x[13]+x[12],13); x[15] ^= R(x[14]+x[13],18); -#undef R - } while (--doublerounds); -#undef x - - { - uint32_t i; - salsa20_simd_shuffle(&X, Bout); - for (i = 0; i < 16; i += 4) { - B->w[i] = Bout->w[i] += B->w[i]; - B->w[i + 1] = Bout->w[i + 1] += B->w[i + 1]; - B->w[i + 2] = Bout->w[i + 2] += B->w[i + 2]; - B->w[i + 3] = Bout->w[i + 3] += B->w[i + 3]; - } - } -} - -/** - * Apply the Salsa20/2 core to the block provided in X. - */ -#define SALSA20_2(out) \ - salsa20(&X, &out, 1); - -/** - * Apply the Salsa20/8 core to the block provided in X. 
- */ -#define SALSA20_8(out) \ - salsa20(&X, &out, 4); - -#define XOR(out, in1, in2) \ - (out).d[0] = (in1).d[0] ^ (in2).d[0]; \ - (out).d[1] = (in1).d[1] ^ (in2).d[1]; \ - (out).d[2] = (in1).d[2] ^ (in2).d[2]; \ - (out).d[3] = (in1).d[3] ^ (in2).d[3]; \ - (out).d[4] = (in1).d[4] ^ (in2).d[4]; \ - (out).d[5] = (in1).d[5] ^ (in2).d[5]; \ - (out).d[6] = (in1).d[6] ^ (in2).d[6]; \ - (out).d[7] = (in1).d[7] ^ (in2).d[7]; - -#define XOR_X(in) XOR(X, X, in) -#define XOR_X_2(in1, in2) XOR(X, in1, in2) -#define XOR_X_WRITE_XOR_Y_2(out, in) \ - XOR(Y, out, in) \ - COPY(out, Y) \ - XOR(X, X, Y) - -#define INTEGERIFY (uint32_t)X.d[0] -#endif - -/** - * Apply the Salsa20 core to the block provided in X ^ in. - */ -#define SALSA20_XOR_MEM(in, out) \ - XOR_X(in) \ - SALSA20(out) - -#define SALSA20 SALSA20_8 -#else /* pass 2 */ -#undef SALSA20 -#define SALSA20 SALSA20_2 -#endif - -/** - * blockmix_salsa(Bin, Bout): - * Compute Bout = BlockMix_{salsa20, 1}(Bin). The input Bin must be 128 - * bytes in length; the output Bout must also be the same size. - */ -static inline void blockmix_salsa(const salsa20_blk_t *restrict Bin, - salsa20_blk_t *restrict Bout) -{ - DECL_X - - READ_X(Bin[1]) - SALSA20_XOR_MEM(Bin[0], Bout[0]) - SALSA20_XOR_MEM(Bin[1], Bout[1]) -} - -static inline uint32_t blockmix_salsa_xor(const salsa20_blk_t *restrict Bin1, - const salsa20_blk_t *restrict Bin2, salsa20_blk_t *restrict Bout) -{ - DECL_X - - XOR_X_2(Bin1[1], Bin2[1]) - XOR_X(Bin1[0]) - SALSA20_XOR_MEM(Bin2[0], Bout[0]) - XOR_X(Bin1[1]) - SALSA20_XOR_MEM(Bin2[1], Bout[1]) - - return INTEGERIFY; -} - -#if _YESPOWER_OPT_C_PASS_ == 1 -/* This is tunable, but it is part of what defines a yespower_p2b version */ -/* Version 0.5 */ -#define Swidth_0_5 8 -/* Version 1.0 */ -#define Swidth_1_0 11 - -/* Not tunable in this implementation, hard-coded in a few places */ -#define PWXsimple 2 -#define PWXgather 4 - -/* Derived value. Not tunable on its own. */ -#define PWXbytes (PWXgather * PWXsimple * 8) - -/* (Maybe-)runtime derived values. Not tunable on their own. */ -#define Swidth_to_Sbytes1_P2b(Swidth) ((1 << (Swidth)) * PWXsimple * 8) -#define Swidth_to_Smask(Swidth) (((1 << (Swidth)) - 1) * PWXsimple * 8) -#define Smask_to_Smask2(Smask) (((uint64_t)(Smask) << 32) | (Smask)) - -/* These should be compile-time derived */ -#define Smask2_0_5 Smask_to_Smask2(Swidth_to_Smask(Swidth_0_5)) -#define Smask2_1_0 Smask_to_Smask2(Swidth_to_Smask(Swidth_1_0)) - -typedef struct { - uint8_t *S0, *S1, *S2; - size_t w; - uint32_t Sbytes; -} pwxform_ctx_t; - -#define DECL_SMASK2REG /* empty */ -#define MAYBE_MEMORY_BARRIER /* empty */ - -#ifdef __SSE2__ -/* - * (V)PSRLDQ and (V)PSHUFD have higher throughput than (V)PSRLQ on some CPUs - * starting with Sandy Bridge. Additionally, PSHUFD uses separate source and - * destination registers, whereas the shifts would require an extra move - * instruction for our code when building without AVX. Unfortunately, PSHUFD - * is much slower on Conroe (4 cycles latency vs. 1 cycle latency for PSRLQ) - * and somewhat slower on some non-Intel CPUs (luckily not including AMD - * Bulldozer and Piledriver). 
- */ -#ifdef __AVX__ -#define HI32(X) \ - _mm_srli_si128((X), 4) -#elif 1 /* As an option, check for __SSE4_1__ here not to hurt Conroe */ -#define HI32(X) \ - _mm_shuffle_epi32((X), _MM_SHUFFLE(2,3,0,1)) -#else -#define HI32(X) \ - _mm_srli_epi64((X), 32) -#endif - -#if defined(__x86_64__) && \ - __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__ICC) -#ifdef __AVX__ -#define MOVQ "vmovq" -#else -/* "movq" would be more correct, but "movd" is supported by older binutils - * due to an error in AMD's spec for x86-64. */ -#define MOVQ "movd" -#endif -#define EXTRACT64(X) ({ \ - uint64_t result; \ - __asm__(MOVQ " %1, %0" : "=r" (result) : "x" (X)); \ - result; \ -}) -#elif defined(__x86_64__) && !defined(_MSC_VER) && !defined(__OPEN64__) -/* MSVC and Open64 had bugs */ -#define EXTRACT64(X) _mm_cvtsi128_si64(X) -#elif defined(__x86_64__) && defined(__SSE4_1__) -/* No known bugs for this intrinsic */ -#include -#define EXTRACT64(X) _mm_extract_epi64((X), 0) -#elif defined(USE_SSE4_FOR_32BIT) && defined(__SSE4_1__) -/* 32-bit */ -#include -#if 0 -/* This is currently unused by the code below, which instead uses these two - * intrinsics explicitly when (!defined(__x86_64__) && defined(__SSE4_1__)) */ -#define EXTRACT64(X) \ - ((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \ - ((uint64_t)(uint32_t)_mm_extract_epi32((X), 1) << 32)) -#endif -#else -/* 32-bit or compilers with known past bugs in _mm_cvtsi128_si64() */ -#define EXTRACT64(X) \ - ((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \ - ((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32)) -#endif - -#if defined(__x86_64__) && (defined(__AVX__) || !defined(__GNUC__)) -/* 64-bit with AVX */ -/* Force use of 64-bit AND instead of two 32-bit ANDs */ -#undef DECL_SMASK2REG -#if defined(__GNUC__) && !defined(__ICC) -#define DECL_SMASK2REG uint64_t Smask2reg = Smask2; -/* Force use of lower-numbered registers to reduce number of prefixes, relying - * on out-of-order execution and register renaming. */ -#define FORCE_REGALLOC_1 \ - __asm__("" : "=a" (x), "+d" (Smask2reg), "+S" (S0), "+D" (S1)); -#define FORCE_REGALLOC_2 \ - __asm__("" : : "c" (lo)); -#else -static volatile uint64_t Smask2var = Smask2; -#define DECL_SMASK2REG uint64_t Smask2reg = Smask2var; -#define FORCE_REGALLOC_1 /* empty */ -#define FORCE_REGALLOC_2 /* empty */ -#endif -#define PWXFORM_SIMD(X) { \ - uint64_t x; \ - FORCE_REGALLOC_1 \ - uint32_t lo = x = EXTRACT64(X) & Smask2reg; \ - FORCE_REGALLOC_2 \ - uint32_t hi = x >> 32; \ - X = _mm_mul_epu32(HI32(X), X); \ - X = _mm_add_epi64(X, *(__m128i *)(S0 + lo)); \ - X = _mm_xor_si128(X, *(__m128i *)(S1 + hi)); \ -} -#elif defined(__x86_64__) -/* 64-bit without AVX. This relies on out-of-order execution and register - * renaming. It may actually be fastest on CPUs with AVX(2) as well - e.g., - * it runs great on Haswell. */ -#warning "Note: using x86-64 inline assembly for pwxform. That's great." 
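/*
 * Editorial sketch, not part of the removed yespower-opt-p2b.c: every
 * PWXFORM_SIMD variant above (XOP, SSE2, and the x86-64 inline asm) computes
 * the same per-lane transform that the generic non-SSE2 branch further below
 * writes out directly.  The helper name pwxform_lane_sketch and its parameter
 * names are illustrative only; S0/S1 are the pwxform S-boxes and smask2
 * corresponds to the Smask2 value used in this file.  Assumes <stdint.h>.
 */
static inline void pwxform_lane_sketch(uint64_t *x0, uint64_t *x1,
    const uint8_t *S0, const uint8_t *S1, uint64_t smask2)
{
	/* Low lane selects two S-box offsets: low 32 bits -> S0, high 32 -> S1 */
	uint64_t x = *x0 & smask2;
	const uint64_t *p0 = (const uint64_t *)(S0 + (uint32_t)x);
	const uint64_t *p1 = (const uint64_t *)(S1 + (x >> 32));

	/* 32x32->64 multiply of each lane's halves, then add from S0 and xor from S1 */
	*x0 = ((*x0 >> 32) * (uint32_t)*x0 + p0[0]) ^ p1[0];
	*x1 = ((*x1 >> 32) * (uint32_t)*x1 + p0[1]) ^ p1[1];
}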
-#undef MAYBE_MEMORY_BARRIER -#define MAYBE_MEMORY_BARRIER \ - __asm__("" : : : "memory"); -#ifdef __ILP32__ /* x32 */ -#define REGISTER_PREFIX "e" -#else -#define REGISTER_PREFIX "r" -#endif -#define PWXFORM_SIMD(X) { \ - __m128i H; \ - __asm__( \ - "movd %0, %%rax\n\t" \ - "pshufd $0xb1, %0, %1\n\t" \ - "andq %2, %%rax\n\t" \ - "pmuludq %1, %0\n\t" \ - "movl %%eax, %%ecx\n\t" \ - "shrq $0x20, %%rax\n\t" \ - "paddq (%3,%%" REGISTER_PREFIX "cx), %0\n\t" \ - "pxor (%4,%%" REGISTER_PREFIX "ax), %0\n\t" \ - : "+x" (X), "=x" (H) \ - : "d" (Smask2), "S" (S0), "D" (S1) \ - : "cc", "ax", "cx"); \ -} -#elif defined(USE_SSE4_FOR_32BIT) && defined(__SSE4_1__) -/* 32-bit with SSE4.1 */ -#define PWXFORM_SIMD(X) { \ - __m128i x = _mm_and_si128(X, _mm_set1_epi64x(Smask2)); \ - __m128i s0 = *(__m128i *)(S0 + (uint32_t)_mm_cvtsi128_si32(x)); \ - __m128i s1 = *(__m128i *)(S1 + (uint32_t)_mm_extract_epi32(x, 1)); \ - X = _mm_mul_epu32(HI32(X), X); \ - X = _mm_add_epi64(X, s0); \ - X = _mm_xor_si128(X, s1); \ -} -#else -/* 32-bit without SSE4.1 */ -#define PWXFORM_SIMD(X) { \ - uint64_t x = EXTRACT64(X) & Smask2; \ - __m128i s0 = *(__m128i *)(S0 + (uint32_t)x); \ - __m128i s1 = *(__m128i *)(S1 + (x >> 32)); \ - X = _mm_mul_epu32(HI32(X), X); \ - X = _mm_add_epi64(X, s0); \ - X = _mm_xor_si128(X, s1); \ -} -#endif - -#define PWXFORM_SIMD_WRITE(X, Sw) \ - PWXFORM_SIMD(X) \ - MAYBE_MEMORY_BARRIER \ - *(__m128i *)(Sw + w) = X; \ - MAYBE_MEMORY_BARRIER - -#define PWXFORM_ROUND \ - PWXFORM_SIMD(X0) \ - PWXFORM_SIMD(X1) \ - PWXFORM_SIMD(X2) \ - PWXFORM_SIMD(X3) - -#define PWXFORM_ROUND_WRITE4 \ - PWXFORM_SIMD_WRITE(X0, S0) \ - PWXFORM_SIMD_WRITE(X1, S1) \ - w += 16; \ - PWXFORM_SIMD_WRITE(X2, S0) \ - PWXFORM_SIMD_WRITE(X3, S1) \ - w += 16; - -#define PWXFORM_ROUND_WRITE2 \ - PWXFORM_SIMD_WRITE(X0, S0) \ - PWXFORM_SIMD_WRITE(X1, S1) \ - w += 16; \ - PWXFORM_SIMD(X2) \ - PWXFORM_SIMD(X3) - -#else /* !defined(__SSE2__) */ - -#define PWXFORM_SIMD(x0, x1) { \ - uint64_t x = x0 & Smask2; \ - uint64_t *p0 = (uint64_t *)(S0 + (uint32_t)x); \ - uint64_t *p1 = (uint64_t *)(S1 + (x >> 32)); \ - x0 = ((x0 >> 32) * (uint32_t)x0 + p0[0]) ^ p1[0]; \ - x1 = ((x1 >> 32) * (uint32_t)x1 + p0[1]) ^ p1[1]; \ -} - -#define PWXFORM_SIMD_WRITE(x0, x1, Sw) \ - PWXFORM_SIMD(x0, x1) \ - ((uint64_t *)(Sw + w))[0] = x0; \ - ((uint64_t *)(Sw + w))[1] = x1; - -#define PWXFORM_ROUND \ - PWXFORM_SIMD(X.d[0], X.d[1]) \ - PWXFORM_SIMD(X.d[2], X.d[3]) \ - PWXFORM_SIMD(X.d[4], X.d[5]) \ - PWXFORM_SIMD(X.d[6], X.d[7]) - -#define PWXFORM_ROUND_WRITE4 \ - PWXFORM_SIMD_WRITE(X.d[0], X.d[1], S0) \ - PWXFORM_SIMD_WRITE(X.d[2], X.d[3], S1) \ - w += 16; \ - PWXFORM_SIMD_WRITE(X.d[4], X.d[5], S0) \ - PWXFORM_SIMD_WRITE(X.d[6], X.d[7], S1) \ - w += 16; - -#define PWXFORM_ROUND_WRITE2 \ - PWXFORM_SIMD_WRITE(X.d[0], X.d[1], S0) \ - PWXFORM_SIMD_WRITE(X.d[2], X.d[3], S1) \ - w += 16; \ - PWXFORM_SIMD(X.d[4], X.d[5]) \ - PWXFORM_SIMD(X.d[6], X.d[7]) -#endif - -#define PWXFORM \ - PWXFORM_ROUND PWXFORM_ROUND PWXFORM_ROUND \ - PWXFORM_ROUND PWXFORM_ROUND PWXFORM_ROUND - -#define Smask2 Smask2_0_5 - -#else /* pass 2 */ - -#undef PWXFORM -#define PWXFORM \ - PWXFORM_ROUND_WRITE4 PWXFORM_ROUND_WRITE2 PWXFORM_ROUND_WRITE2 \ - w &= Smask2; \ - { \ - uint8_t *Stmp = S2; \ - S2 = S1; \ - S1 = S0; \ - S0 = Stmp; \ - } - -#undef Smask2 -#define Smask2 Smask2_1_0 - -#endif - -/** - * blockmix_pwxform(Bin, Bout, r, S): - * Compute Bout = BlockMix_pwxform{salsa20, r, S}(Bin). The input Bin must - * be 128r bytes in length; the output Bout must also be the same size. 
- */ -static void blockmix(const salsa20_blk_t *restrict Bin, - salsa20_blk_t *restrict Bout, size_t r, pwxform_ctx_t *restrict ctx) -{ - if (unlikely(!ctx)) { - blockmix_salsa(Bin, Bout); - return; - } - - uint8_t *S0 = ctx->S0, *S1 = ctx->S1; -#if _YESPOWER_OPT_C_PASS_ > 1 - uint8_t *S2 = ctx->S2; - size_t w = ctx->w; -#endif - size_t i; - DECL_X - - /* Convert count of 128-byte blocks to max index of 64-byte block */ - r = r * 2 - 1; - - READ_X(Bin[r]) - - DECL_SMASK2REG - - i = 0; - do { - XOR_X(Bin[i]) - PWXFORM - if (unlikely(i >= r)) - break; - WRITE_X(Bout[i]) - i++; - } while (1); - -#if _YESPOWER_OPT_C_PASS_ > 1 - ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; - ctx->w = w; -#endif - - SALSA20(Bout[i]) -} - -static uint32_t blockmix_xor(const salsa20_blk_t *restrict Bin1, - const salsa20_blk_t *restrict Bin2, salsa20_blk_t *restrict Bout, - size_t r, pwxform_ctx_t *restrict ctx) -{ - if (unlikely(!ctx)) - return blockmix_salsa_xor(Bin1, Bin2, Bout); - - uint8_t *S0 = ctx->S0, *S1 = ctx->S1; -#if _YESPOWER_OPT_C_PASS_ > 1 - uint8_t *S2 = ctx->S2; - size_t w = ctx->w; -#endif - size_t i; - DECL_X - - /* Convert count of 128-byte blocks to max index of 64-byte block */ - r = r * 2 - 1; - -#ifdef PREFETCH - PREFETCH(&Bin2[r], _MM_HINT_T0) - for (i = 0; i < r; i++) { - PREFETCH(&Bin2[i], _MM_HINT_T0) - } -#endif - - XOR_X_2(Bin1[r], Bin2[r]) - - DECL_SMASK2REG - - i = 0; - r--; - do { - XOR_X(Bin1[i]) - XOR_X(Bin2[i]) - PWXFORM - WRITE_X(Bout[i]) - - XOR_X(Bin1[i + 1]) - XOR_X(Bin2[i + 1]) - PWXFORM - - if (unlikely(i >= r)) - break; - - WRITE_X(Bout[i + 1]) - - i += 2; - } while (1); - i++; - -#if _YESPOWER_OPT_C_PASS_ > 1 - ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; - ctx->w = w; -#endif - - SALSA20(Bout[i]) - - return INTEGERIFY; -} - -static uint32_t blockmix_xor_save(salsa20_blk_t *restrict Bin1out, - salsa20_blk_t *restrict Bin2, - size_t r, pwxform_ctx_t *restrict ctx) -{ - uint8_t *S0 = ctx->S0, *S1 = ctx->S1; -#if _YESPOWER_OPT_C_PASS_ > 1 - uint8_t *S2 = ctx->S2; - size_t w = ctx->w; -#endif - size_t i; - DECL_X - DECL_Y - - /* Convert count of 128-byte blocks to max index of 64-byte block */ - r = r * 2 - 1; - -#ifdef PREFETCH - PREFETCH(&Bin2[r], _MM_HINT_T0) - for (i = 0; i < r; i++) { - PREFETCH(&Bin2[i], _MM_HINT_T0) - } -#endif - - XOR_X_2(Bin1out[r], Bin2[r]) - - DECL_SMASK2REG - - i = 0; - r--; - do { - XOR_X_WRITE_XOR_Y_2(Bin2[i], Bin1out[i]) - PWXFORM - WRITE_X(Bin1out[i]) - - XOR_X_WRITE_XOR_Y_2(Bin2[i + 1], Bin1out[i + 1]) - PWXFORM - - if (unlikely(i >= r)) - break; - - WRITE_X(Bin1out[i + 1]) - - i += 2; - } while (1); - i++; - -#if _YESPOWER_OPT_C_PASS_ > 1 - ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; - ctx->w = w; -#endif - - SALSA20(Bin1out[i]) - - return INTEGERIFY; -} - -#if _YESPOWER_OPT_C_PASS_ == 1 -/** - * integerify(B, r): - * Return the result of parsing B_{2r-1} as a little-endian integer. - */ -static inline uint32_t integerify(const salsa20_blk_t *B, size_t r) -{ -/* - * Our 64-bit words are in host byte order, which is why we don't just read - * w[0] here (would be wrong on big-endian). Also, our 32-bit words are - * SIMD-shuffled, but we only care about the least significant 32 bits anyway. - */ - return (uint32_t)B[2 * r - 1].d[0]; -} -#endif - -/** - * smix1(B, r, N, V, XY, S): - * Compute first loop of B = SMix_r(B, N). The input B must be 128r bytes in - * length; the temporary storage V must be 128rN bytes in length; the temporary - * storage XY must be 128r+64 bytes in length. N must be even and at least 4. 
- * The array V must be aligned to a multiple of 64 bytes, and arrays B and XY - * to a multiple of at least 16 bytes. - */ -static void smix1(uint8_t *B, size_t r, uint32_t N, - salsa20_blk_t *V, salsa20_blk_t *XY, pwxform_ctx_t *ctx) -{ - size_t s = 2 * r; - salsa20_blk_t *X = V, *Y = &V[s], *V_j; - uint32_t i, j, n; - -#if _YESPOWER_OPT_C_PASS_ == 1 - for (i = 0; i < 2 * r; i++) { -#else - for (i = 0; i < 2; i++) { -#endif - const salsa20_blk_t *src = (salsa20_blk_t *)&B[i * 64]; - salsa20_blk_t *tmp = Y; - salsa20_blk_t *dst = &X[i]; - size_t k; - for (k = 0; k < 16; k++) - tmp->w[k] = le32dec(&src->w[k]); - salsa20_simd_shuffle(tmp, dst); - } - -#if _YESPOWER_OPT_C_PASS_ > 1 - for (i = 1; i < r; i++) - blockmix(&X[(i - 1) * 2], &X[i * 2], 1, ctx); -#endif - - blockmix(X, Y, r, ctx); - X = Y + s; - blockmix(Y, X, r, ctx); - j = integerify(X, r); - - for (n = 2; n < N; n <<= 1) { - uint32_t m = (n < N / 2) ? n : (N - 1 - n); - for (i = 1; i < m; i += 2) { - Y = X + s; - j &= n - 1; - j += i - 1; - V_j = &V[j * s]; - j = blockmix_xor(X, V_j, Y, r, ctx); - j &= n - 1; - j += i; - V_j = &V[j * s]; - X = Y + s; - j = blockmix_xor(Y, V_j, X, r, ctx); - } - } - n >>= 1; - - j &= n - 1; - j += N - 2 - n; - V_j = &V[j * s]; - Y = X + s; - j = blockmix_xor(X, V_j, Y, r, ctx); - j &= n - 1; - j += N - 1 - n; - V_j = &V[j * s]; - blockmix_xor(Y, V_j, XY, r, ctx); - - for (i = 0; i < 2 * r; i++) { - const salsa20_blk_t *src = &XY[i]; - salsa20_blk_t *tmp = &XY[s]; - salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64]; - size_t k; - for (k = 0; k < 16; k++) - le32enc(&tmp->w[k], src->w[k]); - salsa20_simd_unshuffle(tmp, dst); - } -} - -/** - * smix2(B, r, N, Nloop, V, XY, S): - * Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in - * length; the temporary storage V must be 128rN bytes in length; the temporary - * storage XY must be 256r bytes in length. N must be a power of 2 and at - * least 2. Nloop must be even. The array V must be aligned to a multiple of - * 64 bytes, and arrays B and XY to a multiple of at least 16 bytes. - */ -static void smix2(uint8_t *B, size_t r, uint32_t N, uint32_t Nloop, - salsa20_blk_t *V, salsa20_blk_t *XY, pwxform_ctx_t *ctx) -{ - size_t s = 2 * r; - salsa20_blk_t *X = XY, *Y = &XY[s]; - uint32_t i, j; - - for (i = 0; i < 2 * r; i++) { - const salsa20_blk_t *src = (salsa20_blk_t *)&B[i * 64]; - salsa20_blk_t *tmp = Y; - salsa20_blk_t *dst = &X[i]; - size_t k; - for (k = 0; k < 16; k++) - tmp->w[k] = le32dec(&src->w[k]); - salsa20_simd_shuffle(tmp, dst); - } - - j = integerify(X, r) & (N - 1); - -#if _YESPOWER_OPT_C_PASS_ == 1 - if (Nloop > 2) { -#endif - do { - salsa20_blk_t *V_j = &V[j * s]; - j = blockmix_xor_save(X, V_j, r, ctx) & (N - 1); - V_j = &V[j * s]; - j = blockmix_xor_save(X, V_j, r, ctx) & (N - 1); - } while (Nloop -= 2); -#if _YESPOWER_OPT_C_PASS_ == 1 - } else { - const salsa20_blk_t * V_j = &V[j * s]; - j = blockmix_xor(X, V_j, Y, r, ctx) & (N - 1); - V_j = &V[j * s]; - blockmix_xor(Y, V_j, X, r, ctx); - } -#endif - - for (i = 0; i < 2 * r; i++) { - const salsa20_blk_t *src = &X[i]; - salsa20_blk_t *tmp = Y; - salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64]; - size_t k; - for (k = 0; k < 16; k++) - le32enc(&tmp->w[k], src->w[k]); - salsa20_simd_unshuffle(tmp, dst); - } -} - -/** - * smix(B, r, N, V, XY, S): - * Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the - * temporary storage V must be 128rN bytes in length; the temporary storage - * XY must be 256r bytes in length. 
N must be a power of 2 and at least 16. - * The array V must be aligned to a multiple of 64 bytes, and arrays B and XY - * to a multiple of at least 16 bytes (aligning them to 64 bytes as well saves - * cache lines, but it might also result in cache bank conflicts). - */ -static void smix(uint8_t *B, size_t r, uint32_t N, - salsa20_blk_t *V, salsa20_blk_t *XY, pwxform_ctx_t *ctx) -{ -#if _YESPOWER_OPT_C_PASS_ == 1 - uint32_t Nloop_all = (N + 2) / 3; /* 1/3, round up */ - uint32_t Nloop_rw = Nloop_all; - - Nloop_all++; Nloop_all &= ~(uint32_t)1; /* round up to even */ - Nloop_rw &= ~(uint32_t)1; /* round down to even */ -#else - uint32_t Nloop_rw = (N + 2) / 3; /* 1/3, round up */ - Nloop_rw++; Nloop_rw &= ~(uint32_t)1; /* round up to even */ -#endif - - smix1(B, 1, ctx->Sbytes / 128, (salsa20_blk_t *)ctx->S0, XY, NULL); - smix1(B, r, N, V, XY, ctx); - smix2(B, r, N, Nloop_rw /* must be > 2 */, V, XY, ctx); -#if _YESPOWER_OPT_C_PASS_ == 1 - if (Nloop_all > Nloop_rw) - smix2(B, r, N, 2, V, XY, ctx); -#endif -} - -#if _YESPOWER_OPT_C_PASS_ == 1 -#undef _YESPOWER_OPT_C_PASS_ -#define _YESPOWER_OPT_C_PASS_ 2 -#define blockmix_salsa blockmix_salsa_1_0 -#define blockmix_salsa_xor blockmix_salsa_xor_1_0 -#define blockmix blockmix_1_0 -#define blockmix_xor blockmix_xor_1_0 -#define blockmix_xor_save blockmix_xor_save_1_0 -#define smix1 smix1_1_0 -#define smix2 smix2_1_0 -#define smix smix_1_0_p2b -#include "yespower-opt-p2b.c" -#undef smix - -/** - * yespower_p2b(local, src, srclen, params, dst): - * Compute yespower_p2b(src[0 .. srclen - 1], N, r), to be checked for "< target". - * local is the thread-local data structure, allowing to preserve and reuse a - * memory allocation across calls, thereby reducing its overhead. - * - * Return 0 on success; or -1 on error. 
- */ -int yespower_p2b(yespower_local_t *local, - const uint8_t *src, size_t srclen, - const yespower_params_t *params, - yespower_binary_t_p2b *dst) -{ - yespower_version_t version = params->version; - uint32_t N = params->N; - uint32_t r = params->r; - const uint8_t *pers = params->pers; - size_t perslen = params->perslen; - uint32_t Swidth; - size_t B_size, V_size, XY_size, need; - uint8_t *B, *S; - salsa20_blk_t *V, *XY; - pwxform_ctx_t ctx; - uint8_t sha256[32]; - uint8_t blake2b[32]; - - /* Sanity-check parameters */ - if ((version != YESPOWER_0_5 && version != YESPOWER_1_0 && version != YESPOWER_1_0_BLAKE2B) || - N < 1024 || N > 512 * 1024 || r < 8 || r > 32 || - (N & (N - 1)) != 0 || - (!pers && perslen)) { - errno = EINVAL; - goto fail; - } - - /* Allocate memory */ - B_size = (size_t)128 * r; - V_size = B_size * N; - if (version == YESPOWER_0_5) { - XY_size = B_size * 2; - Swidth = Swidth_0_5; - ctx.Sbytes = 2 * Swidth_to_Sbytes1_P2b(Swidth); - } else if (version == YESPOWER_1_0) { - XY_size = B_size + 64; - Swidth = Swidth_1_0; - ctx.Sbytes = 3 * Swidth_to_Sbytes1_P2b(Swidth); - } else if (version == YESPOWER_1_0_BLAKE2B) { - XY_size = B_size + 64; - Swidth = Swidth_1_0; - ctx.Sbytes = 3 * Swidth_to_Sbytes1_P2b(Swidth); - } - need = B_size + V_size + XY_size + ctx.Sbytes; - if (local->aligned_size < need) { - if (free_region(local)) - goto fail; - if (!alloc_region(local, need)) - goto fail; - } - B = (uint8_t *)local->aligned; - V = (salsa20_blk_t *)((uint8_t *)B + B_size); - XY = (salsa20_blk_t *)((uint8_t *)V + V_size); - S = (uint8_t *)XY + XY_size; - ctx.S0 = S; - ctx.S1 = S + Swidth_to_Sbytes1_P2b(Swidth); - - // SHA256_Buf(src, srclen, sha256); // Move to each loop - - if (version == YESPOWER_0_5) { - SHA256_Buf(src, srclen, sha256); - PBKDF2_SHA256_P2B(sha256, sizeof(sha256), src, srclen, 1, - B, B_size); - memcpy(sha256, B, sizeof(sha256)); - smix(B, r, N, V, XY, &ctx); - PBKDF2_SHA256_P2B(sha256, sizeof(sha256), B, B_size, 1, - (uint8_t *)dst, sizeof(*dst)); - - if (pers) { - HMAC_SHA256_Buf_P2b(dst, sizeof(*dst), pers, perslen, - sha256); - SHA256_Buf(sha256, sizeof(sha256), (uint8_t *)dst); - } - } else if (version == YESPOWER_1_0) { - SHA256_Buf(src, srclen, sha256); - ctx.S2 = S + 2 * Swidth_to_Sbytes1_P2b(Swidth); - ctx.w = 0; - - if (pers) { - src = pers; - srclen = perslen; - } else { - srclen = 0; - } - - PBKDF2_SHA256_P2B(sha256, sizeof(sha256), src, srclen, 1, B, 128); - memcpy(sha256, B, sizeof(sha256)); - smix_1_0_p2b(B, r, N, V, XY, &ctx); - HMAC_SHA256_Buf_P2b(B + B_size - 64, 64, - sha256, sizeof(sha256), (uint8_t *)dst); - } else if (version == YESPOWER_1_0_BLAKE2B) { - blake2b_hash(blake2b, src, srclen); - ctx.S2 = S + 2 * Swidth_to_Sbytes1_P2b(Swidth); - ctx.w = 0; - - if (pers) { - src = pers; - srclen = perslen; - } else { - srclen = 0; - } - - pbkdf2_blake2b(blake2b, sizeof(blake2b), src, srclen, 1, B, 128); - memcpy(blake2b, B, sizeof(blake2b)); - smix_1_0_p2b(B, r, N, V, XY, &ctx); - hmac_blake2b_hash((uint8_t *)dst, B + B_size - 64, 64, blake2b, sizeof(blake2b)); - } - - /* Success! */ - return 0; - -fail: - memset(dst, 0xff, sizeof(*dst)); - return -1; -} - -/** - * yespower_tls_p2b(src, srclen, params, dst): - * Compute yespower_p2b(src[0 .. srclen - 1], N, r), to be checked for "< target". - * The memory allocation is maintained internally using thread-local storage. - * - * Return 0 on success; or -1 on error. 
- */ -int yespower_tls_p2b(const uint8_t *src, size_t srclen, - const yespower_params_t *params, yespower_binary_t_p2b *dst) -{ - static __thread int initialized = 0; - static __thread yespower_local_t local; - - if (!initialized) { - init_region(&local); - initialized = 1; - } - - return yespower_p2b(&local, src, srclen, params, dst); -} - -int yespower_init_local_p2b(yespower_local_t *local) -{ - init_region(local); - return 0; -} - -int yespower_free_local_p2b(yespower_local_t *local) -{ - return free_region(local); -} -#endif diff --git a/yespower-1.0.1-power2b/yespower-p2b.h b/yespower-1.0.1-power2b/yespower-p2b.h deleted file mode 100644 index c3af7f757..000000000 --- a/yespower-1.0.1-power2b/yespower-p2b.h +++ /dev/null @@ -1,130 +0,0 @@ -/*- - * Copyright 2009 Colin Percival - * Copyright 2013-2018 Alexander Peslyak - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * This file was originally written by Colin Percival as part of the Tarsnap - * online backup system. - */ -#ifndef _YESPOWER_H_ -#define _YESPOWER_H_ - -#include -#include /* for size_t */ - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Internal type used by the memory allocator. Please do not use it directly. - * Use yespower_local_t instead. - */ -typedef struct { - void *base, *aligned; - size_t base_size, aligned_size; -} yespower_region_t; - -/** - * Type for thread-local (RAM) data structure. - */ -typedef yespower_region_t yespower_local_t; - -/* - * Type for yespower algorithm version numbers. - */ -typedef enum { YESPOWER_0_5 = 5, YESPOWER_1_0 = 10, YESPOWER_1_0_BLAKE2B = 11 } yespower_version_t; - -/** - * yespower parameters combined into one struct. - */ -typedef struct { - yespower_version_t version; - uint32_t N, r; - const uint8_t *pers; - size_t perslen; -} yespower_params_t; - -/** - * A 256-bit yespower hash. - */ -typedef struct { - unsigned char uc[32]; -} yespower_binary_t_p2b; - -/** - * yespower_init_local_p2b(local): - * Initialize the thread-local (RAM) data structure. Actual memory allocation - * is currently fully postponed until a call to yespower(). - * - * Return 0 on success; or -1 on error. - * - * MT-safe as long as local is local to the thread. 
- */
-extern int yespower_init_local_p2b(yespower_local_t *local);
-
-/**
- * yespower_free_local_p2b(local):
- * Free memory that may have been allocated for an initialized thread-local
- * (RAM) data structure.
- *
- * Return 0 on success; or -1 on error.
- *
- * MT-safe as long as local is local to the thread.
- */
-extern int yespower_free_local_p2b(yespower_local_t *local);
-
-/**
- * yespower(local, src, srclen, params, dst):
- * Compute yespower(src[0 .. srclen - 1], N, r), to be checked for "< target".
- * local is the thread-local data structure, allowing to preserve and reuse a
- * memory allocation across calls, thereby reducing processing overhead.
- *
- * Return 0 on success; or -1 on error.
- *
- * local must be initialized with yespower_init_local_p2b().
- *
- * MT-safe as long as local and dst are local to the thread.
- */
-extern int yespower(yespower_local_t *local,
-    const uint8_t *src, size_t srclen,
-    const yespower_params_t *params, yespower_binary_t_p2b *dst);
-
-/**
- * yespower_tls_p2b(src, srclen, params, dst):
- * Compute yespower(src[0 .. srclen - 1], N, r), to be checked for "< target".
- * The memory allocation is maintained internally using thread-local storage.
- *
- * Return 0 on success; or -1 on error.
- *
- * MT-safe as long as dst is local to the thread.
- */
-extern int yespower_tls_p2b(const uint8_t *src, size_t srclen,
-    const yespower_params_t *params, yespower_binary_t_p2b *dst);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_YESPOWER_H_ */
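For context on what this removal drops, here is a minimal sketch of how the now-deleted API above could be driven. The N/r values, the personalization string, and the zeroed 80-byte header are placeholders chosen only to satisfy the parameter checks, not any coin's real constants, and the program would only link against the files this diff deletes.

#include <stdio.h>
#include "yespower-p2b.h"

int main(void)
{
	/* Hypothetical parameters: any power-of-two N in [1024, 512*1024]
	 * and r in [8, 32] passes the sanity checks. */
	yespower_params_t params = {
		.version = YESPOWER_1_0_BLAKE2B,
		.N = 2048,
		.r = 32,
		.pers = (const uint8_t *)"example personalization",
		.perslen = 23,
	};
	const uint8_t header[80] = {0};	/* placeholder 80-byte block header */
	yespower_binary_t_p2b hash;

	if (yespower_tls_p2b(header, sizeof(header), &params, &hash))
		return 1;	/* -1 means EINVAL or an allocation failure */

	for (size_t i = 0; i < sizeof(hash.uc); i++)
		printf("%02x", hash.uc[i]);
	putchar('\n');
	return 0;
}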
diff --git a/yespower-1.0.1-power2b/yespower-platform-p2b.c b/yespower-1.0.1-power2b/yespower-platform-p2b.c
deleted file mode 100644
index 2b1a03f0f..000000000
--- a/yespower-1.0.1-power2b/yespower-platform-p2b.c
+++ /dev/null
@@ -1,107 +0,0 @@
-/*-
- * Copyright 2013-2018 Alexander Peslyak
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifdef __unix__
-#include <sys/mman.h>
-#endif
-
-#define HUGEPAGE_THRESHOLD	(12 * 1024 * 1024)
-
-#ifdef __x86_64__
-#define HUGEPAGE_SIZE		(2 * 1024 * 1024)
-#else
-#undef HUGEPAGE_SIZE
-#endif
-
-static void *alloc_region(yespower_region_t *region, size_t size)
-{
-	size_t base_size = size;
-	uint8_t *base, *aligned;
-#ifdef MAP_ANON
-	int flags =
-#ifdef MAP_NOCORE
-	    MAP_NOCORE |
-#endif
-	    MAP_ANON | MAP_PRIVATE;
-#if defined(MAP_HUGETLB) && defined(HUGEPAGE_SIZE)
-	size_t new_size = size;
-	const size_t hugepage_mask = (size_t)HUGEPAGE_SIZE - 1;
-	if (size >= HUGEPAGE_THRESHOLD && size + hugepage_mask >= size) {
-		flags |= MAP_HUGETLB;
-/*
- * Linux's munmap() fails on MAP_HUGETLB mappings if size is not a multiple of
- * huge page size, so let's round up to huge page size here.
- */
-		new_size = size + hugepage_mask;
-		new_size &= ~hugepage_mask;
-	}
-	base = mmap(NULL, new_size, PROT_READ | PROT_WRITE, flags, -1, 0);
-	if (base != MAP_FAILED) {
-		base_size = new_size;
-	} else if (flags & MAP_HUGETLB) {
-		flags &= ~MAP_HUGETLB;
-		base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
-	}
-
-#else
-	base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
-#endif
-	if (base == MAP_FAILED)
-		base = NULL;
-	aligned = base;
-#elif defined(HAVE_POSIX_MEMALIGN)
-	if ((errno = posix_memalign((void **)&base, 64, size)) != 0)
-		base = NULL;
-	aligned = base;
-#else
-	base = aligned = NULL;
-	if (size + 63 < size) {
-		errno = ENOMEM;
-	} else if ((base = malloc(size + 63)) != NULL) {
-		aligned = base + 63;
-		aligned -= (uintptr_t)aligned & 63;
-	}
-#endif
-	region->base = base;
-	region->aligned = aligned;
-	region->base_size = base ? base_size : 0;
-	region->aligned_size = base ? size : 0;
-	return aligned;
-}
-
-static inline void init_region(yespower_region_t *region)
-{
-	region->base = region->aligned = NULL;
-	region->base_size = region->aligned_size = 0;
-}
-
-static int free_region(yespower_region_t *region)
-{
-	if (region->base) {
-#ifdef MAP_ANON
-		if (munmap(region->base, region->base_size))
-			return -1;
-#else
-		free(region->base);
-#endif
-	}
-	init_region(region);
-	return 0;
-}
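A standalone illustration of the huge-page rounding performed by the alloc_region() just removed; the 2 MiB huge-page size and the 13 MiB request are assumptions for the example.

#include <stdio.h>

int main(void)
{
	const size_t hugepage_size = 2 * 1024 * 1024;
	const size_t hugepage_mask = hugepage_size - 1;
	size_t size = 13 * 1024 * 1024;	/* e.g. a 13 MiB request */
	size_t new_size = (size + hugepage_mask) & ~hugepage_mask;

	/* Prints 13631488 -> 14680064, i.e. 7 whole huge pages, so a later
	 * munmap(base, new_size) succeeds on the MAP_HUGETLB mapping. */
	printf("%zu -> %zu\n", size, new_size);
	return 0;
}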
diff --git a/yespower-1.0.1-power2b/yespower-ref-p2b.c b/yespower-1.0.1-power2b/yespower-ref-p2b.c
deleted file mode 100644
index 9aca55f11..000000000
--- a/yespower-1.0.1-power2b/yespower-ref-p2b.c
+++ /dev/null
@@ -1,582 +0,0 @@
-/*-
- * Copyright 2009 Colin Percival
- * Copyright 2013-2019 Alexander Peslyak
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * This file was originally written by Colin Percival as part of the Tarsnap
- * online backup system.
- *
- * This is a proof-of-work focused fork of yescrypt, including reference and
- * cut-down implementation of the obsolete yescrypt 0.5 (based off its first
- * submission to PHC back in 2014) and a new proof-of-work specific variation
- * known as yespower 1.0. The former is intended as an upgrade for
- * cryptocurrencies that already use yescrypt 0.5 and the latter may be used
- * as a further upgrade (hard fork) by those and other cryptocurrencies. The
- * version of algorithm to use is requested through parameters, allowing for
- * both algorithms to co-exist in client and miner implementations (such as in
- * preparation for a hard-fork).
- *
- * This is the reference implementation. Its purpose is to provide a simple
- * human- and machine-readable specification that implementations intended
- * for actual use should be tested against. It is deliberately mostly not
- * optimized, and it is not meant to be used in production. Instead, use
- * yespower-opt.c.
- */
-
-#warning "This reference implementation is deliberately mostly not optimized. Use yespower-opt.c instead unless you're testing (against) the reference implementation on purpose."
-
-#include <errno.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "sha256-p2b.h"
-#include "sysendian-p2b.h"
-
-#include "yespower-p2b.h"
-
-static void blkcpy(uint32_t *dst, const uint32_t *src, size_t count)
-{
-	do {
-		*dst++ = *src++;
-	} while (--count);
-}
-
-static void blkxor(uint32_t *dst, const uint32_t *src, size_t count)
-{
-	do {
-		*dst++ ^= *src++;
-	} while (--count);
-}
-
-/**
- * salsa20(B):
- * Apply the Salsa20 core to the provided block.
- */
-static void salsa20(uint32_t B[16], uint32_t rounds)
-{
-	uint32_t x[16];
-	size_t i;
-
-	/* SIMD unshuffle */
-	for (i = 0; i < 16; i++)
-		x[i * 5 % 16] = B[i];
-
-	for (i = 0; i < rounds; i += 2) {
-#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
-		/* Operate on columns */
-		x[ 4] ^= R(x[ 0]+x[12], 7);  x[ 8] ^= R(x[ 4]+x[ 0], 9);
-		x[12] ^= R(x[ 8]+x[ 4],13);  x[ 0] ^= R(x[12]+x[ 8],18);
-
-		x[ 9] ^= R(x[ 5]+x[ 1], 7);  x[13] ^= R(x[ 9]+x[ 5], 9);
-		x[ 1] ^= R(x[13]+x[ 9],13);  x[ 5] ^= R(x[ 1]+x[13],18);
-
-		x[14] ^= R(x[10]+x[ 6], 7);  x[ 2] ^= R(x[14]+x[10], 9);
-		x[ 6] ^= R(x[ 2]+x[14],13);  x[10] ^= R(x[ 6]+x[ 2],18);
-
-		x[ 3] ^= R(x[15]+x[11], 7);  x[ 7] ^= R(x[ 3]+x[15], 9);
-		x[11] ^= R(x[ 7]+x[ 3],13);  x[15] ^= R(x[11]+x[ 7],18);
-
-		/* Operate on rows */
-		x[ 1] ^= R(x[ 0]+x[ 3], 7);  x[ 2] ^= R(x[ 1]+x[ 0], 9);
-		x[ 3] ^= R(x[ 2]+x[ 1],13);  x[ 0] ^= R(x[ 3]+x[ 2],18);
-
-		x[ 6] ^= R(x[ 5]+x[ 4], 7);  x[ 7] ^= R(x[ 6]+x[ 5], 9);
-		x[ 4] ^= R(x[ 7]+x[ 6],13);  x[ 5] ^= R(x[ 4]+x[ 7],18);
-
-		x[11] ^= R(x[10]+x[ 9], 7);  x[ 8] ^= R(x[11]+x[10], 9);
-		x[ 9] ^= R(x[ 8]+x[11],13);  x[10] ^= R(x[ 9]+x[ 8],18);
-
-		x[12] ^= R(x[15]+x[14], 7);  x[13] ^= R(x[12]+x[15], 9);
-		x[14] ^= R(x[13]+x[12],13);  x[15] ^= R(x[14]+x[13],18);
-#undef R
-	}
-
-	/* SIMD shuffle */
-	for (i = 0; i < 16; i++)
-		B[i] += x[i * 5 % 16];
-}
-
-/**
- * blockmix_salsa(B):
- * Compute B = BlockMix_{salsa20, 1}(B). The input B must be 128 bytes in
- * length.
- */
-static void blockmix_salsa(uint32_t *B, uint32_t rounds)
-{
-	uint32_t X[16];
-	size_t i;
-
-	/* 1: X <-- B_{2r - 1} */
-	blkcpy(X, &B[16], 16);
-
-	/* 2: for i = 0 to 2r - 1 do */
-	for (i = 0; i < 2; i++) {
-		/* 3: X <-- H(X xor B_i) */
-		blkxor(X, &B[i * 16], 16);
-		salsa20(X, rounds);
-
-		/* 4: Y_i <-- X */
-		/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
-		blkcpy(&B[i * 16], X, 16);
-	}
-}
-
-/*
- * These are tunable, but they must meet certain constraints and are part of
- * what defines a yespower version.
- */
-#define PWXsimple 2
-#define PWXgather 4
-/* Version 0.5 */
-#define PWXrounds_0_5 6
-#define Swidth_0_5 8
-/* Version 1.0 */
-#define PWXrounds_1_0 3
-#define Swidth_1_0 11
-
-/* Derived values. Not tunable on their own. */
-#define PWXbytes (PWXgather * PWXsimple * 8)
-#define PWXwords (PWXbytes / sizeof(uint32_t))
-#define rmin ((PWXbytes + 127) / 128)
-
-/* Runtime derived values. Not tunable on their own. */
-#define Swidth_to_Sbytes1_P2b(Swidth) ((1 << Swidth) * PWXsimple * 8)
-#define Swidth_to_Smask(Swidth) (((1 << Swidth) - 1) * PWXsimple * 8)
-
-typedef struct {
-	yespower_version_t version;
-	uint32_t salsa20_rounds;
-	uint32_t PWXrounds, Swidth, Sbytes, Smask;
-	uint32_t *S;
-	uint32_t (*S0)[2], (*S1)[2], (*S2)[2];
-	size_t w;
-} pwxform_ctx_t;
-
-/**
- * pwxform(B):
- * Transform the provided block using the provided S-boxes.
- */
-static void pwxform(uint32_t *B, pwxform_ctx_t *ctx)
-{
-	uint32_t (*X)[PWXsimple][2] = (uint32_t (*)[PWXsimple][2])B;
-	uint32_t (*S0)[2] = ctx->S0, (*S1)[2] = ctx->S1, (*S2)[2] = ctx->S2;
-	uint32_t Smask = ctx->Smask;
-	size_t w = ctx->w;
-	size_t i, j, k;
-
-	/* 1: for i = 0 to PWXrounds - 1 do */
-	for (i = 0; i < ctx->PWXrounds; i++) {
-		/* 2: for j = 0 to PWXgather - 1 do */
-		for (j = 0; j < PWXgather; j++) {
-			uint32_t xl = X[j][0][0];
-			uint32_t xh = X[j][0][1];
-			uint32_t (*p0)[2], (*p1)[2];
-
-			/* 3: p0 <-- (lo(B_{j,0}) & Smask) / (PWXsimple * 8) */
-			p0 = S0 + (xl & Smask) / sizeof(*S0);
-			/* 4: p1 <-- (hi(B_{j,0}) & Smask) / (PWXsimple * 8) */
-			p1 = S1 + (xh & Smask) / sizeof(*S1);
-
-			/* 5: for k = 0 to PWXsimple - 1 do */
-			for (k = 0; k < PWXsimple; k++) {
-				uint64_t x, s0, s1;
-
-				/* 6: B_{j,k} <-- (hi(B_{j,k}) * lo(B_{j,k}) + S0_{p0,k}) xor S1_{p1,k} */
-				s0 = ((uint64_t)p0[k][1] << 32) + p0[k][0];
-				s1 = ((uint64_t)p1[k][1] << 32) + p1[k][0];
-
-				xl = X[j][k][0];
-				xh = X[j][k][1];
-
-				x = (uint64_t)xh * xl;
-				x += s0;
-				x ^= s1;
-
-				X[j][k][0] = x;
-				X[j][k][1] = x >> 32;
-			}
-
-			if (ctx->version != YESPOWER_0_5 &&
-			    (i == 0 || j < PWXgather / 2)) {
-				if (j & 1) {
-					for (k = 0; k < PWXsimple; k++) {
-						S1[w][0] = X[j][k][0];
-						S1[w][1] = X[j][k][1];
-						w++;
-					}
-				} else {
-					for (k = 0; k < PWXsimple; k++) {
-						S0[w + k][0] = X[j][k][0];
-						S0[w + k][1] = X[j][k][1];
-					}
-				}
-			}
-		}
-	}
-
-	if (ctx->version != YESPOWER_0_5) {
-		/* 14: (S0, S1, S2) <-- (S2, S0, S1) */
-		ctx->S0 = S2;
-		ctx->S1 = S0;
-		ctx->S2 = S1;
-		/* 15: w <-- w mod 2^Swidth */
-		ctx->w = w & ((1 << ctx->Swidth) * PWXsimple - 1);
-	}
-}
-
-/**
- * blockmix_pwxform(B, ctx, r):
- * Compute B = BlockMix_pwxform{salsa20, ctx, r}(B). The input B must be
- * 128r bytes in length.
- */
-static void blockmix_pwxform(uint32_t *B, pwxform_ctx_t *ctx, size_t r)
-{
-	uint32_t X[PWXwords];
-	size_t r1, i;
-
-	/* Convert 128-byte blocks to PWXbytes blocks */
-	/* 1: r_1 <-- 128r / PWXbytes */
-	r1 = 128 * r / PWXbytes;
-
-	/* 2: X <-- B'_{r_1 - 1} */
-	blkcpy(X, &B[(r1 - 1) * PWXwords], PWXwords);
-
-	/* 3: for i = 0 to r_1 - 1 do */
-	for (i = 0; i < r1; i++) {
-		/* 4: if r_1 > 1 */
-		if (r1 > 1) {
-			/* 5: X <-- X xor B'_i */
-			blkxor(X, &B[i * PWXwords], PWXwords);
-		}
-
-		/* 7: X <-- pwxform(X) */
-		pwxform(X, ctx);
-
-		/* 8: B'_i <-- X */
-		blkcpy(&B[i * PWXwords], X, PWXwords);
-	}
-
-	/* 10: i <-- floor((r_1 - 1) * PWXbytes / 64) */
-	i = (r1 - 1) * PWXbytes / 64;
-
-	/* 11: B_i <-- H(B_i) */
-	salsa20(&B[i * 16], ctx->salsa20_rounds);
-
-#if 1 /* No-op with our current pwxform settings, but do it to make sure */
-	/* 12: for i = i + 1 to 2r - 1 do */
-	for (i++; i < 2 * r; i++) {
-		/* 13: B_i <-- H(B_i xor B_{i-1}) */
-		blkxor(&B[i * 16], &B[(i - 1) * 16], 16);
-		salsa20(&B[i * 16], ctx->salsa20_rounds);
-	}
-#endif
-}
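As a quick cross-check of the S-box sizing macros above, a small sketch that re-derives the v1.0 figures; the constants are copied from the defines above, not new behaviour.

#include <assert.h>
#include <stdio.h>

int main(void)
{
	/* PWXsimple = 2, Swidth_1_0 = 11, as defined above. */
	unsigned Swidth = 11, PWXsimple = 2;
	unsigned long Sbytes1 = (1UL << Swidth) * PWXsimple * 8;	/* one S-box */
	unsigned long Smask = ((1UL << Swidth) - 1) * PWXsimple * 8;

	assert(Sbytes1 == 32768);	/* 32 KiB per S-box */
	assert(Smask == 0x7ff0);	/* keeps indices 16-byte aligned within the box */
	/* Three S-boxes (S0, S1, S2) give the 96 KiB Sbytes figure used for v1.0. */
	printf("Sbytes1=%lu total=%lu Smask=%#lx\n", Sbytes1, 3 * Sbytes1, Smask);
	return 0;
}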
-
-/**
- * integerify(B, r):
- * Return the result of parsing B_{2r-1} as a little-endian integer.
- */
-static uint32_t integerify(const uint32_t *B, size_t r)
-{
-/*
- * Our 32-bit words are in host byte order. Also, they are SIMD-shuffled, but
- * we only care about the least significant 32 bits anyway.
- */
-	const uint32_t *X = &B[(2 * r - 1) * 16];
-	return X[0];
-}
-
-/**
- * p2floor(x):
- * Largest power of 2 not greater than argument.
- */
-static uint32_t p2floor(uint32_t x)
-{
-	uint32_t y;
-	while ((y = x & (x - 1)))
-		x = y;
-	return x;
-}
-
-/**
- * wrap(x, i):
- * Wrap x to the range 0 to i-1.
- */
-static uint32_t wrap(uint32_t x, uint32_t i)
-{
-	uint32_t n = p2floor(i);
-	return (x & (n - 1)) + (i - n);
-}
-
-/**
- * smix1(B, r, N, V, X, ctx):
- * Compute first loop of B = SMix_r(B, N). The input B must be 128r bytes in
- * length; the temporary storage V must be 128rN bytes in length; the temporary
- * storage X must be 128r bytes in length.
- */
-static void smix1(uint32_t *B, size_t r, uint32_t N,
-    uint32_t *V, uint32_t *X, pwxform_ctx_t *ctx)
-{
-	size_t s = 32 * r;
-	uint32_t i, j;
-	size_t k;
-
-	/* 1: X <-- B */
-	for (k = 0; k < 2 * r; k++)
-		for (i = 0; i < 16; i++)
-			X[k * 16 + i] = le32dec(&B[k * 16 + (i * 5 % 16)]);
-
-	if (ctx->version != YESPOWER_0_5) {
-		for (k = 1; k < r; k++) {
-			blkcpy(&X[k * 32], &X[(k - 1) * 32], 32);
-			blockmix_pwxform(&X[k * 32], ctx, 1);
-		}
-	}
-
-	/* 2: for i = 0 to N - 1 do */
-	for (i = 0; i < N; i++) {
-		/* 3: V_i <-- X */
-		blkcpy(&V[i * s], X, s);
-
-		if (i > 1) {
-			/* j <-- Wrap(Integerify(X), i) */
-			j = wrap(integerify(X, r), i);
-
-			/* X <-- X xor V_j */
-			blkxor(X, &V[j * s], s);
-		}
-
-		/* 4: X <-- H(X) */
-		if (V != ctx->S)
-			blockmix_pwxform(X, ctx, r);
-		else
-			blockmix_salsa(X, ctx->salsa20_rounds);
-	}
-
-	/* B' <-- X */
-	for (k = 0; k < 2 * r; k++)
-		for (i = 0; i < 16; i++)
-			le32enc(&B[k * 16 + (i * 5 % 16)], X[k * 16 + i]);
-}
-
-/**
- * smix2(B, r, N, Nloop, V, X, ctx):
- * Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in
- * length; the temporary storage V must be 128rN bytes in length; the temporary
- * storage X must be 128r bytes in length. The value N must be a power of 2
- * greater than 1.
- */
-static void smix2(uint32_t *B, size_t r, uint32_t N, uint32_t Nloop,
-    uint32_t *V, uint32_t *X, pwxform_ctx_t *ctx)
-{
-	size_t s = 32 * r;
-	uint32_t i, j;
-	size_t k;
-
-	/* X <-- B */
-	for (k = 0; k < 2 * r; k++)
-		for (i = 0; i < 16; i++)
-			X[k * 16 + i] = le32dec(&B[k * 16 + (i * 5 % 16)]);
-
-	/* 6: for i = 0 to N - 1 do */
-	for (i = 0; i < Nloop; i++) {
-		/* 7: j <-- Integerify(X) mod N */
-		j = integerify(X, r) & (N - 1);
-
-		/* 8.1: X <-- X xor V_j */
-		blkxor(X, &V[j * s], s);
-		/* V_j <-- X */
-		if (Nloop != 2)
-			blkcpy(&V[j * s], X, s);
-
-		/* 8.2: X <-- H(X) */
-		blockmix_pwxform(X, ctx, r);
-	}
-
-	/* 10: B' <-- X */
-	for (k = 0; k < 2 * r; k++)
-		for (i = 0; i < 16; i++)
-			le32enc(&B[k * 16 + (i * 5 % 16)], X[k * 16 + i]);
-}
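A short numeric illustration of p2floor()/wrap() above; the two functions are copied verbatim so the check is self-contained, and the value 48 is an arbitrary example.

#include <assert.h>
#include <stdint.h>

/* Copies of p2floor()/wrap() above, reproduced only for a standalone check. */
static uint32_t p2floor(uint32_t x)
{
	uint32_t y;
	while ((y = x & (x - 1)))
		x = y;
	return x;
}

static uint32_t wrap(uint32_t x, uint32_t i)
{
	uint32_t n = p2floor(i);
	return (x & (n - 1)) + (i - n);
}

int main(void)
{
	/* With i = 48, n = p2floor(48) = 32, so results land in [16, 48). */
	assert(p2floor(48) == 32);
	assert(wrap(0, 48) == 16);
	assert(wrap(100, 48) == 20);	/* (100 & 31) + 16 */
	return 0;
}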
-
-/**
- * smix(B, r, N, p, t, V, X, ctx):
- * Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the
- * temporary storage V must be 128rN bytes in length; the temporary storage
- * X must be 128r bytes in length. The value N must be a power of 2 and at
- * least 16.
- */
-static void smix(uint32_t *B, size_t r, uint32_t N,
-    uint32_t *V, uint32_t *X, pwxform_ctx_t *ctx)
-{
-	uint32_t Nloop_all = (N + 2) / 3; /* 1/3, round up */
-	uint32_t Nloop_rw = Nloop_all;
-
-	Nloop_all++; Nloop_all &= ~(uint32_t)1; /* round up to even */
-	if (ctx->version == YESPOWER_0_5) {
-		Nloop_rw &= ~(uint32_t)1; /* round down to even */
-	} else {
-		Nloop_rw++; Nloop_rw &= ~(uint32_t)1; /* round up to even */
-	}
-
-	smix1(B, 1, ctx->Sbytes / 128, ctx->S, X, ctx);
-	smix1(B, r, N, V, X, ctx);
-	smix2(B, r, N, Nloop_rw /* must be > 2 */, V, X, ctx);
-	smix2(B, r, N, Nloop_all - Nloop_rw /* 0 or 2 */, V, X, ctx);
-}
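For a concrete feel of the loop-count rounding in smix() above, a sketch with an example N of 2048 (any valid power of two would do); it mirrors the arithmetic above rather than calling it.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t N = 2048;
	uint32_t Nloop_all = (N + 2) / 3;	/* 683 */
	uint32_t Nloop_rw = Nloop_all;

	Nloop_all++; Nloop_all &= ~(uint32_t)1;	/* 684, round up to even */
	Nloop_rw++;  Nloop_rw  &= ~(uint32_t)1;	/* 684, v1.0 rounding */

	assert(Nloop_all == 684 && Nloop_rw == 684);
	/* So the second smix2() call runs for Nloop_all - Nloop_rw = 0
	 * iterations here; with the v0.5 round-down it would be 2. */
	assert(Nloop_all - Nloop_rw == 0);
	return 0;
}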
-
-/**
- * yespower(local, src, srclen, params, dst):
- * Compute yespower(src[0 .. srclen - 1], N, r), to be checked for "< target".
- *
- * Return 0 on success; or -1 on error.
- */
-int yespower(yespower_local_t *local,
-    const uint8_t *src, size_t srclen,
-    const yespower_params_t *params, yespower_binary_t_p2b *dst)
-{
-	yespower_version_t version = params->version;
-	uint32_t N = params->N;
-	uint32_t r = params->r;
-	const uint8_t *pers = params->pers;
-	size_t perslen = params->perslen;
-	int retval = -1;
-	size_t B_size, V_size;
-	uint32_t *B, *V, *X, *S;
-	pwxform_ctx_t ctx;
-	uint32_t sha256[8];
-
-	memset(dst, 0xff, sizeof(*dst));
-
-	/* Sanity-check parameters */
-	if ((version != YESPOWER_0_5 && version != YESPOWER_1_0) ||
-	    N < 1024 || N > 512 * 1024 || r < 8 || r > 32 ||
-	    (N & (N - 1)) != 0 || r < rmin ||
-	    (!pers && perslen)) {
-		errno = EINVAL;
-		return -1;
-	}
-
-	/* Allocate memory */
-	B_size = (size_t)128 * r;
-	V_size = B_size * N;
-	if ((V = malloc(V_size)) == NULL)
-		return -1;
-	if ((B = malloc(B_size)) == NULL)
-		goto free_V;
-	if ((X = malloc(B_size)) == NULL)
-		goto free_B;
-	ctx.version = version;
-	if (version == YESPOWER_0_5) {
-		ctx.salsa20_rounds = 8;
-		ctx.PWXrounds = PWXrounds_0_5;
-		ctx.Swidth = Swidth_0_5;
-		ctx.Sbytes = 2 * Swidth_to_Sbytes1_P2b(ctx.Swidth);
-	} else {
-		ctx.salsa20_rounds = 2;
-		ctx.PWXrounds = PWXrounds_1_0;
-		ctx.Swidth = Swidth_1_0;
-		ctx.Sbytes = 3 * Swidth_to_Sbytes1_P2b(ctx.Swidth);
-	}
-	if ((S = malloc(ctx.Sbytes)) == NULL)
-		goto free_X;
-	ctx.S = S;
-	ctx.S0 = (uint32_t (*)[2])S;
-	ctx.S1 = ctx.S0 + (1 << ctx.Swidth) * PWXsimple;
-	ctx.S2 = ctx.S1 + (1 << ctx.Swidth) * PWXsimple;
-	ctx.Smask = Swidth_to_Smask(ctx.Swidth);
-	ctx.w = 0;
-
-	SHA256_Buf(src, srclen, (uint8_t *)sha256);
-
-	if (version != YESPOWER_0_5) {
-		if (pers) {
-			src = pers;
-			srclen = perslen;
-		} else {
-			srclen = 0;
-		}
-	}
-
-	/* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */
-	PBKDF2_SHA256_P2B((uint8_t *)sha256, sizeof(sha256),
-	    src, srclen, 1, (uint8_t *)B, B_size);
-
-	blkcpy(sha256, B, sizeof(sha256) / sizeof(sha256[0]));
-
-	/* 3: B_i <-- MF(B_i, N) */
-	smix(B, r, N, V, X, &ctx);
-
-	if (version == YESPOWER_0_5) {
-		/* 5: DK <-- PBKDF2(P, B, 1, dkLen) */
-		PBKDF2_SHA256_P2B((uint8_t *)sha256, sizeof(sha256),
-		    (uint8_t *)B, B_size, 1, (uint8_t *)dst, sizeof(*dst));
-
-		if (pers) {
-			HMAC_SHA256_Buf_P2b(dst, sizeof(*dst), pers, perslen,
-			    (uint8_t *)sha256);
-			SHA256_Buf(sha256, sizeof(sha256), (uint8_t *)dst);
-		}
-	} else {
-		HMAC_SHA256_Buf_P2b((uint8_t *)B + B_size - 64, 64,
-		    sha256, sizeof(sha256), (uint8_t *)dst);
-	}
-
-	/* Success! */
-	retval = 0;
-
-	/* Free memory */
-	free(S);
-free_X:
-	free(X);
-free_B:
-	free(B);
-free_V:
-	free(V);
-
-	return retval;
-}
-
-int yespower_tls_p2b(const uint8_t *src, size_t srclen,
-    const yespower_params_t *params, yespower_binary_t_p2b *dst)
-{
-/* The reference implementation doesn't use thread-local storage */
-	return yespower(NULL, src, srclen, params, dst);
-}
-
-int yespower_init_local_p2b(yespower_local_t *local)
-{
-/* The reference implementation doesn't use the local structure */
-	local->base = local->aligned = NULL;
-	local->base_size = local->aligned_size = 0;
-	return 0;
-}
-
-int yespower_free_local_p2b(yespower_local_t *local)
-{
-/* The reference implementation frees its memory in yespower() */
-	(void)local; /* unused */
-	return 0;
-}
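Rough memory footprint of the reference yespower() just removed, for illustrative parameters N = 2048 and r = 32; the per-allocation sizes mirror the malloc() calls above (V dominates at 128·r·N bytes).

#include <stdio.h>

int main(void)
{
	size_t r = 32, N = 2048;
	size_t B_size = (size_t)128 * r;		/* 4 KiB */
	size_t V_size = B_size * N;			/* 8 MiB */
	size_t Sbytes = 3 * ((1u << 11) * 2 * 8);	/* 96 KiB, v1.0 S-boxes */

	/* B and X are each B_size bytes; V and S as above. */
	printf("B=%zu V=%zu X=%zu S=%zu total=%zu bytes\n",
	    B_size, V_size, B_size, Sbytes,
	    B_size * 2 + V_size + Sbytes);
	return 0;
}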