/*
C implementation by John Ryland
Based on the pseudocode algorithm on Wikipedia
Copyright (c) 2013
*/
#include <stdint.h>
#include <assert.h>
#include <endian.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Initial hash state
// (first 32 bits of the fractional parts of the square roots of the first 8 primes 2..19):
static const uint32_t h[8] =
{
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
};

// Table of round constants
// (first 32 bits of the fractional parts of the cube roots of the first 64 primes 2..311):
static const uint32_t k[64] =
{
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};

// Rotates a 32-bit word right by n bits. n must be in 1..31 so that neither
// shift count reaches the width of the type.
static uint32_t sha256_rotr(uint32_t word, unsigned n)
{
    return (word >> n) | (word << (32u - n));
}

// Runs the SHA-256 compression function over one 64-byte block and adds the
// result into state[0..7].
static void sha256_compress(uint32_t state[8], const uint8_t block[64])
{
    uint32_t w[64];

    // Break the block into sixteen 32-bit big-endian words w[0..15]. The bytes
    // are assembled one at a time so the result is independent of host
    // endianness and of the alignment of block.
    for (int i = 0; i < 16; i++) {
        const uint8_t *p = block + 4 * i;
        w[i] = ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
               ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
    }

    // Extend the sixteen words into the sixty-four word message schedule.
    for (int i = 16; i < 64; i++) {
        uint32_t s0 = sha256_rotr(w[i - 15], 7) ^ sha256_rotr(w[i - 15], 18) ^ (w[i - 15] >> 3);
        uint32_t s1 = sha256_rotr(w[i - 2], 17) ^ sha256_rotr(w[i - 2], 19) ^ (w[i - 2] >> 10);
        w[i] = w[i - 16] + s0 + w[i - 7] + s1;
    }

    // Working variables; the eighth is called hh because h names the table above.
    uint32_t a = state[0], b = state[1], c = state[2], d = state[3];
    uint32_t e = state[4], f = state[5], g = state[6], hh = state[7];

    // Main loop: 64 rounds.
    for (int i = 0; i < 64; i++) {
        uint32_t big_s1 = sha256_rotr(e, 6) ^ sha256_rotr(e, 11) ^ sha256_rotr(e, 25);
        uint32_t choose = (e & f) ^ (~e & g);
        uint32_t t1 = hh + big_s1 + choose + k[i] + w[i];
        uint32_t big_s0 = sha256_rotr(a, 2) ^ sha256_rotr(a, 13) ^ sha256_rotr(a, 22);
        uint32_t majority = (a & b) ^ (a & c) ^ (b & c);
        uint32_t t2 = big_s0 + majority;

        hh = g;
        g = f;
        f = e;
        e = d + t1;
        d = c;
        c = b;
        b = a;
        a = t1 + t2;
    }

    // Add this block's result to the running hash.
    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
    state[4] += e;
    state[5] += f;
    state[6] += g;
    state[7] += hh;
}

// Computes the SHA-256 digest of a byte string.
//
//   message  the input bytes; read only. May be NULL when length is 0.
//   length   number of bytes in message.
//   hash     receives the digest as eight 32-bit words in host byte order,
//            most significant word first (hash[0] holds the first four digest
//            bytes). Must not overlap message.
//
// The input is consumed in place; only the final partial block is copied into
// a fixed 128-byte scratch buffer, so stack use does not grow with length.
void SHA256(const uint8_t *__restrict message, uint64_t length, uint32_t *__restrict hash)
{
    // Initialize hash value.
    memcpy(hash, h, sizeof h);

    // Process every complete 512-bit block straight from the caller's buffer.
    const uint64_t full_blocks = length / 64;
    for (uint64_t block = 0; block < full_blocks; block++) {
        sha256_compress(hash, message + block * 64);
    }

    // Build the padded tail: the leftover bytes, a single 0x80 byte (the '1'
    // bit), zero bytes, then the message length in bits as a 64-bit
    // big-endian integer occupying the last 8 bytes of the final block.
    uint8_t tail[128] = {0};
    const size_t remaining = (size_t)(length % 64);
    if (remaining != 0) {
        memcpy(tail, message + (length - remaining), remaining);
    }
    tail[remaining] = 0x80;

    // The 0x80 marker plus the 8 length bytes fit in one block only when at
    // most 55 message bytes are left over; otherwise a second block is needed.
    const size_t tail_len = (remaining < 56) ? 64 : 128;

    const uint64_t bit_count = length * 8; // taken modulo 2^64
    for (int i = 0; i < 8; i++) {
        tail[tail_len - 1 - (size_t)i] = (uint8_t)(bit_count >> (8 * i));
    }

    sha256_compress(hash, tail);
    if (tail_len == 128) {
        sha256_compress(hash, tail + 64);
    }
    // The final hash value is now in hash[0..7].
}
void SHA256_Test(const char* test, const char* msg, const char* cmp)
{
uint32_t hash[8];
SHA256((uint8_t*)msg, strlen(msg), hash);
char buffer[1024];
snprintf(buffer, 1024, "0x%08x%08x%08x%08x%08x%08x%08x%08x", hash[0],
hash[1], hash[2], hash[3], hash[4], hash[5], hash[6], hash[7]);
if ( !strcmp(buffer, cmp) ) {
printf("%s: PASS\n", test);
} else {
printf("%s: FAIL\n", test);
printf("Hash: %s\n", buffer);
printf("Compare: %s\n", cmp);
exit(-1);
}
}
// Self-test driver: runs SHA256_Test on each known message/digest pair in
// order. SHA256_Test exits the process on the first mismatch, so reaching the
// end means every case passed and 0 is returned.
int main(void)
{
    static const struct {
        const char *label;
        const char *message;
        const char *expected;
    } cases[] = {
        { "Test1", "",
          "0xe3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" },
        { "Test2", "The quick brown fox jumps over the lazy dog",
          "0xd7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592" },
        { "Test3", "The quick brown fox jumps over the lazy dog.",
          "0xef537f25c895bfa782526529a9b63d97aa631564d5d789c2b765448c8635fb6c" },
    };

    for (size_t i = 0; i < sizeof cases / sizeof cases[0]; i++) {
        SHA256_Test(cases[i].label, cases[i].message, cases[i].expected);
    }
    return 0;
}