diff --git a/Crypto/Integer/Integer.cpp b/Crypto/Integer/Integer.cpp
new file mode 100644
index 0000000..52318b2
--- /dev/null
+++ b/Crypto/Integer/Integer.cpp
@@ -0,0 +1,746 @@
+/*
+  uint2048_t Class
+  Written by John Ryland
+  (C) Copyright 2015
+*/
+#include "Integer.h"
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <vector>
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <iomanip>
+#include "../../Util.h"
+
+enum ComparisonResult
+{
+  CR_Equal,
+  CR_LessThan,
+  CR_GreaterThan
+};
+
+struct uint2048_t::Pimpl
+{
+  void Init(const char* str);
+  void Copy(const uint2048_t& other);
+  void Normalize();
+  size_t GetSize();
+
+  static ComparisonResult Compare(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Add(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Subtract(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Multiply(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Divide(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Modulus(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t ShiftLeft(const uint2048_t& a, int shift);
+  static uint2048_t ShiftRight(const uint2048_t& a, int shift);
+  static uint2048_t Or(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t And(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Xor(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Not(const uint2048_t& a);
+
+  // Helper function to optimize the ExpMod function
+  static void MulMod(uint8_t* a, uint8_t* b, uint8_t* mod, size_t an, size_t bn, size_t mn, uint8_t* dest);
+
+#define s_size  257 // Fixed at 2048 bits (256*8)
+  uint8_t m_data[s_size];
+};
+
+uint2048_t::uint2048_t() // Default constructor
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  m_pimpl->Init("0");
+}
+
+uint2048_t::uint2048_t(const uint2048_t& other) // Copy constructor
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  m_pimpl->Copy(other);
+}
+
+uint2048_t::uint2048_t(uint64_t a_number) // Create from a number
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  std::string numStr = int2hex(a_number);
+  //printf("\n init from int: 0x%x -> %s\n", int(a_number), numStr.c_str());
+  m_pimpl->Init(numStr.c_str());
+  //m_pimpl->Init(int2hex(a_number).c_str());
+}
+
+uint2048_t::uint2048_t(const char* str) // Create from a string
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  m_pimpl->Init(str);
+}
+
+uint2048_t::~uint2048_t() // Destructor
+{
+}
+
+void uint2048_t::operator=(const uint2048_t& other) // Assignment operator
+{
+  m_pimpl->Copy(other);
+}
+
+bool uint2048_t::operator!() const // Is zero
+{
+  return (Pimpl::Compare(*this, uint2048_t()) == CR_Equal);
+}
+
+bool uint2048_t::operator!=(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) != CR_Equal);
+}
+
+bool uint2048_t::operator==(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) == CR_Equal);
+}
+
+bool uint2048_t::operator<(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) == CR_LessThan);
+}
+
+bool uint2048_t::operator>(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) == CR_GreaterThan);
+}
+
+bool uint2048_t::operator<=(const uint2048_t& other) const
+{
+  ComparisonResult result = Pimpl::Compare(*this, other);
+  return (result == CR_Equal || result == CR_LessThan);
+}
+
+bool uint2048_t::operator>=(const uint2048_t& other) const
+{
+  ComparisonResult result = Pimpl::Compare(*this, other);
+  return (result == CR_Equal || result == CR_GreaterThan);
+}
+
+bool uint2048_t::operator&&(const uint2048_t& other) const
+{
+  return (!!*this) && (!!other);
+}
+
+bool uint2048_t::operator||(const uint2048_t& other) const
+{
+  return (!!*this) || (!!other);
+}
+
+uint2048_t& uint2048_t::operator--()
+{
+  *this = Pimpl::Subtract(*this, uint2048_t(1));
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator++()
+{
+  *this = Pimpl::Add(*this, uint2048_t(1));
+  return *this;
+}
+
+uint2048_t uint2048_t::operator++(int) 
+{
+  uint2048_t tmp(*this);
+  operator++();
+  return tmp;
+}
+
+uint2048_t uint2048_t::operator--(int) 
+{
+  uint2048_t tmp(*this);
+  operator--();
+  return tmp;
+}
+
+uint2048_t& uint2048_t::operator-=(const uint2048_t& other)
+{
+  *this = Pimpl::Subtract(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator+=(const uint2048_t& other)
+{
+  *this = Pimpl::Add(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator*=(const uint2048_t& other) 
+{
+  *this = Pimpl::Multiply(*this, other);
+  return *this;
+}
+
+uint2048_t uint2048_t::operator-(const uint2048_t& other) const
+{
+  return Pimpl::Subtract(*this, other);
+}
+
+uint2048_t uint2048_t::operator+(const uint2048_t& other) const
+{
+  return Pimpl::Add(*this, other);
+}
+
+uint2048_t uint2048_t::operator*(const uint2048_t& other) const
+{
+  return Pimpl::Multiply(*this, other);
+}
+
+uint2048_t uint2048_t::operator/(const uint2048_t& other) const
+{
+  return Pimpl::Divide(*this, other);
+}
+
+uint2048_t uint2048_t::operator%(const uint2048_t& other) const
+{
+  return Pimpl::Modulus(*this, other);
+}
+
+uint2048_t uint2048_t::operator&(const uint2048_t& other) const
+{
+  return Pimpl::And(*this, other);
+}
+
+uint2048_t uint2048_t::operator|(const uint2048_t& other) const
+{
+  return Pimpl::Or(*this, other);
+}
+
+uint2048_t uint2048_t::operator^(const uint2048_t& other) const
+{
+  return Pimpl::Xor(*this, other);
+}
+
+uint2048_t uint2048_t::operator~() const
+{
+  return Pimpl::Not(*this);
+}
+
+uint2048_t uint2048_t::operator>>(int shift) const
+{
+  return Pimpl::ShiftRight(*this, shift);
+}
+
+uint2048_t uint2048_t::operator<<(int shift) const
+{
+  return Pimpl::ShiftLeft(*this, shift);
+}
+
+uint2048_t& uint2048_t::operator>>=(int shift)
+{
+  *this = Pimpl::ShiftRight(*this, shift);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator<<=(int shift)
+{
+  *this = Pimpl::ShiftLeft(*this, shift);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator/=(const uint2048_t& other)
+{
+  *this = Pimpl::Divide(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator%=(const uint2048_t& other)
+{
+  *this = Pimpl::Modulus(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator&=(const uint2048_t& other)
+{
+  *this = Pimpl::And(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator|=(const uint2048_t& other)
+{
+  *this = Pimpl::Or(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator^=(const uint2048_t& other)
+{
+  *this = Pimpl::Xor(*this, other);
+  return *this;
+}
+
+uint2048_t::operator uint64_t() const
+{
+  return (uint64_t)strtoull(std::string(*this).c_str(), 0, 16);
+}
+
+void uint2048_t::Print() const
+{
+  std::cout << std::string(*this);
+}
+
+// base ^ exponent % modulus
+uint2048_t uint2048_t::ExpMod(const uint2048_t& base, const uint2048_t& exponent, const uint2048_t& modulus)
+{
+  uint2048_t dst(1);
+  uint2048_t a = base % modulus;
+  uint2048_t b = exponent % modulus;
+
+  size_t mn = modulus.m_pimpl->GetSize();
+  const uint8_t *modBits = modulus.m_pimpl->m_data;
+  uint8_t *aBits = a.m_pimpl->m_data;
+  size_t an = a.m_pimpl->GetSize();
+
+#if 1 
+  // pre-computed modulus shifted by 0 to 7
+  uint8_t tmpModBits[s_size*16];
+  for (int shift = 0; shift < 8; shift++)
+  {
+    uint32_t overflow = 0;
+    uint8_t* mb = &tmpModBits[s_size*2*shift];
+    memset(mb, 0, s_size*2);
+
+    for (int i = 0; i < s_size; i++)
+    {
+      uint32_t var = modBits[i] << shift;
+      mb[i] = var | overflow;
+      overflow = var >> 8;
+    }
+  }
+#endif
+
+  uint8_t tmp1Bits[s_size*2];
+  uint8_t *dstBits = dst.m_pimpl->m_data;
+
+  //printf("\n a: %s\n", std::string(a).c_str());
+  //printf("\n b: %s\n", std::string(b).c_str());
+  while (!!b) // while (bnbits)
+  {
+    if (b[0] & 1)  // if (!!(b & BigInteger(1)))
+    {
+      //dst = (dst * a) % modulus;
+      memset(tmp1Bits, 0, s_size*2);
+      size_t dn = dst.m_pimpl->GetSize();
+      Pimpl::MulMod(dstBits, aBits, tmpModBits, dn, an, mn, tmp1Bits);
+      memcpy(dstBits, tmp1Bits, s_size);
+    }
+
+    //a = (a * a) % modulus;
+    memset(tmp1Bits, 0, s_size*2);
+    Pimpl::MulMod(aBits, aBits, tmpModBits, an, an, mn, tmp1Bits);
+    memcpy(aBits, tmp1Bits, s_size);
+    an = a.m_pimpl->GetSize();
+    
+    b >>= 1; // perhaps could instead of shifting, test a particular bit instead of lowest bit
+    // bnbits--;
+  }
+
+  return dst;
+}
+
+uint2048_t::operator std::string() const
+{
+  std::ostringstream oss;
+  oss << "0x";
+  size_t n = m_pimpl->GetSize();
+  for (size_t i = 0; i < n; i++)
+    oss << std::setfill('0') << std::setw(2) << std::hex << (int)m_pimpl->m_data[n - i - 1];
+  return oss.str();
+}
+
+size_t uint2048_t::Size() const
+{
+  return (sizeof(m_pimpl->m_data) / sizeof(m_pimpl->m_data[0]));
+}
+
+uint8_t uint2048_t::operator[](size_t index) const
+{
+  return m_pimpl->m_data[index];
+}
+
+
+////////////////// PRIVATE //////////////////////////////////////////////////
+
+static unsigned int HexCharToInt(unsigned char ch)
+{
+  unsigned int c = tolower(ch);
+  return (c >= 'a') ? c - 'a' + 10 : c - '0';
+}
+
+void uint2048_t::Pimpl::Init(const char* str)
+{
+  int len = strlen(str);
+  memset(m_data, 0, s_size);
+  size_t idx = 0;
+  for (int i = 0; i < len; i++, idx++)
+  {
+    int val = HexCharToInt(str[len - 1 - i]);
+    i++;
+    if (i < len)
+      val |= HexCharToInt(str[len - 1 - i]) << 4;
+    m_data[idx] = val;
+  }
+}
+
+void uint2048_t::Pimpl::Copy(const uint2048_t& other)
+{
+  memcpy(m_data, other.m_pimpl->m_data, sizeof(m_data));
+}
+
+void uint2048_t::Pimpl::Normalize()
+{
+}
+
+size_t uint2048_t::Pimpl::GetSize()
+{
+  size_t an;
+  for (an = s_size - 1; an; an--)
+    if (m_data[an])
+      break;
+  an++;
+  return an;
+}
+
+ComparisonResult uint2048_t::Pimpl::Compare(const uint2048_t& a, const uint2048_t& b)
+{
+  size_t an = s_size - 1;
+  ComparisonResult result = CR_Equal;
+  do {
+    if (a[an] > b[an]) {
+      result = CR_GreaterThan; // a > b
+      break;
+    }
+    if (a[an] < b[an]) {
+      result = CR_LessThan; // b > a
+      break;
+    }
+  } while (an--);
+  /*
+  // This extra loop has no meaning except to try to show how we might reduce
+  // timing based side channel attacks by making the running time more consistent
+  int dummy = 0;
+  do {
+    if (a[an] > b[an]) {
+      dummy++;
+    }
+    if (a[an] < b[an]) {
+      dummy--;
+    }
+  } while (an--);
+  */
+  return result; // the two values are equal
+}
+
+uint2048_t uint2048_t::Pimpl::Or(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result = a;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] |= b[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::And(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result = a;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] &= b[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Xor(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result = a;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] ^= b[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Not(const uint2048_t& a)
+{
+  uint2048_t result;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] = ~a[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Add(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result;
+  uint8_t carry = 0; // carry / borrow flag
+  for (size_t i = 0; i < s_size; i++)
+  {
+    // apply the carry from previous iteration
+    uint32_t tmp = a[i] + carry + b[i];
+    //if (i != s_size - 1)
+    result.m_pimpl->m_data[i] = tmp;
+    carry = tmp >> 8;
+  }
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Subtract(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result;
+  uint32_t borrow = 0; // carry / borrow flag
+  for (size_t i = 0; i < s_size; i++)
+  {
+    // apply the borrow from previous iteration
+    uint32_t tmp = a[i] - borrow - b[i];
+    result.m_pimpl->m_data[i] = tmp;
+    borrow = (tmp >> 8) & 1;
+  }
+  return result;
+}
+
+// big number multiply algorithms
+// https://en.wikipedia.org/wiki/Multiplication_algorithm#Long_multiplication
+uint2048_t uint2048_t::Pimpl::Multiply(const uint2048_t& a, const uint2048_t& b)
+{
+  //if (b > a) // Swap if a is not the larger value. We can now assume a is the larger value after
+  //  return Multiply(b, a);
+
+  // Work out expected output size
+  size_t an = a.m_pimpl->GetSize();
+  size_t bn = b.m_pimpl->GetSize();
+  //if (bn > an)
+  //  return Multiply(b, a);
+
+/*
+a=   1234   a[0] = 4, a[1] = 3 ...
+b=  x5678
+    -----
+      4x8            r[0+0] +=                      (a[0] x b[0]) % 10
+     +4x7x10         r[0+1] += (a[0] x b[0]) / 10 + (a[0] x b[1]) % 10
+     +4x6x100        r[0+2] += (a[0] x b[1]) / 10 + (a[0] x b[2]) % 10
+     +4x5x1000       r[0+3] += (a[0] x b[2]) / 10 + (a[0] x b[3]) % 10
+     +3x8x10         r[1+0] +=                      (a[1] x b[0]) % 10
+     +3x7x100        r[1+1] += (a[1] x b[0]) / 10 + (a[1] x b[1]) % 10
+     +3x6x1000
+     +3x5x10000
+  -----
+*/
+
+  // Setup a destination value
+  uint2048_t d;
+  uint8_t dest[s_size*2];
+  memset(dest, 0, s_size*2);
+  for (size_t j = 0; j < an; j++)
+  {
+    // basically component wise matrix of a * b
+    uint32_t c1 = 0, c2 = 0;
+    for (size_t i = 0; i < bn; i++)
+    {
+      uint32_t t1 = ((uint32_t)a[j] * (uint32_t)b[i]) + (uint32_t)c1;
+      uint32_t t2 = (uint32_t)dest[i+j] + c2 + (t1 & 0xFF);
+      c1 = t1 >> 8;
+      c2 = t2 >> 8;
+      dest[i+j] = t2;
+    }
+    //dest[bn+j] = dest[bn+j] + c1 + c2;
+    dest[bn+j] += c1 + c2;
+  }
+  memcpy(d.m_pimpl->m_data, dest, s_size);
+  return d;
+}
+
+uint2048_t uint2048_t::Pimpl::Divide(const uint2048_t& a, const uint2048_t& b)
+{
+  size_t an = a.m_pimpl->GetSize() - 1;
+  size_t bn = b.m_pimpl->GetSize() - 1;
+  if (bn > an)
+    return uint2048_t();
+  size_t bitsLarger = (an - bn + 1) << 3;
+  //printf(" bl: %i \n", bitsLarger);
+  uint2048_t bit = uint2048_t(1) << bitsLarger;
+  //printf(" bit: %s \n", std::string(bit).c_str());
+  uint2048_t tmp = b << bitsLarger;
+  //printf(" tmp: %s \n", std::string(tmp).c_str());
+  uint2048_t mod = a;
+  uint2048_t res = uint2048_t();
+  do {
+    if (mod >= tmp) {
+      mod -= tmp;
+      res += bit;
+    }
+    tmp >>= 1;
+    bit >>= 1;
+  } while (bitsLarger--);
+  return res;
+}
+
+uint2048_t uint2048_t::Pimpl::Modulus(const uint2048_t& a, const uint2048_t& b)
+{
+  size_t an = a.m_pimpl->GetSize() - 1;
+  size_t bn = b.m_pimpl->GetSize() - 1;
+  //printf(" an: %i bn: %i \n", an, bn);
+  //printf("\n");
+  if (bn > an)
+  {
+    //printf("\n bn > an   an: %i bn: %i \n", an, bn);
+    return a;
+  }
+  if (bn == an)
+    if (b > a)
+    {
+      //printf("\n b > a   an: %i bn: %i \n", an, bn);
+      //printf(" a: %s \n", std::string(a).c_str());
+      //printf(" b: %s \n\n", std::string(b).c_str());
+      return a;
+    }
+  size_t bitsLarger = (an - bn + 1) << 3;
+  //printf(" bl: %i \n", bitsLarger);
+  uint2048_t tmp = b;
+  tmp <<= bitsLarger;
+  //printf(" tmp: %s \n", std::string(tmp).c_str());
+  uint2048_t mod = a;
+  do {
+    if (mod >= tmp) {
+      mod -= tmp;
+      //printf(" mod: %s \n", std::string(mod).c_str());
+    }
+    tmp >>= 1;
+    //printf(" tmp: %s \n", std::string(tmp).c_str());
+  } while (bitsLarger--);
+  //printf(" mod: %s \n", std::string(mod).c_str());
+  return mod;
+}
+
+void uint2048_t::Pimpl::MulMod(uint8_t* a, uint8_t* b, uint8_t* mod, size_t an, size_t bn, size_t mn, uint8_t* dest)
+//uint2048_t uint2048_t::Pimpl::MulMod(const uint2048_t& a, const uint2048_t& b, const uint2048_t& mod)
+{
+  // This is where we are doing the multiplication in to dest (which is double width)
+  for (size_t j = 0; j < an; j++)
+  {
+    // basically component wise matrix of a * b
+    uint32_t c1 = 0, c2 = 0;
+    for (size_t i = 0; i < bn; i++)
+    {
+      uint32_t t1 = ((uint32_t)a[j] * (uint32_t)b[i]) + (uint32_t)c1;
+      uint32_t t2 = (uint32_t)dest[i+j] + c2 + (t1 & 0xFF);
+      c1 = t1 >> 8;
+      c2 = t2 >> 8;
+      dest[i+j] = t2;
+    }
+    dest[bn+j] += c1 + c2;
+  }
+
+  // 'an' is now the size of dest
+  an = an + bn;
+
+  uint8_t* dstPtr = dest;
+
+  // check the magnitude of modulus is about or less than dest (otherwise we can just return dest)
+  if (mn <= an)
+  {
+    // This is where we are doing the modulus of  dest % mod
+    size_t byteShift = an - mn + 1;
+    size_t bitsLarger = byteShift << 3;
+
+    // siz is the byte width of the numbers we need to deal with
+    const size_t siz = s_size + byteShift;
+
+    // shift 'mod' in to place directly in to 'tmp'  (eg: tmp = mod << byteShift)
+    //memcpy(&tmp[byteShift], mod, s_size);
+
+    // here we are doing the shift 'virtually' in that we just keep track of the shift amount
+    // and then fix up below as applicable
+    const uint8_t* tmpPtr = mod;
+    size_t byt = byteShift;
+
+    do {
+      size_t idx = siz - 1;
+      // we are checking if dest >= tmp. We exit the loop when they stop being equal as we scan from high
+      // order bytes to low order ones. When they stop being equal (or we get to the end of the array)
+      // the comparison of the last value we were at will be the comparison result for the entire value
+      for (idx = siz - 1; idx >= byt; idx--)
+        if (dest[idx] != tmpPtr[idx-byt]) // here 'tmp' is indexed in a way as if it was shifted by byt bytes
+          break;
+      if (idx < byt)
+        for (idx = byt - 1; idx; idx--)
+          if (dest[idx]) // here we are in the area of the virtual 'tmp' that was shifted out to be filled with virtual 0s so this collapsed to just be checking if dest[idx] != 0
+            break;
+      /*
+      while (idx && dest[idx] == ((idx >= byt) ? tmpPtr[idx-byt] : 0) ) // Hot code path
+      {
+        idx--;
+      }
+      */
+      if (dest[idx] >= ((idx >= byt) ? tmpPtr[idx-byt] : 0) )
+      {
+        // here we subtract 'tmp' from 'dest'  (eg: dest -= tmp)
+        uint32_t borrow = 0; // carry / borrow flag
+        //for (size_t i = 0; i < siz; i++)
+        for (size_t i = byt; i < siz; i++)
+        {
+          // apply the borrow from previous iteration
+          // uint32_t x = dest[i] - borrow - ((i >= byt) ? tmpPtr[i-byt] : 0); // tmp[i];  // Hot
+          /*
+          uint32_t x = dest[i] - borrow - tmpPtr[i-byt]; // Hot code path
+          dest[i] = x;
+          borrow = (x >> 8) & 1;
+          */
+          uint32_t t = tmpPtr[i-byt]; // OOO
+          uint32_t sub = borrow + t;
+          uint32_t x = dest[i]; // OOO
+          x = x - sub;
+          dest[i] = x; // OO
+          x >>= 8;
+          x &= 1;
+          borrow = x;// (x >> 8) & 1;
+        }
+      }
+
+      if (bitsLarger--)
+      {
+        // 'tmp' = 'mod' << bitsLarger
+        // but we just update the virtual tmp values (mod has precomputed shifts from 0-7)
+        tmpPtr = mod + s_size*2*(bitsLarger & 7);
+        byt = bitsLarger >> 3;
+      }
+      bitsLarger++;
+    } while (bitsLarger--);
+  }
+}
+
+uint2048_t uint2048_t::Pimpl::ShiftLeft(const uint2048_t& a, int shift)
+{
+  // Because the data is stored in reverse endian order, the left shift is really shuffling the bytes right
+  // Within the bytes we still shift left
+  uint2048_t res;
+  size_t byteShift = shift >> 3;
+  uint8_t overflow = 0;
+  shift &= 7;
+  for (int i = byteShift; i < s_size; i++)
+  {
+    uint32_t var = a[i - byteShift] << shift;
+    res.m_pimpl->m_data[i] = var | overflow;
+    overflow = var >> 8;
+  }
+  return res;
+}
+
+uint2048_t uint2048_t::Pimpl::ShiftRight(const uint2048_t& a, int shift)
+{
+  // Because the data is stored in reverse endian order, the right shift is really shuffling the bytes left
+  // Within bytes we still shift right
+  // Shift right is easier because we don't need to change any buffer sizes
+  // TODO: check we are zeroing out properly, or normalizing which might be easier
+  uint2048_t res;
+  size_t byteShift = shift >> 3;
+  uint8_t overflow = 0;
+  shift &= 7;
+  //for (size_t i = s_size - 1; i; i--)
+  for (size_t i = byteShift; i < s_size - 1; i++)
+  {
+    uint8_t var = a[i] >> shift;
+    overflow    = a[i+1] << (8 - shift);
+    res.m_pimpl->m_data[i-byteShift] = var | overflow;
+  }
+  res.m_pimpl->m_data[s_size-1-byteShift] = a[s_size-1] >> shift;
+  return res;
+}
+

diff --git a/Crypto/Integer/Integer.cpp b/Crypto/Integer/Integer.cpp
new file mode 100644
index 0000000..52318b2
--- /dev/null
+++ b/Crypto/Integer/Integer.cpp
@@ -0,0 +1,746 @@
+/*
+  uint2048_t Class
+  Written by John Ryland
+  (C) Copyright 2015
+*/
+#include "Integer.h"
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <vector>
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <iomanip>
+#include "../../Util.h"
+
+enum ComparisonResult
+{
+  CR_Equal,
+  CR_LessThan,
+  CR_GreaterThan
+};
+
+struct uint2048_t::Pimpl
+{
+  void Init(const char* str);
+  void Copy(const uint2048_t& other);
+  void Normalize();
+  size_t GetSize();
+
+  static ComparisonResult Compare(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Add(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Subtract(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Multiply(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Divide(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Modulus(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t ShiftLeft(const uint2048_t& a, int shift);
+  static uint2048_t ShiftRight(const uint2048_t& a, int shift);
+  static uint2048_t Or(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t And(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Xor(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Not(const uint2048_t& a);
+
+  // Helper function to optimize the ExpMod function
+  static void MulMod(uint8_t* a, uint8_t* b, uint8_t* mod, size_t an, size_t bn, size_t mn, uint8_t* dest);
+
+#define s_size  257 // Fixed at 2048 bits (256*8)
+  uint8_t m_data[s_size];
+};
+
+uint2048_t::uint2048_t() // Default constructor
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  m_pimpl->Init("0");
+}
+
+uint2048_t::uint2048_t(const uint2048_t& other) // Copy constructor
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  m_pimpl->Copy(other);
+}
+
+uint2048_t::uint2048_t(uint64_t a_number) // Create from a number
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  std::string numStr = int2hex(a_number);
+  //printf("\n init from int: 0x%x -> %s\n", int(a_number), numStr.c_str());
+  m_pimpl->Init(numStr.c_str());
+  //m_pimpl->Init(int2hex(a_number).c_str());
+}
+
+uint2048_t::uint2048_t(const char* str) // Create from a string
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  m_pimpl->Init(str);
+}
+
+uint2048_t::~uint2048_t() // Destructor
+{
+}
+
+void uint2048_t::operator=(const uint2048_t& other) // Assignment operator
+{
+  m_pimpl->Copy(other);
+}
+
+bool uint2048_t::operator!() const // Is zero
+{
+  return (Pimpl::Compare(*this, uint2048_t()) == CR_Equal);
+}
+
+bool uint2048_t::operator!=(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) != CR_Equal);
+}
+
+bool uint2048_t::operator==(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) == CR_Equal);
+}
+
+bool uint2048_t::operator<(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) == CR_LessThan);
+}
+
+bool uint2048_t::operator>(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) == CR_GreaterThan);
+}
+
+bool uint2048_t::operator<=(const uint2048_t& other) const
+{
+  ComparisonResult result = Pimpl::Compare(*this, other);
+  return (result == CR_Equal || result == CR_LessThan);
+}
+
+bool uint2048_t::operator>=(const uint2048_t& other) const
+{
+  ComparisonResult result = Pimpl::Compare(*this, other);
+  return (result == CR_Equal || result == CR_GreaterThan);
+}
+
+bool uint2048_t::operator&&(const uint2048_t& other) const
+{
+  return (!!*this) && (!!other);
+}
+
+bool uint2048_t::operator||(const uint2048_t& other) const
+{
+  return (!!*this) || (!!other);
+}
+
+uint2048_t& uint2048_t::operator--()
+{
+  *this = Pimpl::Subtract(*this, uint2048_t(1));
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator++()
+{
+  *this = Pimpl::Add(*this, uint2048_t(1));
+  return *this;
+}
+
+uint2048_t uint2048_t::operator++(int) 
+{
+  uint2048_t tmp(*this);
+  operator++();
+  return tmp;
+}
+
+uint2048_t uint2048_t::operator--(int) 
+{
+  uint2048_t tmp(*this);
+  operator--();
+  return tmp;
+}
+
+uint2048_t& uint2048_t::operator-=(const uint2048_t& other)
+{
+  *this = Pimpl::Subtract(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator+=(const uint2048_t& other)
+{
+  *this = Pimpl::Add(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator*=(const uint2048_t& other) 
+{
+  *this = Pimpl::Multiply(*this, other);
+  return *this;
+}
+
+uint2048_t uint2048_t::operator-(const uint2048_t& other) const
+{
+  return Pimpl::Subtract(*this, other);
+}
+
+uint2048_t uint2048_t::operator+(const uint2048_t& other) const
+{
+  return Pimpl::Add(*this, other);
+}
+
+uint2048_t uint2048_t::operator*(const uint2048_t& other) const
+{
+  return Pimpl::Multiply(*this, other);
+}
+
+uint2048_t uint2048_t::operator/(const uint2048_t& other) const
+{
+  return Pimpl::Divide(*this, other);
+}
+
+uint2048_t uint2048_t::operator%(const uint2048_t& other) const
+{
+  return Pimpl::Modulus(*this, other);
+}
+
+uint2048_t uint2048_t::operator&(const uint2048_t& other) const
+{
+  return Pimpl::And(*this, other);
+}
+
+uint2048_t uint2048_t::operator|(const uint2048_t& other) const
+{
+  return Pimpl::Or(*this, other);
+}
+
+uint2048_t uint2048_t::operator^(const uint2048_t& other) const
+{
+  return Pimpl::Xor(*this, other);
+}
+
+uint2048_t uint2048_t::operator~() const
+{
+  return Pimpl::Not(*this);
+}
+
+uint2048_t uint2048_t::operator>>(int shift) const
+{
+  return Pimpl::ShiftRight(*this, shift);
+}
+
+uint2048_t uint2048_t::operator<<(int shift) const
+{
+  return Pimpl::ShiftLeft(*this, shift);
+}
+
+uint2048_t& uint2048_t::operator>>=(int shift)
+{
+  *this = Pimpl::ShiftRight(*this, shift);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator<<=(int shift)
+{
+  *this = Pimpl::ShiftLeft(*this, shift);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator/=(const uint2048_t& other)
+{
+  *this = Pimpl::Divide(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator%=(const uint2048_t& other)
+{
+  *this = Pimpl::Modulus(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator&=(const uint2048_t& other)
+{
+  *this = Pimpl::And(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator|=(const uint2048_t& other)
+{
+  *this = Pimpl::Or(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator^=(const uint2048_t& other)
+{
+  *this = Pimpl::Xor(*this, other);
+  return *this;
+}
+
+uint2048_t::operator uint64_t() const
+{
+  return (uint64_t)strtoull(std::string(*this).c_str(), 0, 16);
+}
+
+void uint2048_t::Print() const
+{
+  std::cout << std::string(*this);
+}
+
+// base ^ exponent % modulus
+uint2048_t uint2048_t::ExpMod(const uint2048_t& base, const uint2048_t& exponent, const uint2048_t& modulus)
+{
+  uint2048_t dst(1);
+  uint2048_t a = base % modulus;
+  uint2048_t b = exponent % modulus;
+
+  size_t mn = modulus.m_pimpl->GetSize();
+  const uint8_t *modBits = modulus.m_pimpl->m_data;
+  uint8_t *aBits = a.m_pimpl->m_data;
+  size_t an = a.m_pimpl->GetSize();
+
+#if 1 
+  // pre-computed modulus shifted by 0 to 7
+  uint8_t tmpModBits[s_size*16];
+  for (int shift = 0; shift < 8; shift++)
+  {
+    uint32_t overflow = 0;
+    uint8_t* mb = &tmpModBits[s_size*2*shift];
+    memset(mb, 0, s_size*2);
+
+    for (int i = 0; i < s_size; i++)
+    {
+      uint32_t var = modBits[i] << shift;
+      mb[i] = var | overflow;
+      overflow = var >> 8;
+    }
+  }
+#endif
+
+  uint8_t tmp1Bits[s_size*2];
+  uint8_t *dstBits = dst.m_pimpl->m_data;
+
+  //printf("\n a: %s\n", std::string(a).c_str());
+  //printf("\n b: %s\n", std::string(b).c_str());
+  while (!!b) // while (bnbits)
+  {
+    if (b[0] & 1)  // if (!!(b & BigInteger(1)))
+    {
+      //dst = (dst * a) % modulus;
+      memset(tmp1Bits, 0, s_size*2);
+      size_t dn = dst.m_pimpl->GetSize();
+      Pimpl::MulMod(dstBits, aBits, tmpModBits, dn, an, mn, tmp1Bits);
+      memcpy(dstBits, tmp1Bits, s_size);
+    }
+
+    //a = (a * a) % modulus;
+    memset(tmp1Bits, 0, s_size*2);
+    Pimpl::MulMod(aBits, aBits, tmpModBits, an, an, mn, tmp1Bits);
+    memcpy(aBits, tmp1Bits, s_size);
+    an = a.m_pimpl->GetSize();
+    
+    b >>= 1; // perhaps could instead of shifting, test a particular bit instead of lowest bit
+    // bnbits--;
+  }
+
+  return dst;
+}
+
+uint2048_t::operator std::string() const
+{
+  std::ostringstream oss;
+  oss << "0x";
+  size_t n = m_pimpl->GetSize();
+  for (size_t i = 0; i < n; i++)
+    oss << std::setfill('0') << std::setw(2) << std::hex << (int)m_pimpl->m_data[n - i - 1];
+  return oss.str();
+}
+
+size_t uint2048_t::Size() const
+{
+  return (sizeof(m_pimpl->m_data) / sizeof(m_pimpl->m_data[0]));
+}
+
+uint8_t uint2048_t::operator[](size_t index) const
+{
+  return m_pimpl->m_data[index];
+}
+
+
+////////////////// PRIVATE //////////////////////////////////////////////////
+
+static unsigned int HexCharToInt(unsigned char ch)
+{
+  unsigned int c = tolower(ch);
+  return (c >= 'a') ? c - 'a' + 10 : c - '0';
+}
+
+void uint2048_t::Pimpl::Init(const char* str)
+{
+  int len = strlen(str);
+  memset(m_data, 0, s_size);
+  size_t idx = 0;
+  for (int i = 0; i < len; i++, idx++)
+  {
+    int val = HexCharToInt(str[len - 1 - i]);
+    i++;
+    if (i < len)
+      val |= HexCharToInt(str[len - 1 - i]) << 4;
+    m_data[idx] = val;
+  }
+}
+
+void uint2048_t::Pimpl::Copy(const uint2048_t& other)
+{
+  memcpy(m_data, other.m_pimpl->m_data, sizeof(m_data));
+}
+
+void uint2048_t::Pimpl::Normalize()
+{
+}
+
+size_t uint2048_t::Pimpl::GetSize()
+{
+  size_t an;
+  for (an = s_size - 1; an; an--)
+    if (m_data[an])
+      break;
+  an++;
+  return an;
+}
+
+ComparisonResult uint2048_t::Pimpl::Compare(const uint2048_t& a, const uint2048_t& b)
+{
+  size_t an = s_size - 1;
+  ComparisonResult result = CR_Equal;
+  do {
+    if (a[an] > b[an]) {
+      result = CR_GreaterThan; // a > b
+      break;
+    }
+    if (a[an] < b[an]) {
+      result = CR_LessThan; // b > a
+      break;
+    }
+  } while (an--);
+  /*
+  // This extra loop has no meaning except to try to show how we might reduce
+  // timing based side channel attacks by making the running time more consistent
+  int dummy = 0;
+  do {
+    if (a[an] > b[an]) {
+      dummy++;
+    }
+    if (a[an] < b[an]) {
+      dummy--;
+    }
+  } while (an--);
+  */
+  return result; // the two values are equal
+}
+
+uint2048_t uint2048_t::Pimpl::Or(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result = a;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] |= b[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::And(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result = a;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] &= b[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Xor(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result = a;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] ^= b[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Not(const uint2048_t& a)
+{
+  uint2048_t result;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] = ~a[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Add(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result;
+  uint8_t carry = 0; // carry / borrow flag
+  for (size_t i = 0; i < s_size; i++)
+  {
+    // apply the carry from previous iteration
+    uint32_t tmp = a[i] + carry + b[i];
+    //if (i != s_size - 1)
+    result.m_pimpl->m_data[i] = tmp;
+    carry = tmp >> 8;
+  }
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Subtract(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result;
+  uint32_t borrow = 0; // carry / borrow flag
+  for (size_t i = 0; i < s_size; i++)
+  {
+    // apply the borrow from previous iteration
+    uint32_t tmp = a[i] - borrow - b[i];
+    result.m_pimpl->m_data[i] = tmp;
+    borrow = (tmp >> 8) & 1;
+  }
+  return result;
+}
+
+// big number multiply algorithms
+// https://en.wikipedia.org/wiki/Multiplication_algorithm#Long_multiplication
+uint2048_t uint2048_t::Pimpl::Multiply(const uint2048_t& a, const uint2048_t& b)
+{
+  //if (b > a) // Swap if a is not the larger value. We can now assume a is the larger value after
+  //  return Multiply(b, a);
+
+  // Work out expected output size
+  size_t an = a.m_pimpl->GetSize();
+  size_t bn = b.m_pimpl->GetSize();
+  //if (bn > an)
+  //  return Multiply(b, a);
+
+/*
+a=   1234   a[0] = 4, a[1] = 3 ...
+b=  x5678
+    -----
+      4x8            r[0+0] +=                      (a[0] x b[0]) % 10
+     +4x7x10         r[0+1] += (a[0] x b[0]) / 10 + (a[0] x b[1]) % 10
+     +4x6x100        r[0+2] += (a[0] x b[1]) / 10 + (a[0] x b[2]) % 10
+     +4x5x1000       r[0+3] += (a[0] x b[2]) / 10 + (a[0] x b[3]) % 10
+     +3x8x10         r[1+0] +=                      (a[1] x b[0]) % 10
+     +3x7x100        r[1+1] += (a[1] x b[0]) / 10 + (a[1] x b[1]) % 10
+     +3x6x1000
+     +3x5x10000
+  -----
+*/
+
+  // Setup a destination value
+  uint2048_t d;
+  uint8_t dest[s_size*2];
+  memset(dest, 0, s_size*2);
+  for (size_t j = 0; j < an; j++)
+  {
+    // basically component wise matrix of a * b
+    uint32_t c1 = 0, c2 = 0;
+    for (size_t i = 0; i < bn; i++)
+    {
+      uint32_t t1 = ((uint32_t)a[j] * (uint32_t)b[i]) + (uint32_t)c1;
+      uint32_t t2 = (uint32_t)dest[i+j] + c2 + (t1 & 0xFF);
+      c1 = t1 >> 8;
+      c2 = t2 >> 8;
+      dest[i+j] = t2;
+    }
+    //dest[bn+j] = dest[bn+j] + c1 + c2;
+    dest[bn+j] += c1 + c2;
+  }
+  memcpy(d.m_pimpl->m_data, dest, s_size);
+  return d;
+}
+
+uint2048_t uint2048_t::Pimpl::Divide(const uint2048_t& a, const uint2048_t& b)
+{
+  size_t an = a.m_pimpl->GetSize() - 1;
+  size_t bn = b.m_pimpl->GetSize() - 1;
+  if (bn > an)
+    return uint2048_t();
+  size_t bitsLarger = (an - bn + 1) << 3;
+  //printf(" bl: %i \n", bitsLarger);
+  uint2048_t bit = uint2048_t(1) << bitsLarger;
+  //printf(" bit: %s \n", std::string(bit).c_str());
+  uint2048_t tmp = b << bitsLarger;
+  //printf(" tmp: %s \n", std::string(tmp).c_str());
+  uint2048_t mod = a;
+  uint2048_t res = uint2048_t();
+  do {
+    if (mod >= tmp) {
+      mod -= tmp;
+      res += bit;
+    }
+    tmp >>= 1;
+    bit >>= 1;
+  } while (bitsLarger--);
+  return res;
+}
+
+uint2048_t uint2048_t::Pimpl::Modulus(const uint2048_t& a, const uint2048_t& b)
+{
+  size_t an = a.m_pimpl->GetSize() - 1;
+  size_t bn = b.m_pimpl->GetSize() - 1;
+  //printf(" an: %i bn: %i \n", an, bn);
+  //printf("\n");
+  if (bn > an)
+  {
+    //printf("\n bn > an   an: %i bn: %i \n", an, bn);
+    return a;
+  }
+  if (bn == an)
+    if (b > a)
+    {
+      //printf("\n b > a   an: %i bn: %i \n", an, bn);
+      //printf(" a: %s \n", std::string(a).c_str());
+      //printf(" b: %s \n\n", std::string(b).c_str());
+      return a;
+    }
+  size_t bitsLarger = (an - bn + 1) << 3;
+  //printf(" bl: %i \n", bitsLarger);
+  uint2048_t tmp = b;
+  tmp <<= bitsLarger;
+  //printf(" tmp: %s \n", std::string(tmp).c_str());
+  uint2048_t mod = a;
+  do {
+    if (mod >= tmp) {
+      mod -= tmp;
+      //printf(" mod: %s \n", std::string(mod).c_str());
+    }
+    tmp >>= 1;
+    //printf(" tmp: %s \n", std::string(tmp).c_str());
+  } while (bitsLarger--);
+  //printf(" mod: %s \n", std::string(mod).c_str());
+  return mod;
+}
+
+void uint2048_t::Pimpl::MulMod(uint8_t* a, uint8_t* b, uint8_t* mod, size_t an, size_t bn, size_t mn, uint8_t* dest)
+//uint2048_t uint2048_t::Pimpl::MulMod(const uint2048_t& a, const uint2048_t& b, const uint2048_t& mod)
+{
+  // This is where we are doing the multiplication in to dest (which is double width)
+  for (size_t j = 0; j < an; j++)
+  {
+    // basically component wise matrix of a * b
+    uint32_t c1 = 0, c2 = 0;
+    for (size_t i = 0; i < bn; i++)
+    {
+      uint32_t t1 = ((uint32_t)a[j] * (uint32_t)b[i]) + (uint32_t)c1;
+      uint32_t t2 = (uint32_t)dest[i+j] + c2 + (t1 & 0xFF);
+      c1 = t1 >> 8;
+      c2 = t2 >> 8;
+      dest[i+j] = t2;
+    }
+    dest[bn+j] += c1 + c2;
+  }
+
+  // 'an' is now the size of dest
+  an = an + bn;
+
+  uint8_t* dstPtr = dest;
+
+  // check the magnitude of modulus is about or less than dest (otherwise we can just return dest)
+  if (mn <= an)
+  {
+    // This is where we are doing the modulus of  dest % mod
+    size_t byteShift = an - mn + 1;
+    size_t bitsLarger = byteShift << 3;
+
+    // siz is the byte width of the numbers we need to deal with
+    const size_t siz = s_size + byteShift;
+
+    // shift 'mod' in to place directly in to 'tmp'  (eg: tmp = mod << byteShift)
+    //memcpy(&tmp[byteShift], mod, s_size);
+
+    // here we are doing the shift 'virtually' in that we just keep track of the shift amount
+    // and then fix up below as applicable
+    const uint8_t* tmpPtr = mod;
+    size_t byt = byteShift;
+
+    do {
+      size_t idx = siz - 1;
+      // we are checking if dest >= tmp. We exit the loop when they stop being equal as we scan from high
+      // order bytes to low order ones. When they stop being equal (or we get to the end of the array)
+      // the comparison of the last value we were at will be the comparison result for the entire value
+      for (idx = siz - 1; idx >= byt; idx--)
+        if (dest[idx] != tmpPtr[idx-byt]) // here 'tmp' is indexed in a way as if it was shifted by byt bytes
+          break;
+      if (idx < byt)
+        for (idx = byt - 1; idx; idx--)
+          if (dest[idx]) // here we are in the area of the virtual 'tmp' that was shifted out to be filled with virtual 0s so this collapsed to just be checking if dest[idx] != 0
+            break;
+      /*
+      while (idx && dest[idx] == ((idx >= byt) ? tmpPtr[idx-byt] : 0) ) // Hot code path
+      {
+        idx--;
+      }
+      */
+      if (dest[idx] >= ((idx >= byt) ? tmpPtr[idx-byt] : 0) )
+      {
+        // here we subtract 'tmp' from 'dest'  (eg: dest -= tmp)
+        uint32_t borrow = 0; // carry / borrow flag
+        //for (size_t i = 0; i < siz; i++)
+        for (size_t i = byt; i < siz; i++)
+        {
+          // apply the borrow from previous iteration
+          // uint32_t x = dest[i] - borrow - ((i >= byt) ? tmpPtr[i-byt] : 0); // tmp[i];  // Hot
+          /*
+          uint32_t x = dest[i] - borrow - tmpPtr[i-byt]; // Hot code path
+          dest[i] = x;
+          borrow = (x >> 8) & 1;
+          */
+          uint32_t t = tmpPtr[i-byt]; // OOO
+          uint32_t sub = borrow + t;
+          uint32_t x = dest[i]; // OOO
+          x = x - sub;
+          dest[i] = x; // OO
+          x >>= 8;
+          x &= 1;
+          borrow = x;// (x >> 8) & 1;
+        }
+      }
+
+      if (bitsLarger--)
+      {
+        // 'tmp' = 'mod' << bitsLarger
+        // but we just update the virtual tmp values (mod has precomputed shifts from 0-7)
+        tmpPtr = mod + s_size*2*(bitsLarger & 7);
+        byt = bitsLarger >> 3;
+      }
+      bitsLarger++;
+    } while (bitsLarger--);
+  }
+}
+
+uint2048_t uint2048_t::Pimpl::ShiftLeft(const uint2048_t& a, int shift)
+{
+  // Because the data is stored in reverse endian order, the left shift is really shuffling the bytes right
+  // Within the bytes we still shift left
+  uint2048_t res;
+  size_t byteShift = shift >> 3;
+  uint8_t overflow = 0;
+  shift &= 7;
+  for (int i = byteShift; i < s_size; i++)
+  {
+    uint32_t var = a[i - byteShift] << shift;
+    res.m_pimpl->m_data[i] = var | overflow;
+    overflow = var >> 8;
+  }
+  return res;
+}
+
+uint2048_t uint2048_t::Pimpl::ShiftRight(const uint2048_t& a, int shift)
+{
+  // Because the data is stored in reverse endian order, the right shift is really shuffling the bytes left
+  // Within bytes we still shift right
+  // Shift right is easier because we don't need to change any buffer sizes
+  // TODO: check we are zeroing out properly, or normalizing which might be easier
+  uint2048_t res;
+  size_t byteShift = shift >> 3;
+  uint8_t overflow = 0;
+  shift &= 7;
+  //for (size_t i = s_size - 1; i; i--)
+  for (size_t i = byteShift; i < s_size - 1; i++)
+  {
+    uint8_t var = a[i] >> shift;
+    overflow    = a[i+1] << (8 - shift);
+    res.m_pimpl->m_data[i-byteShift] = var | overflow;
+  }
+  res.m_pimpl->m_data[s_size-1-byteShift] = a[s_size-1] >> shift;
+  return res;
+}
+
diff --git a/Crypto/Integer/Integer.h b/Crypto/Integer/Integer.h
new file mode 100644
index 0000000..e2a39ec
--- /dev/null
+++ b/Crypto/Integer/Integer.h
@@ -0,0 +1,91 @@
+/*
+  uint2048_t Class
+  Written by John Ryland
+  (C) Copyright 2015
+*/
+#ifndef INTEGER_H
+#define INTEGER_H
+
+#include <stdint.h>
+#include <vector>
+#include <string>
+#include <memory>
+
+/*
+  uint2048_t is a class that behaves similar to a regular built in type
+  however it can represent arbitarily large values, eg 1024 bit numbers
+  which is commonly the case in cryptographic code or other special uses
+  of maths. As its name suggests, it only holds integers, and positive
+  ones at that. Treat it like you would an unsigned int that can't
+  overflow, and in the case where it could go negative, it truncates to
+  zero (this happens when subtracting a larger number from a smaller one).
+*/
+class uint2048_t
+{
+public:
+  uint2048_t();                            // Default constructor (the value will be zero)
+  uint2048_t(const uint2048_t& other);     // Copy constructor
+  uint2048_t(uint64_t a_number);           // Create from a number
+  uint2048_t(const char* str);             // Create from a string
+  ~uint2048_t();                           // Destructor
+  void operator=(const uint2048_t& other); // Assignment operator
+
+  // Operator overloads
+  bool operator!() const; // Is zero
+  bool operator<(const uint2048_t& other) const;
+  bool operator>(const uint2048_t& other) const;
+  bool operator<=(const uint2048_t& other) const;
+  bool operator>=(const uint2048_t& other) const;
+  bool operator==(const uint2048_t& other) const;
+  bool operator!=(const uint2048_t& other) const;
+  bool operator&&(const uint2048_t& other) const;
+  bool operator||(const uint2048_t& other) const;
+  uint2048_t operator>>(int shift) const;
+  uint2048_t operator<<(int shift) const;
+  uint2048_t operator--(int);
+  uint2048_t operator++(int);
+  uint2048_t operator-(const uint2048_t& other) const;
+  uint2048_t operator+(const uint2048_t& other) const;
+  uint2048_t operator*(const uint2048_t& other) const;
+  uint2048_t operator/(const uint2048_t& other) const;
+  uint2048_t operator%(const uint2048_t& other) const;
+  uint2048_t operator&(const uint2048_t& other) const;
+  uint2048_t operator|(const uint2048_t& other) const;
+  uint2048_t operator^(const uint2048_t& other) const;
+  uint2048_t operator~() const;
+  uint2048_t& operator>>=(int shift);
+  uint2048_t& operator<<=(int shift);
+  uint2048_t& operator--();
+  uint2048_t& operator++();
+  uint2048_t& operator-=(const uint2048_t& other);
+  uint2048_t& operator+=(const uint2048_t& other);
+  uint2048_t& operator*=(const uint2048_t& other);
+  uint2048_t& operator/=(const uint2048_t& other);
+  uint2048_t& operator%=(const uint2048_t& other);
+  uint2048_t& operator&=(const uint2048_t& other);
+  uint2048_t& operator|=(const uint2048_t& other);
+  uint2048_t& operator^=(const uint2048_t& other);
+
+  explicit operator std::string() const;
+  explicit operator uint64_t() const;
+  uint8_t operator[](size_t index) const;
+  void Print() const;
+  size_t Size() const;
+
+  // Returns the quotent, and modulus in modulusResult of numerator / denominator
+  static uint2048_t DivMod(const uint2048_t& numerator, const uint2048_t& denominator, uint2048_t& modulusResult);
+
+  // Returns the result of:  base ^ exponent % modulus
+  static uint2048_t ExpMod(const uint2048_t& base, const uint2048_t& exponent, const uint2048_t& modulus);
+
+private:
+  // Implementation hidden behind pimpl
+  // helps with compile times
+  // helps maintain binary compatibility
+  // makes it possible to easily swap out implementations
+  struct Pimpl;
+  std::unique_ptr<Pimpl> m_pimpl;
+};
+
+#endif // INTEGER_H
+

diff --git a/Crypto/Integer/Integer.cpp b/Crypto/Integer/Integer.cpp
new file mode 100644
index 0000000..52318b2
--- /dev/null
+++ b/Crypto/Integer/Integer.cpp
@@ -0,0 +1,746 @@
+/*
+  uint2048_t Class
+  Written by John Ryland
+  (C) Copyright 2015
+*/
+#include "Integer.h"
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <vector>
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <iomanip>
+#include "../../Util.h"
+
+enum ComparisonResult
+{
+  CR_Equal,
+  CR_LessThan,
+  CR_GreaterThan
+};
+
+struct uint2048_t::Pimpl
+{
+  void Init(const char* str);
+  void Copy(const uint2048_t& other);
+  void Normalize();
+  size_t GetSize();
+
+  static ComparisonResult Compare(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Add(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Subtract(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Multiply(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Divide(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Modulus(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t ShiftLeft(const uint2048_t& a, int shift);
+  static uint2048_t ShiftRight(const uint2048_t& a, int shift);
+  static uint2048_t Or(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t And(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Xor(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Not(const uint2048_t& a);
+
+  // Helper function to optimize the ExpMod function
+  static void MulMod(uint8_t* a, uint8_t* b, uint8_t* mod, size_t an, size_t bn, size_t mn, uint8_t* dest);
+
+#define s_size  257 // Fixed at 2048 bits (256*8)
+  uint8_t m_data[s_size];
+};
+
+uint2048_t::uint2048_t() // Default constructor
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  m_pimpl->Init("0");
+}
+
+uint2048_t::uint2048_t(const uint2048_t& other) // Copy constructor
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  m_pimpl->Copy(other);
+}
+
+uint2048_t::uint2048_t(uint64_t a_number) // Create from a number
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  std::string numStr = int2hex(a_number);
+  //printf("\n init from int: 0x%x -> %s\n", int(a_number), numStr.c_str());
+  m_pimpl->Init(numStr.c_str());
+  //m_pimpl->Init(int2hex(a_number).c_str());
+}
+
+uint2048_t::uint2048_t(const char* str) // Create from a string
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  m_pimpl->Init(str);
+}
+
+uint2048_t::~uint2048_t() // Destructor
+{
+}
+
+void uint2048_t::operator=(const uint2048_t& other) // Assignment operator
+{
+  m_pimpl->Copy(other);
+}
+
+bool uint2048_t::operator!() const // Is zero
+{
+  return (Pimpl::Compare(*this, uint2048_t()) == CR_Equal);
+}
+
+bool uint2048_t::operator!=(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) != CR_Equal);
+}
+
+bool uint2048_t::operator==(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) == CR_Equal);
+}
+
+bool uint2048_t::operator<(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) == CR_LessThan);
+}
+
+bool uint2048_t::operator>(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) == CR_GreaterThan);
+}
+
+bool uint2048_t::operator<=(const uint2048_t& other) const
+{
+  ComparisonResult result = Pimpl::Compare(*this, other);
+  return (result == CR_Equal || result == CR_LessThan);
+}
+
+bool uint2048_t::operator>=(const uint2048_t& other) const
+{
+  ComparisonResult result = Pimpl::Compare(*this, other);
+  return (result == CR_Equal || result == CR_GreaterThan);
+}
+
+bool uint2048_t::operator&&(const uint2048_t& other) const
+{
+  return (!!*this) && (!!other);
+}
+
+bool uint2048_t::operator||(const uint2048_t& other) const
+{
+  return (!!*this) || (!!other);
+}
+
+uint2048_t& uint2048_t::operator--()
+{
+  *this = Pimpl::Subtract(*this, uint2048_t(1));
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator++()
+{
+  *this = Pimpl::Add(*this, uint2048_t(1));
+  return *this;
+}
+
+uint2048_t uint2048_t::operator++(int) 
+{
+  uint2048_t tmp(*this);
+  operator++();
+  return tmp;
+}
+
+uint2048_t uint2048_t::operator--(int) 
+{
+  uint2048_t tmp(*this);
+  operator--();
+  return tmp;
+}
+
+uint2048_t& uint2048_t::operator-=(const uint2048_t& other)
+{
+  *this = Pimpl::Subtract(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator+=(const uint2048_t& other)
+{
+  *this = Pimpl::Add(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator*=(const uint2048_t& other) 
+{
+  *this = Pimpl::Multiply(*this, other);
+  return *this;
+}
+
+uint2048_t uint2048_t::operator-(const uint2048_t& other) const
+{
+  return Pimpl::Subtract(*this, other);
+}
+
+uint2048_t uint2048_t::operator+(const uint2048_t& other) const
+{
+  return Pimpl::Add(*this, other);
+}
+
+uint2048_t uint2048_t::operator*(const uint2048_t& other) const
+{
+  return Pimpl::Multiply(*this, other);
+}
+
+uint2048_t uint2048_t::operator/(const uint2048_t& other) const
+{
+  return Pimpl::Divide(*this, other);
+}
+
+uint2048_t uint2048_t::operator%(const uint2048_t& other) const
+{
+  return Pimpl::Modulus(*this, other);
+}
+
+uint2048_t uint2048_t::operator&(const uint2048_t& other) const
+{
+  return Pimpl::And(*this, other);
+}
+
+uint2048_t uint2048_t::operator|(const uint2048_t& other) const
+{
+  return Pimpl::Or(*this, other);
+}
+
+uint2048_t uint2048_t::operator^(const uint2048_t& other) const
+{
+  return Pimpl::Xor(*this, other);
+}
+
+uint2048_t uint2048_t::operator~() const
+{
+  return Pimpl::Not(*this);
+}
+
+uint2048_t uint2048_t::operator>>(int shift) const
+{
+  return Pimpl::ShiftRight(*this, shift);
+}
+
+uint2048_t uint2048_t::operator<<(int shift) const
+{
+  return Pimpl::ShiftLeft(*this, shift);
+}
+
+uint2048_t& uint2048_t::operator>>=(int shift)
+{
+  *this = Pimpl::ShiftRight(*this, shift);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator<<=(int shift)
+{
+  *this = Pimpl::ShiftLeft(*this, shift);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator/=(const uint2048_t& other)
+{
+  *this = Pimpl::Divide(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator%=(const uint2048_t& other)
+{
+  *this = Pimpl::Modulus(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator&=(const uint2048_t& other)
+{
+  *this = Pimpl::And(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator|=(const uint2048_t& other)
+{
+  *this = Pimpl::Or(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator^=(const uint2048_t& other)
+{
+  *this = Pimpl::Xor(*this, other);
+  return *this;
+}
+
+uint2048_t::operator uint64_t() const
+{
+  return (uint64_t)strtoull(std::string(*this).c_str(), 0, 16);
+}
+
+void uint2048_t::Print() const
+{
+  std::cout << std::string(*this);
+}
+
+// base ^ exponent % modulus
+uint2048_t uint2048_t::ExpMod(const uint2048_t& base, const uint2048_t& exponent, const uint2048_t& modulus)
+{
+  uint2048_t dst(1);
+  uint2048_t a = base % modulus;
+  uint2048_t b = exponent % modulus;
+
+  size_t mn = modulus.m_pimpl->GetSize();
+  const uint8_t *modBits = modulus.m_pimpl->m_data;
+  uint8_t *aBits = a.m_pimpl->m_data;
+  size_t an = a.m_pimpl->GetSize();
+
+#if 1 
+  // pre-computed modulus shifted by 0 to 7
+  uint8_t tmpModBits[s_size*16];
+  for (int shift = 0; shift < 8; shift++)
+  {
+    uint32_t overflow = 0;
+    uint8_t* mb = &tmpModBits[s_size*2*shift];
+    memset(mb, 0, s_size*2);
+
+    for (int i = 0; i < s_size; i++)
+    {
+      uint32_t var = modBits[i] << shift;
+      mb[i] = var | overflow;
+      overflow = var >> 8;
+    }
+  }
+#endif
+
+  uint8_t tmp1Bits[s_size*2];
+  uint8_t *dstBits = dst.m_pimpl->m_data;
+
+  //printf("\n a: %s\n", std::string(a).c_str());
+  //printf("\n b: %s\n", std::string(b).c_str());
+  while (!!b) // while (bnbits)
+  {
+    if (b[0] & 1)  // if (!!(b & BigInteger(1)))
+    {
+      //dst = (dst * a) % modulus;
+      memset(tmp1Bits, 0, s_size*2);
+      size_t dn = dst.m_pimpl->GetSize();
+      Pimpl::MulMod(dstBits, aBits, tmpModBits, dn, an, mn, tmp1Bits);
+      memcpy(dstBits, tmp1Bits, s_size);
+    }
+
+    //a = (a * a) % modulus;
+    memset(tmp1Bits, 0, s_size*2);
+    Pimpl::MulMod(aBits, aBits, tmpModBits, an, an, mn, tmp1Bits);
+    memcpy(aBits, tmp1Bits, s_size);
+    an = a.m_pimpl->GetSize();
+    
+    b >>= 1; // perhaps could instead of shifting, test a particular bit instead of lowest bit
+    // bnbits--;
+  }
+
+  return dst;
+}
+
+uint2048_t::operator std::string() const
+{
+  std::ostringstream oss;
+  oss << "0x";
+  size_t n = m_pimpl->GetSize();
+  for (size_t i = 0; i < n; i++)
+    oss << std::setfill('0') << std::setw(2) << std::hex << (int)m_pimpl->m_data[n - i - 1];
+  return oss.str();
+}
+
+size_t uint2048_t::Size() const
+{
+  return (sizeof(m_pimpl->m_data) / sizeof(m_pimpl->m_data[0]));
+}
+
+uint8_t uint2048_t::operator[](size_t index) const
+{
+  return m_pimpl->m_data[index];
+}
+
+
+////////////////// PRIVATE //////////////////////////////////////////////////
+
+static unsigned int HexCharToInt(unsigned char ch)
+{
+  unsigned int c = tolower(ch);
+  return (c >= 'a') ? c - 'a' + 10 : c - '0';
+}
+
+void uint2048_t::Pimpl::Init(const char* str)
+{
+  int len = strlen(str);
+  memset(m_data, 0, s_size);
+  size_t idx = 0;
+  for (int i = 0; i < len; i++, idx++)
+  {
+    int val = HexCharToInt(str[len - 1 - i]);
+    i++;
+    if (i < len)
+      val |= HexCharToInt(str[len - 1 - i]) << 4;
+    m_data[idx] = val;
+  }
+}
+
+void uint2048_t::Pimpl::Copy(const uint2048_t& other)
+{
+  memcpy(m_data, other.m_pimpl->m_data, sizeof(m_data));
+}
+
+void uint2048_t::Pimpl::Normalize()
+{
+}
+
+size_t uint2048_t::Pimpl::GetSize()
+{
+  size_t an;
+  for (an = s_size - 1; an; an--)
+    if (m_data[an])
+      break;
+  an++;
+  return an;
+}
+
+ComparisonResult uint2048_t::Pimpl::Compare(const uint2048_t& a, const uint2048_t& b)
+{
+  size_t an = s_size - 1;
+  ComparisonResult result = CR_Equal;
+  do {
+    if (a[an] > b[an]) {
+      result = CR_GreaterThan; // a > b
+      break;
+    }
+    if (a[an] < b[an]) {
+      result = CR_LessThan; // b > a
+      break;
+    }
+  } while (an--);
+  /*
+  // This extra loop has no meaning except to try to show how we might reduce
+  // timing based side channel attacks by making the running time more consistent
+  int dummy = 0;
+  do {
+    if (a[an] > b[an]) {
+      dummy++;
+    }
+    if (a[an] < b[an]) {
+      dummy--;
+    }
+  } while (an--);
+  */
+  return result; // the two values are equal
+}
+
+uint2048_t uint2048_t::Pimpl::Or(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result = a;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] |= b[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::And(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result = a;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] &= b[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Xor(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result = a;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] ^= b[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Not(const uint2048_t& a)
+{
+  uint2048_t result;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] = ~a[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Add(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result;
+  uint8_t carry = 0; // carry / borrow flag
+  for (size_t i = 0; i < s_size; i++)
+  {
+    // apply the carry from previous iteration
+    uint32_t tmp = a[i] + carry + b[i];
+    //if (i != s_size - 1)
+    result.m_pimpl->m_data[i] = tmp;
+    carry = tmp >> 8;
+  }
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Subtract(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result;
+  uint32_t borrow = 0; // carry / borrow flag
+  for (size_t i = 0; i < s_size; i++)
+  {
+    // apply the borrow from previous iteration
+    uint32_t tmp = a[i] - borrow - b[i];
+    result.m_pimpl->m_data[i] = tmp;
+    borrow = (tmp >> 8) & 1;
+  }
+  return result;
+}
+
+// big number multiply algorithms
+// https://en.wikipedia.org/wiki/Multiplication_algorithm#Long_multiplication
+uint2048_t uint2048_t::Pimpl::Multiply(const uint2048_t& a, const uint2048_t& b)
+{
+  //if (b > a) // Swap if a is not the larger value. We can now assume a is the larger value after
+  //  return Multiply(b, a);
+
+  // Work out expected output size
+  size_t an = a.m_pimpl->GetSize();
+  size_t bn = b.m_pimpl->GetSize();
+  //if (bn > an)
+  //  return Multiply(b, a);
+
+/*
+a=   1234   a[0] = 4, a[1] = 3 ...
+b=  x5678
+    -----
+      4x8            r[0+0] +=                      (a[0] x b[0]) % 10
+     +4x7x10         r[0+1] += (a[0] x b[0]) / 10 + (a[0] x b[1]) % 10
+     +4x6x100        r[0+2] += (a[0] x b[1]) / 10 + (a[0] x b[2]) % 10
+     +4x5x1000       r[0+3] += (a[0] x b[2]) / 10 + (a[0] x b[3]) % 10
+     +3x8x10         r[1+0] +=                      (a[1] x b[0]) % 10
+     +3x7x100        r[1+1] += (a[1] x b[0]) / 10 + (a[1] x b[1]) % 10
+     +3x6x1000
+     +3x5x10000
+  -----
+*/
+
+  // Setup a destination value
+  uint2048_t d;
+  uint8_t dest[s_size*2];
+  memset(dest, 0, s_size*2);
+  for (size_t j = 0; j < an; j++)
+  {
+    // basically component wise matrix of a * b
+    uint32_t c1 = 0, c2 = 0;
+    for (size_t i = 0; i < bn; i++)
+    {
+      uint32_t t1 = ((uint32_t)a[j] * (uint32_t)b[i]) + (uint32_t)c1;
+      uint32_t t2 = (uint32_t)dest[i+j] + c2 + (t1 & 0xFF);
+      c1 = t1 >> 8;
+      c2 = t2 >> 8;
+      dest[i+j] = t2;
+    }
+    //dest[bn+j] = dest[bn+j] + c1 + c2;
+    dest[bn+j] += c1 + c2;
+  }
+  memcpy(d.m_pimpl->m_data, dest, s_size);
+  return d;
+}
+
+uint2048_t uint2048_t::Pimpl::Divide(const uint2048_t& a, const uint2048_t& b)
+{
+  size_t an = a.m_pimpl->GetSize() - 1;
+  size_t bn = b.m_pimpl->GetSize() - 1;
+  if (bn > an)
+    return uint2048_t();
+  size_t bitsLarger = (an - bn + 1) << 3;
+  //printf(" bl: %i \n", bitsLarger);
+  uint2048_t bit = uint2048_t(1) << bitsLarger;
+  //printf(" bit: %s \n", std::string(bit).c_str());
+  uint2048_t tmp = b << bitsLarger;
+  //printf(" tmp: %s \n", std::string(tmp).c_str());
+  uint2048_t mod = a;
+  uint2048_t res = uint2048_t();
+  do {
+    if (mod >= tmp) {
+      mod -= tmp;
+      res += bit;
+    }
+    tmp >>= 1;
+    bit >>= 1;
+  } while (bitsLarger--);
+  return res;
+}
+
+uint2048_t uint2048_t::Pimpl::Modulus(const uint2048_t& a, const uint2048_t& b)
+{
+  size_t an = a.m_pimpl->GetSize() - 1;
+  size_t bn = b.m_pimpl->GetSize() - 1;
+  //printf(" an: %i bn: %i \n", an, bn);
+  //printf("\n");
+  if (bn > an)
+  {
+    //printf("\n bn > an   an: %i bn: %i \n", an, bn);
+    return a;
+  }
+  if (bn == an)
+    if (b > a)
+    {
+      //printf("\n b > a   an: %i bn: %i \n", an, bn);
+      //printf(" a: %s \n", std::string(a).c_str());
+      //printf(" b: %s \n\n", std::string(b).c_str());
+      return a;
+    }
+  size_t bitsLarger = (an - bn + 1) << 3;
+  //printf(" bl: %i \n", bitsLarger);
+  uint2048_t tmp = b;
+  tmp <<= bitsLarger;
+  //printf(" tmp: %s \n", std::string(tmp).c_str());
+  uint2048_t mod = a;
+  do {
+    if (mod >= tmp) {
+      mod -= tmp;
+      //printf(" mod: %s \n", std::string(mod).c_str());
+    }
+    tmp >>= 1;
+    //printf(" tmp: %s \n", std::string(tmp).c_str());
+  } while (bitsLarger--);
+  //printf(" mod: %s \n", std::string(mod).c_str());
+  return mod;
+}
+
+void uint2048_t::Pimpl::MulMod(uint8_t* a, uint8_t* b, uint8_t* mod, size_t an, size_t bn, size_t mn, uint8_t* dest)
+//uint2048_t uint2048_t::Pimpl::MulMod(const uint2048_t& a, const uint2048_t& b, const uint2048_t& mod)
+{
+  // This is where we are doing the multiplication in to dest (which is double width)
+  for (size_t j = 0; j < an; j++)
+  {
+    // basically component wise matrix of a * b
+    uint32_t c1 = 0, c2 = 0;
+    for (size_t i = 0; i < bn; i++)
+    {
+      uint32_t t1 = ((uint32_t)a[j] * (uint32_t)b[i]) + (uint32_t)c1;
+      uint32_t t2 = (uint32_t)dest[i+j] + c2 + (t1 & 0xFF);
+      c1 = t1 >> 8;
+      c2 = t2 >> 8;
+      dest[i+j] = t2;
+    }
+    dest[bn+j] += c1 + c2;
+  }
+
+  // 'an' is now the size of dest
+  an = an + bn;
+
+  uint8_t* dstPtr = dest;
+
+  // check the magnitude of modulus is about or less than dest (otherwise we can just return dest)
+  if (mn <= an)
+  {
+    // This is where we are doing the modulus of  dest % mod
+    size_t byteShift = an - mn + 1;
+    size_t bitsLarger = byteShift << 3;
+
+    // siz is the byte width of the numbers we need to deal with
+    const size_t siz = s_size + byteShift;
+
+    // shift 'mod' in to place directly in to 'tmp'  (eg: tmp = mod << byteShift)
+    //memcpy(&tmp[byteShift], mod, s_size);
+
+    // here we are doing the shift 'virtually' in that we just keep track of the shift amount
+    // and then fix up below as applicable
+    const uint8_t* tmpPtr = mod;
+    size_t byt = byteShift;
+
+    do {
+      size_t idx = siz - 1;
+      // we are checking if dest >= tmp. We exit the loop when they stop being equal as we scan from high
+      // order bytes to low order ones. When they stop being equal (or we get to the end of the array)
+      // the comparison of the last value we were at will be the comparison result for the entire value
+      for (idx = siz - 1; idx >= byt; idx--)
+        if (dest[idx] != tmpPtr[idx-byt]) // here 'tmp' is indexed in a way as if it was shifted by byt bytes
+          break;
+      if (idx < byt)
+        for (idx = byt - 1; idx; idx--)
+          if (dest[idx]) // here we are in the area of the virtual 'tmp' that was shifted out to be filled with virtual 0s so this collapsed to just be checking if dest[idx] != 0
+            break;
+      /*
+      while (idx && dest[idx] == ((idx >= byt) ? tmpPtr[idx-byt] : 0) ) // Hot code path
+      {
+        idx--;
+      }
+      */
+      if (dest[idx] >= ((idx >= byt) ? tmpPtr[idx-byt] : 0) )
+      {
+        // here we subtract 'tmp' from 'dest'  (eg: dest -= tmp)
+        uint32_t borrow = 0; // carry / borrow flag
+        //for (size_t i = 0; i < siz; i++)
+        for (size_t i = byt; i < siz; i++)
+        {
+          // apply the borrow from previous iteration
+          // uint32_t x = dest[i] - borrow - ((i >= byt) ? tmpPtr[i-byt] : 0); // tmp[i];  // Hot
+          /*
+          uint32_t x = dest[i] - borrow - tmpPtr[i-byt]; // Hot code path
+          dest[i] = x;
+          borrow = (x >> 8) & 1;
+          */
+          uint32_t t = tmpPtr[i-byt]; // OOO
+          uint32_t sub = borrow + t;
+          uint32_t x = dest[i]; // OOO
+          x = x - sub;
+          dest[i] = x; // OO
+          x >>= 8;
+          x &= 1;
+          borrow = x;// (x >> 8) & 1;
+        }
+      }
+
+      if (bitsLarger--)
+      {
+        // 'tmp' = 'mod' << bitsLarger
+        // but we just update the virtual tmp values (mod has precomputed shifts from 0-7)
+        tmpPtr = mod + s_size*2*(bitsLarger & 7);
+        byt = bitsLarger >> 3;
+      }
+      bitsLarger++;
+    } while (bitsLarger--);
+  }
+}
+
+uint2048_t uint2048_t::Pimpl::ShiftLeft(const uint2048_t& a, int shift)
+{
+  // Because the data is stored in reverse endian order, the left shift is really shuffling the bytes right
+  // Within the bytes we still shift left
+  uint2048_t res;
+  size_t byteShift = shift >> 3;
+  uint8_t overflow = 0;
+  shift &= 7;
+  for (int i = byteShift; i < s_size; i++)
+  {
+    uint32_t var = a[i - byteShift] << shift;
+    res.m_pimpl->m_data[i] = var | overflow;
+    overflow = var >> 8;
+  }
+  return res;
+}
+
+uint2048_t uint2048_t::Pimpl::ShiftRight(const uint2048_t& a, int shift)
+{
+  // Because the data is stored in reverse endian order, the right shift is really shuffling the bytes left
+  // Within bytes we still shift right
+  // Shift right is easier because we don't need to change any buffer sizes
+  // TODO: check we are zeroing out properly, or normalizing which might be easier
+  uint2048_t res;
+  size_t byteShift = shift >> 3;
+  uint8_t overflow = 0;
+  shift &= 7;
+  //for (size_t i = s_size - 1; i; i--)
+  for (size_t i = byteShift; i < s_size - 1; i++)
+  {
+    uint8_t var = a[i] >> shift;
+    overflow    = a[i+1] << (8 - shift);
+    res.m_pimpl->m_data[i-byteShift] = var | overflow;
+  }
+  res.m_pimpl->m_data[s_size-1-byteShift] = a[s_size-1] >> shift;
+  return res;
+}
+
diff --git a/Crypto/Integer/Integer.h b/Crypto/Integer/Integer.h
new file mode 100644
index 0000000..e2a39ec
--- /dev/null
+++ b/Crypto/Integer/Integer.h
@@ -0,0 +1,91 @@
+/*
+  uint2048_t Class
+  Written by John Ryland
+  (C) Copyright 2015
+*/
+#ifndef INTEGER_H
+#define INTEGER_H
+
+#include <stdint.h>
+#include <vector>
+#include <string>
+#include <memory>
+
+/*
+  uint2048_t is a class that behaves similar to a regular built in type
+  however it can represent arbitarily large values, eg 1024 bit numbers
+  which is commonly the case in cryptographic code or other special uses
+  of maths. As its name suggests, it only holds integers, and positive
+  ones at that. Treat it like you would an unsigned int that can't
+  overflow, and in the case where it could go negative, it truncates to
+  zero (this happens when subtracting a larger number from a smaller one).
+*/
+class uint2048_t
+{
+public:
+  uint2048_t();                            // Default constructor (the value will be zero)
+  uint2048_t(const uint2048_t& other);     // Copy constructor
+  uint2048_t(uint64_t a_number);           // Create from a number
+  uint2048_t(const char* str);             // Create from a string
+  ~uint2048_t();                           // Destructor
+  void operator=(const uint2048_t& other); // Assignment operator
+
+  // Operator overloads
+  bool operator!() const; // Is zero
+  bool operator<(const uint2048_t& other) const;
+  bool operator>(const uint2048_t& other) const;
+  bool operator<=(const uint2048_t& other) const;
+  bool operator>=(const uint2048_t& other) const;
+  bool operator==(const uint2048_t& other) const;
+  bool operator!=(const uint2048_t& other) const;
+  bool operator&&(const uint2048_t& other) const;
+  bool operator||(const uint2048_t& other) const;
+  uint2048_t operator>>(int shift) const;
+  uint2048_t operator<<(int shift) const;
+  uint2048_t operator--(int);
+  uint2048_t operator++(int);
+  uint2048_t operator-(const uint2048_t& other) const;
+  uint2048_t operator+(const uint2048_t& other) const;
+  uint2048_t operator*(const uint2048_t& other) const;
+  uint2048_t operator/(const uint2048_t& other) const;
+  uint2048_t operator%(const uint2048_t& other) const;
+  uint2048_t operator&(const uint2048_t& other) const;
+  uint2048_t operator|(const uint2048_t& other) const;
+  uint2048_t operator^(const uint2048_t& other) const;
+  uint2048_t operator~() const;
+  uint2048_t& operator>>=(int shift);
+  uint2048_t& operator<<=(int shift);
+  uint2048_t& operator--();
+  uint2048_t& operator++();
+  uint2048_t& operator-=(const uint2048_t& other);
+  uint2048_t& operator+=(const uint2048_t& other);
+  uint2048_t& operator*=(const uint2048_t& other);
+  uint2048_t& operator/=(const uint2048_t& other);
+  uint2048_t& operator%=(const uint2048_t& other);
+  uint2048_t& operator&=(const uint2048_t& other);
+  uint2048_t& operator|=(const uint2048_t& other);
+  uint2048_t& operator^=(const uint2048_t& other);
+
+  explicit operator std::string() const;
+  explicit operator uint64_t() const;
+  uint8_t operator[](size_t index) const;
+  void Print() const;
+  size_t Size() const;
+
+  // Returns the quotent, and modulus in modulusResult of numerator / denominator
+  static uint2048_t DivMod(const uint2048_t& numerator, const uint2048_t& denominator, uint2048_t& modulusResult);
+
+  // Returns the result of:  base ^ exponent % modulus
+  static uint2048_t ExpMod(const uint2048_t& base, const uint2048_t& exponent, const uint2048_t& modulus);
+
+private:
+  // Implementation hidden behind pimpl
+  // helps with compile times
+  // helps maintain binary compatibility
+  // makes it possible to easily swap out implementations
+  struct Pimpl;
+  std::unique_ptr<Pimpl> m_pimpl;
+};
+
+#endif // INTEGER_H
+
diff --git a/Crypto/Integer/Makefile b/Crypto/Integer/Makefile
new file mode 100644
index 0000000..1031bd2
--- /dev/null
+++ b/Crypto/Integer/Makefile
@@ -0,0 +1,7 @@
+
+TARGET       = tests
+CONFIG       = release
+SOURCES      = main.cpp Integer.cpp
+
+include ../../Makefile.inc
+

diff --git a/Crypto/Integer/Integer.cpp b/Crypto/Integer/Integer.cpp
new file mode 100644
index 0000000..52318b2
--- /dev/null
+++ b/Crypto/Integer/Integer.cpp
@@ -0,0 +1,746 @@
+/*
+  uint2048_t Class
+  Written by John Ryland
+  (C) Copyright 2015
+*/
+#include "Integer.h"
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <vector>
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <iomanip>
+#include "../../Util.h"
+
+enum ComparisonResult
+{
+  CR_Equal,
+  CR_LessThan,
+  CR_GreaterThan
+};
+
+struct uint2048_t::Pimpl
+{
+  void Init(const char* str);
+  void Copy(const uint2048_t& other);
+  void Normalize();
+  size_t GetSize();
+
+  static ComparisonResult Compare(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Add(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Subtract(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Multiply(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Divide(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Modulus(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t ShiftLeft(const uint2048_t& a, int shift);
+  static uint2048_t ShiftRight(const uint2048_t& a, int shift);
+  static uint2048_t Or(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t And(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Xor(const uint2048_t& a, const uint2048_t& b);
+  static uint2048_t Not(const uint2048_t& a);
+
+  // Helper function to optimize the ExpMod function
+  static void MulMod(uint8_t* a, uint8_t* b, uint8_t* mod, size_t an, size_t bn, size_t mn, uint8_t* dest);
+
+#define s_size  257 // Fixed at 2048 bits (256*8)
+  uint8_t m_data[s_size];
+};
+
+uint2048_t::uint2048_t() // Default constructor
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  m_pimpl->Init("0");
+}
+
+uint2048_t::uint2048_t(const uint2048_t& other) // Copy constructor
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  m_pimpl->Copy(other);
+}
+
+uint2048_t::uint2048_t(uint64_t a_number) // Create from a number
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  std::string numStr = int2hex(a_number);
+  //printf("\n init from int: 0x%x -> %s\n", int(a_number), numStr.c_str());
+  m_pimpl->Init(numStr.c_str());
+  //m_pimpl->Init(int2hex(a_number).c_str());
+}
+
+uint2048_t::uint2048_t(const char* str) // Create from a string
+ : m_pimpl(std::make_unique<Pimpl>())
+{
+  m_pimpl->Init(str);
+}
+
+uint2048_t::~uint2048_t() // Destructor
+{
+}
+
+void uint2048_t::operator=(const uint2048_t& other) // Assignment operator
+{
+  m_pimpl->Copy(other);
+}
+
+bool uint2048_t::operator!() const // Is zero
+{
+  return (Pimpl::Compare(*this, uint2048_t()) == CR_Equal);
+}
+
+bool uint2048_t::operator!=(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) != CR_Equal);
+}
+
+bool uint2048_t::operator==(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) == CR_Equal);
+}
+
+bool uint2048_t::operator<(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) == CR_LessThan);
+}
+
+bool uint2048_t::operator>(const uint2048_t& other) const
+{
+  return (Pimpl::Compare(*this, other) == CR_GreaterThan);
+}
+
+bool uint2048_t::operator<=(const uint2048_t& other) const
+{
+  ComparisonResult result = Pimpl::Compare(*this, other);
+  return (result == CR_Equal || result == CR_LessThan);
+}
+
+bool uint2048_t::operator>=(const uint2048_t& other) const
+{
+  ComparisonResult result = Pimpl::Compare(*this, other);
+  return (result == CR_Equal || result == CR_GreaterThan);
+}
+
+bool uint2048_t::operator&&(const uint2048_t& other) const
+{
+  return (!!*this) && (!!other);
+}
+
+bool uint2048_t::operator||(const uint2048_t& other) const
+{
+  return (!!*this) || (!!other);
+}
+
+uint2048_t& uint2048_t::operator--()
+{
+  *this = Pimpl::Subtract(*this, uint2048_t(1));
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator++()
+{
+  *this = Pimpl::Add(*this, uint2048_t(1));
+  return *this;
+}
+
+uint2048_t uint2048_t::operator++(int) 
+{
+  uint2048_t tmp(*this);
+  operator++();
+  return tmp;
+}
+
+uint2048_t uint2048_t::operator--(int) 
+{
+  uint2048_t tmp(*this);
+  operator--();
+  return tmp;
+}
+
+uint2048_t& uint2048_t::operator-=(const uint2048_t& other)
+{
+  *this = Pimpl::Subtract(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator+=(const uint2048_t& other)
+{
+  *this = Pimpl::Add(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator*=(const uint2048_t& other) 
+{
+  *this = Pimpl::Multiply(*this, other);
+  return *this;
+}
+
+uint2048_t uint2048_t::operator-(const uint2048_t& other) const
+{
+  return Pimpl::Subtract(*this, other);
+}
+
+uint2048_t uint2048_t::operator+(const uint2048_t& other) const
+{
+  return Pimpl::Add(*this, other);
+}
+
+uint2048_t uint2048_t::operator*(const uint2048_t& other) const
+{
+  return Pimpl::Multiply(*this, other);
+}
+
+uint2048_t uint2048_t::operator/(const uint2048_t& other) const
+{
+  return Pimpl::Divide(*this, other);
+}
+
+uint2048_t uint2048_t::operator%(const uint2048_t& other) const
+{
+  return Pimpl::Modulus(*this, other);
+}
+
+uint2048_t uint2048_t::operator&(const uint2048_t& other) const
+{
+  return Pimpl::And(*this, other);
+}
+
+uint2048_t uint2048_t::operator|(const uint2048_t& other) const
+{
+  return Pimpl::Or(*this, other);
+}
+
+uint2048_t uint2048_t::operator^(const uint2048_t& other) const
+{
+  return Pimpl::Xor(*this, other);
+}
+
+uint2048_t uint2048_t::operator~() const
+{
+  return Pimpl::Not(*this);
+}
+
+uint2048_t uint2048_t::operator>>(int shift) const
+{
+  return Pimpl::ShiftRight(*this, shift);
+}
+
+uint2048_t uint2048_t::operator<<(int shift) const
+{
+  return Pimpl::ShiftLeft(*this, shift);
+}
+
+uint2048_t& uint2048_t::operator>>=(int shift)
+{
+  *this = Pimpl::ShiftRight(*this, shift);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator<<=(int shift)
+{
+  *this = Pimpl::ShiftLeft(*this, shift);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator/=(const uint2048_t& other)
+{
+  *this = Pimpl::Divide(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator%=(const uint2048_t& other)
+{
+  *this = Pimpl::Modulus(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator&=(const uint2048_t& other)
+{
+  *this = Pimpl::And(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator|=(const uint2048_t& other)
+{
+  *this = Pimpl::Or(*this, other);
+  return *this;
+}
+
+uint2048_t& uint2048_t::operator^=(const uint2048_t& other)
+{
+  *this = Pimpl::Xor(*this, other);
+  return *this;
+}
+
+uint2048_t::operator uint64_t() const
+{
+  return (uint64_t)strtoull(std::string(*this).c_str(), 0, 16);
+}
+
+void uint2048_t::Print() const
+{
+  std::cout << std::string(*this);
+}
+
+// base ^ exponent % modulus
+uint2048_t uint2048_t::ExpMod(const uint2048_t& base, const uint2048_t& exponent, const uint2048_t& modulus)
+{
+  uint2048_t dst(1);
+  uint2048_t a = base % modulus;
+  uint2048_t b = exponent % modulus;
+
+  size_t mn = modulus.m_pimpl->GetSize();
+  const uint8_t *modBits = modulus.m_pimpl->m_data;
+  uint8_t *aBits = a.m_pimpl->m_data;
+  size_t an = a.m_pimpl->GetSize();
+
+#if 1 
+  // pre-computed modulus shifted by 0 to 7
+  uint8_t tmpModBits[s_size*16];
+  for (int shift = 0; shift < 8; shift++)
+  {
+    uint32_t overflow = 0;
+    uint8_t* mb = &tmpModBits[s_size*2*shift];
+    memset(mb, 0, s_size*2);
+
+    for (int i = 0; i < s_size; i++)
+    {
+      uint32_t var = modBits[i] << shift;
+      mb[i] = var | overflow;
+      overflow = var >> 8;
+    }
+  }
+#endif
+
+  uint8_t tmp1Bits[s_size*2];
+  uint8_t *dstBits = dst.m_pimpl->m_data;
+
+  //printf("\n a: %s\n", std::string(a).c_str());
+  //printf("\n b: %s\n", std::string(b).c_str());
+  while (!!b) // while (bnbits)
+  {
+    if (b[0] & 1)  // if (!!(b & BigInteger(1)))
+    {
+      //dst = (dst * a) % modulus;
+      memset(tmp1Bits, 0, s_size*2);
+      size_t dn = dst.m_pimpl->GetSize();
+      Pimpl::MulMod(dstBits, aBits, tmpModBits, dn, an, mn, tmp1Bits);
+      memcpy(dstBits, tmp1Bits, s_size);
+    }
+
+    //a = (a * a) % modulus;
+    memset(tmp1Bits, 0, s_size*2);
+    Pimpl::MulMod(aBits, aBits, tmpModBits, an, an, mn, tmp1Bits);
+    memcpy(aBits, tmp1Bits, s_size);
+    an = a.m_pimpl->GetSize();
+    
+    b >>= 1; // perhaps could instead of shifting, test a particular bit instead of lowest bit
+    // bnbits--;
+  }
+
+  return dst;
+}
+
+uint2048_t::operator std::string() const
+{
+  std::ostringstream oss;
+  oss << "0x";
+  size_t n = m_pimpl->GetSize();
+  for (size_t i = 0; i < n; i++)
+    oss << std::setfill('0') << std::setw(2) << std::hex << (int)m_pimpl->m_data[n - i - 1];
+  return oss.str();
+}
+
+size_t uint2048_t::Size() const
+{
+  return (sizeof(m_pimpl->m_data) / sizeof(m_pimpl->m_data[0]));
+}
+
+uint8_t uint2048_t::operator[](size_t index) const
+{
+  return m_pimpl->m_data[index];
+}
+
+
+////////////////// PRIVATE //////////////////////////////////////////////////
+
+static unsigned int HexCharToInt(unsigned char ch)
+{
+  unsigned int c = tolower(ch);
+  return (c >= 'a') ? c - 'a' + 10 : c - '0';
+}
+
+void uint2048_t::Pimpl::Init(const char* str)
+{
+  int len = strlen(str);
+  memset(m_data, 0, s_size);
+  size_t idx = 0;
+  for (int i = 0; i < len; i++, idx++)
+  {
+    int val = HexCharToInt(str[len - 1 - i]);
+    i++;
+    if (i < len)
+      val |= HexCharToInt(str[len - 1 - i]) << 4;
+    m_data[idx] = val;
+  }
+}
+
+void uint2048_t::Pimpl::Copy(const uint2048_t& other)
+{
+  memcpy(m_data, other.m_pimpl->m_data, sizeof(m_data));
+}
+
+void uint2048_t::Pimpl::Normalize()
+{
+}
+
+size_t uint2048_t::Pimpl::GetSize()
+{
+  size_t an;
+  for (an = s_size - 1; an; an--)
+    if (m_data[an])
+      break;
+  an++;
+  return an;
+}
+
+ComparisonResult uint2048_t::Pimpl::Compare(const uint2048_t& a, const uint2048_t& b)
+{
+  size_t an = s_size - 1;
+  ComparisonResult result = CR_Equal;
+  do {
+    if (a[an] > b[an]) {
+      result = CR_GreaterThan; // a > b
+      break;
+    }
+    if (a[an] < b[an]) {
+      result = CR_LessThan; // b > a
+      break;
+    }
+  } while (an--);
+  /*
+  // This extra loop has no meaning except to try to show how we might reduce
+  // timing based side channel attacks by making the running time more consistent
+  int dummy = 0;
+  do {
+    if (a[an] > b[an]) {
+      dummy++;
+    }
+    if (a[an] < b[an]) {
+      dummy--;
+    }
+  } while (an--);
+  */
+  return result; // the two values are equal
+}
+
+uint2048_t uint2048_t::Pimpl::Or(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result = a;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] |= b[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::And(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result = a;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] &= b[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Xor(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result = a;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] ^= b[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Not(const uint2048_t& a)
+{
+  uint2048_t result;
+  for (size_t idx = 0; idx < s_size; idx++)
+    result.m_pimpl->m_data[idx] = ~a[idx];
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Add(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result;
+  uint8_t carry = 0; // carry / borrow flag
+  for (size_t i = 0; i < s_size; i++)
+  {
+    // apply the carry from previous iteration
+    uint32_t tmp = a[i] + carry + b[i];
+    //if (i != s_size - 1)
+    result.m_pimpl->m_data[i] = tmp;
+    carry = tmp >> 8;
+  }
+  return result;
+}
+
+uint2048_t uint2048_t::Pimpl::Subtract(const uint2048_t& a, const uint2048_t& b)
+{
+  uint2048_t result;
+  uint32_t borrow = 0; // carry / borrow flag
+  for (size_t i = 0; i < s_size; i++)
+  {
+    // apply the borrow from previous iteration
+    uint32_t tmp = a[i] - borrow - b[i];
+    result.m_pimpl->m_data[i] = tmp;
+    borrow = (tmp >> 8) & 1;
+  }
+  return result;
+}
+
+// big number multiply algorithms
+// https://en.wikipedia.org/wiki/Multiplication_algorithm#Long_multiplication
+uint2048_t uint2048_t::Pimpl::Multiply(const uint2048_t& a, const uint2048_t& b)
+{
+  //if (b > a) // Swap if a is not the larger value. We can now assume a is the larger value after
+  //  return Multiply(b, a);
+
+  // Work out expected output size
+  size_t an = a.m_pimpl->GetSize();
+  size_t bn = b.m_pimpl->GetSize();
+  //if (bn > an)
+  //  return Multiply(b, a);
+
+/*
+a=   1234   a[0] = 4, a[1] = 3 ...
+b=  x5678
+    -----
+      4x8            r[0+0] +=                      (a[0] x b[0]) % 10
+     +4x7x10         r[0+1] += (a[0] x b[0]) / 10 + (a[0] x b[1]) % 10
+     +4x6x100        r[0+2] += (a[0] x b[1]) / 10 + (a[0] x b[2]) % 10
+     +4x5x1000       r[0+3] += (a[0] x b[2]) / 10 + (a[0] x b[3]) % 10
+     +3x8x10         r[1+0] +=                      (a[1] x b[0]) % 10
+     +3x7x100        r[1+1] += (a[1] x b[0]) / 10 + (a[1] x b[1]) % 10
+     +3x6x1000
+     +3x5x10000
+  -----
+*/
+
+  // Setup a destination value
+  uint2048_t d;
+  uint8_t dest[s_size*2];
+  memset(dest, 0, s_size*2);
+  for (size_t j = 0; j < an; j++)
+  {
+    // basically component wise matrix of a * b
+    uint32_t c1 = 0, c2 = 0;
+    for (size_t i = 0; i < bn; i++)
+    {
+      uint32_t t1 = ((uint32_t)a[j] * (uint32_t)b[i]) + (uint32_t)c1;
+      uint32_t t2 = (uint32_t)dest[i+j] + c2 + (t1 & 0xFF);
+      c1 = t1 >> 8;
+      c2 = t2 >> 8;
+      dest[i+j] = t2;
+    }
+    //dest[bn+j] = dest[bn+j] + c1 + c2;
+    dest[bn+j] += c1 + c2;
+  }
+  memcpy(d.m_pimpl->m_data, dest, s_size);
+  return d;
+}
+
+uint2048_t uint2048_t::Pimpl::Divide(const uint2048_t& a, const uint2048_t& b)
+{
+  size_t an = a.m_pimpl->GetSize() - 1;
+  size_t bn = b.m_pimpl->GetSize() - 1;
+  if (bn > an)
+    return uint2048_t();
+  size_t bitsLarger = (an - bn + 1) << 3;
+  //printf(" bl: %i \n", bitsLarger);
+  uint2048_t bit = uint2048_t(1) << bitsLarger;
+  //printf(" bit: %s \n", std::string(bit).c_str());
+  uint2048_t tmp = b << bitsLarger;
+  //printf(" tmp: %s \n", std::string(tmp).c_str());
+  uint2048_t mod = a;
+  uint2048_t res = uint2048_t();
+  do {
+    if (mod >= tmp) {
+      mod -= tmp;
+      res += bit;
+    }
+    tmp >>= 1;
+    bit >>= 1;
+  } while (bitsLarger--);
+  return res;
+}
+
+uint2048_t uint2048_t::Pimpl::Modulus(const uint2048_t& a, const uint2048_t& b)
+{
+  size_t an = a.m_pimpl->GetSize() - 1;
+  size_t bn = b.m_pimpl->GetSize() - 1;
+  //printf(" an: %i bn: %i \n", an, bn);
+  //printf("\n");
+  if (bn > an)
+  {
+    //printf("\n bn > an   an: %i bn: %i \n", an, bn);
+    return a;
+  }
+  if (bn == an)
+    if (b > a)
+    {
+      //printf("\n b > a   an: %i bn: %i \n", an, bn);
+      //printf(" a: %s \n", std::string(a).c_str());
+      //printf(" b: %s \n\n", std::string(b).c_str());
+      return a;
+    }
+  size_t bitsLarger = (an - bn + 1) << 3;
+  //printf(" bl: %i \n", bitsLarger);
+  uint2048_t tmp = b;
+  tmp <<= bitsLarger;
+  //printf(" tmp: %s \n", std::string(tmp).c_str());
+  uint2048_t mod = a;
+  do {
+    if (mod >= tmp) {
+      mod -= tmp;
+      //printf(" mod: %s \n", std::string(mod).c_str());
+    }
+    tmp >>= 1;
+    //printf(" tmp: %s \n", std::string(tmp).c_str());
+  } while (bitsLarger--);
+  //printf(" mod: %s \n", std::string(mod).c_str());
+  return mod;
+}
+
+void uint2048_t::Pimpl::MulMod(uint8_t* a, uint8_t* b, uint8_t* mod, size_t an, size_t bn, size_t mn, uint8_t* dest)
+//uint2048_t uint2048_t::Pimpl::MulMod(const uint2048_t& a, const uint2048_t& b, const uint2048_t& mod)
+{
+  // This is where we are doing the multiplication in to dest (which is double width)
+  for (size_t j = 0; j < an; j++)
+  {
+    // basically component wise matrix of a * b
+    uint32_t c1 = 0, c2 = 0;
+    for (size_t i = 0; i < bn; i++)
+    {
+      uint32_t t1 = ((uint32_t)a[j] * (uint32_t)b[i]) + (uint32_t)c1;
+      uint32_t t2 = (uint32_t)dest[i+j] + c2 + (t1 & 0xFF);
+      c1 = t1 >> 8;
+      c2 = t2 >> 8;
+      dest[i+j] = t2;
+    }
+    dest[bn+j] += c1 + c2;
+  }
+
+  // 'an' is now the size of dest
+  an = an + bn;
+
+  uint8_t* dstPtr = dest;
+
+  // check the magnitude of modulus is about or less than dest (otherwise we can just return dest)
+  if (mn <= an)
+  {
+    // This is where we are doing the modulus of  dest % mod
+    size_t byteShift = an - mn + 1;
+    size_t bitsLarger = byteShift << 3;
+
+    // siz is the byte width of the numbers we need to deal with
+    const size_t siz = s_size + byteShift;
+
+    // shift 'mod' in to place directly in to 'tmp'  (eg: tmp = mod << byteShift)
+    //memcpy(&tmp[byteShift], mod, s_size);
+
+    // here we are doing the shift 'virtually' in that we just keep track of the shift amount
+    // and then fix up below as applicable
+    const uint8_t* tmpPtr = mod;
+    size_t byt = byteShift;
+
+    do {
+      size_t idx = siz - 1;
+      // we are checking if dest >= tmp. We exit the loop when they stop being equal as we scan from high
+      // order bytes to low order ones. When they stop being equal (or we get to the end of the array)
+      // the comparison of the last value we were at will be the comparison result for the entire value
+      for (idx = siz - 1; idx >= byt; idx--)
+        if (dest[idx] != tmpPtr[idx-byt]) // here 'tmp' is indexed in a way as if it was shifted by byt bytes
+          break;
+      if (idx < byt)
+        for (idx = byt - 1; idx; idx--)
+          if (dest[idx]) // here we are in the area of the virtual 'tmp' that was shifted out to be filled with virtual 0s so this collapsed to just be checking if dest[idx] != 0
+            break;
+      /*
+      while (idx && dest[idx] == ((idx >= byt) ? tmpPtr[idx-byt] : 0) ) // Hot code path
+      {
+        idx--;
+      }
+      */
+      if (dest[idx] >= ((idx >= byt) ? tmpPtr[idx-byt] : 0) )
+      {
+        // here we subtract 'tmp' from 'dest'  (eg: dest -= tmp)
+        uint32_t borrow = 0; // carry / borrow flag
+        //for (size_t i = 0; i < siz; i++)
+        for (size_t i = byt; i < siz; i++)
+        {
+          // apply the borrow from previous iteration
+          // uint32_t x = dest[i] - borrow - ((i >= byt) ? tmpPtr[i-byt] : 0); // tmp[i];  // Hot
+          /*
+          uint32_t x = dest[i] - borrow - tmpPtr[i-byt]; // Hot code path
+          dest[i] = x;
+          borrow = (x >> 8) & 1;
+          */
+          uint32_t t = tmpPtr[i-byt]; // OOO
+          uint32_t sub = borrow + t;
+          uint32_t x = dest[i]; // OOO
+          x = x - sub;
+          dest[i] = x; // OO
+          x >>= 8;
+          x &= 1;
+          borrow = x;// (x >> 8) & 1;
+        }
+      }
+
+      if (bitsLarger--)
+      {
+        // 'tmp' = 'mod' << bitsLarger
+        // but we just update the virtual tmp values (mod has precomputed shifts from 0-7)
+        tmpPtr = mod + s_size*2*(bitsLarger & 7);
+        byt = bitsLarger >> 3;
+      }
+      bitsLarger++;
+    } while (bitsLarger--);
+  }
+}
+
+uint2048_t uint2048_t::Pimpl::ShiftLeft(const uint2048_t& a, int shift)
+{
+  // Because the data is stored in reverse endian order, the left shift is really shuffling the bytes right
+  // Within the bytes we still shift left
+  uint2048_t res;
+  size_t byteShift = shift >> 3;
+  uint8_t overflow = 0;
+  shift &= 7;
+  for (int i = byteShift; i < s_size; i++)
+  {
+    uint32_t var = a[i - byteShift] << shift;
+    res.m_pimpl->m_data[i] = var | overflow;
+    overflow = var >> 8;
+  }
+  return res;
+}
+
+uint2048_t uint2048_t::Pimpl::ShiftRight(const uint2048_t& a, int shift)
+{
+  // Because the data is stored in reverse endian order, the right shift is really shuffling the bytes left
+  // Within bytes we still shift right
+  // Shift right is easier because we don't need to change any buffer sizes
+  // TODO: check we are zeroing out properly, or normalizing which might be easier
+  uint2048_t res;
+  size_t byteShift = shift >> 3;
+  uint8_t overflow = 0;
+  shift &= 7;
+  //for (size_t i = s_size - 1; i; i--)
+  for (size_t i = byteShift; i < s_size - 1; i++)
+  {
+    uint8_t var = a[i] >> shift;
+    overflow    = a[i+1] << (8 - shift);
+    res.m_pimpl->m_data[i-byteShift] = var | overflow;
+  }
+  res.m_pimpl->m_data[s_size-1-byteShift] = a[s_size-1] >> shift;
+  return res;
+}
+
diff --git a/Crypto/Integer/Integer.h b/Crypto/Integer/Integer.h
new file mode 100644
index 0000000..e2a39ec
--- /dev/null
+++ b/Crypto/Integer/Integer.h
@@ -0,0 +1,91 @@
+/*
+  uint2048_t Class
+  Written by John Ryland
+  (C) Copyright 2015
+*/
+#ifndef INTEGER_H
+#define INTEGER_H
+
+#include <stdint.h>
+#include <vector>
+#include <string>
+#include <memory>
+
+/*
+  uint2048_t is a class that behaves similar to a regular built in type
+  however it can represent arbitarily large values, eg 1024 bit numbers
+  which is commonly the case in cryptographic code or other special uses
+  of maths. As its name suggests, it only holds integers, and positive
+  ones at that. Treat it like you would an unsigned int that can't
+  overflow, and in the case where it could go negative, it truncates to
+  zero (this happens when subtracting a larger number from a smaller one).
+*/
+class uint2048_t
+{
+public:
+  uint2048_t();                            // Default constructor (the value will be zero)
+  uint2048_t(const uint2048_t& other);     // Copy constructor
+  uint2048_t(uint64_t a_number);           // Create from a number
+  uint2048_t(const char* str);             // Create from a string
+  ~uint2048_t();                           // Destructor
+  void operator=(const uint2048_t& other); // Assignment operator
+
+  // Operator overloads
+  bool operator!() const; // Is zero
+  bool operator<(const uint2048_t& other) const;
+  bool operator>(const uint2048_t& other) const;
+  bool operator<=(const uint2048_t& other) const;
+  bool operator>=(const uint2048_t& other) const;
+  bool operator==(const uint2048_t& other) const;
+  bool operator!=(const uint2048_t& other) const;
+  bool operator&&(const uint2048_t& other) const;
+  bool operator||(const uint2048_t& other) const;
+  uint2048_t operator>>(int shift) const;
+  uint2048_t operator<<(int shift) const;
+  uint2048_t operator--(int);
+  uint2048_t operator++(int);
+  uint2048_t operator-(const uint2048_t& other) const;
+  uint2048_t operator+(const uint2048_t& other) const;
+  uint2048_t operator*(const uint2048_t& other) const;
+  uint2048_t operator/(const uint2048_t& other) const;
+  uint2048_t operator%(const uint2048_t& other) const;
+  uint2048_t operator&(const uint2048_t& other) const;
+  uint2048_t operator|(const uint2048_t& other) const;
+  uint2048_t operator^(const uint2048_t& other) const;
+  uint2048_t operator~() const;
+  uint2048_t& operator>>=(int shift);
+  uint2048_t& operator<<=(int shift);
+  uint2048_t& operator--();
+  uint2048_t& operator++();
+  uint2048_t& operator-=(const uint2048_t& other);
+  uint2048_t& operator+=(const uint2048_t& other);
+  uint2048_t& operator*=(const uint2048_t& other);
+  uint2048_t& operator/=(const uint2048_t& other);
+  uint2048_t& operator%=(const uint2048_t& other);
+  uint2048_t& operator&=(const uint2048_t& other);
+  uint2048_t& operator|=(const uint2048_t& other);
+  uint2048_t& operator^=(const uint2048_t& other);
+
+  explicit operator std::string() const;
+  explicit operator uint64_t() const;
+  uint8_t operator[](size_t index) const;
+  void Print() const;
+  size_t Size() const;
+
+  // Returns the quotent, and modulus in modulusResult of numerator / denominator
+  static uint2048_t DivMod(const uint2048_t& numerator, const uint2048_t& denominator, uint2048_t& modulusResult);
+
+  // Returns the result of:  base ^ exponent % modulus
+  static uint2048_t ExpMod(const uint2048_t& base, const uint2048_t& exponent, const uint2048_t& modulus);
+
+private:
+  // Implementation hidden behind pimpl
+  // helps with compile times
+  // helps maintain binary compatibility
+  // makes it possible to easily swap out implementations
+  struct Pimpl;
+  std::unique_ptr<Pimpl> m_pimpl;
+};
+
+#endif // INTEGER_H
+
diff --git a/Crypto/Integer/Makefile b/Crypto/Integer/Makefile
new file mode 100644
index 0000000..1031bd2
--- /dev/null
+++ b/Crypto/Integer/Makefile
@@ -0,0 +1,7 @@
+
+TARGET       = tests
+CONFIG       = release
+SOURCES      = main.cpp Integer.cpp
+
+include ../../Makefile.inc
+
diff --git a/Crypto/Integer/main.cpp b/Crypto/Integer/main.cpp
new file mode 100644
index 0000000..ff34d43
--- /dev/null
+++ b/Crypto/Integer/main.cpp
@@ -0,0 +1,78 @@
+#include <stdio.h>
+#include <stdint.h>
+#include "Integer.h"
+
+
+const char* secretMessage =
+	"b3bb321dd5ba49a6dc3003d47d769e0b8001154fb7e785c567e68986"
+	"cf6caadd1f57c8f0b5bd33553ab25b519f8f59637d2913e21ba6c2c4"
+	"f8cc1e4b933507191352acbbbca4774b82471bd052dfd9f95f17e644"
+	"cd0efdfbe692ea54d78fcdb79e6aa6ae9d6c100d7dd1f72bb68ae8e7"
+	"2fe3886e90aa89268c5c8eb961df316b568acd8c1e78cb20ec1ebd3e"
+	"ef01828e271093a9c83380b62187f2a92f0d191ba463307de661c5ad"
+	"0ac229cc347d8dd932a639dc1d724a707d2f5528ca75005a126877e9"
+	"b58e7e139854dae90e43dcdfb3d2373f412aafa2e1a977addc9be9a8"
+	"c1e959af236dcd42e76f1fefd577c6211f7b033da9c2b683f131a7c7"
+	"0d96038b";
+
+
+const char* publicModulus =
+	"C595751A3ECF23F251F42D53053E2A88B195F79FAA370EBDA69D4C0E"
+	"8535E9DCBD1A28CD726CB097504B041807FB0755228B180F4EC82907"
+	"2BF716A4DD0948917B09308509916A7AB1F4D5C2D65444284FB734AB"
+	"7F161AE2FACC6962EC0CF12559BD81E34E945B8A5BDE6276EAD96F7B"
+	"1ECBE0BFD1D8266261AF6C6CF6D72F0962002FC774A0FB43C5A7EA3E"
+	"FFBFC5EF95703CB408BA414E0C280611F03A8423C3A5A827D49B8CB8"
+	"5E92513C7A68870701B16D0511EE1A8EA1998A519DAA3BFFCB926236"
+	"D94658C57E413B49E8F7881B7000DF05655487E9B0D9D82F077EEC1F"
+	"8533816D074AE6B347430C5F3F98E5350A0A3121B243ED1C1CFA9408"
+	"BA64E00D";
+
+
+const char* publicExponent = "010001";
+
+
+template <typename T>
+void rsa_test(bool debug)
+{
+	printf("\nRSA test:\n");
+	T data(secretMessage);
+	T exponent(publicExponent);
+	T modulus(publicModulus);
+	T result = T::ExpMod(data, exponent, modulus);
+    size_t siz = result.Size();
+    if (debug)
+    {
+	  printf("\ndata:     "); data.Print();
+	  printf("\nexponent: "); exponent.Print();
+	  printf("\nmodulus:  "); modulus.Print();
+	  printf("\nresult:   "); result.Print();
+	  printf("\nsize:     %i", int(siz));
+    }
+	printf("\nmessage: \n");
+    for (size_t i = 0; i < siz; i++)
+    {
+	  unsigned int val = result[siz-1-i];
+	  if (isprint(val) || isspace(val))
+        printf("%c", val);
+    }
+	printf("\n");
+}
+
+
+int main(int argc, char* argv[])
+{
+  if (argc != 2)
+  {
+    printf("Usage:  %s  loops\n", argv[0]);
+    return -1;
+  }
+  int loops = atoi(argv[1]);
+  for (int i = 0; i < loops; i++)
+  {
+    rsa_test<uint2048_t>(false);
+  }
+  return 0;
+}
+
+