///////////////////////////////////////////////////////////////////////////////////////////////
//
//  SWIFFTX ANSI C OPTIMIZED 32BIT IMPLEMENTATION FOR NIST SHA-3 COMPETITION
//
//  SWIFFTX.c
//
//  October 2008
//
//  This is the source file of the OPTIMIZED 32BIT implementation of SWIFFTX hash function.
//  SWIFFTX is a candidate function for SHA-3 NIST competition.
//  More details about SWIFFTX can be found in the accompanying submission documents.
//
///////////////////////////////////////////////////////////////////////////////////////////////
#include "swifftx.h"
// See the remarks concerning compatibility issues inside stdint.h.
#include "stdint.h"
// Remove this while using gcc:
//#include "stdbool.h"
#include <memory.h>

#include "simd-utils.h"

///////////////////////////////////////////////////////////////////////////////////////////////
// Constants and static tables portion.
///////////////////////////////////////////////////////////////////////////////////////////////

// In SWIFFTX we work over Z_257, so this is the modulus and the arithmetic is performed modulo
// this number.
#define FIELD_SIZE 257

// The size of FFT we use:
#define N 64

// The base-2 logarithm of N (2^6 = 64).
#define LOGN 6

// One eighth of N (64 / 8 = 8).
#define EIGHTH_N (N / 8)

// The number of FFTS done on the input.
// SWIFFTX_INPUT_BLOCK_SIZE is not defined in this part of the file (presumably it comes from
// swifftx.h); the value 32 noted on the line below assumes that it is 256.
#define M (SWIFFTX_INPUT_BLOCK_SIZE / 8)   // 32

// Omega is a 128th root of unity in Z_257.
// We choose w = 42. Check: 42^8 = 2 (mod 257) and 2^8 = 256 = -1 (mod 257), hence
// 42^64 = -1 and 42^128 = 1 (mod 257), i.e. the multiplicative order of 42 is exactly 128.
#define OMEGA 42

// The size of the inner FFT lookup table:
#define W 8

// Calculates the sum and the difference of two numbers.
//
// Parameters:
// - A: the first operand. After the operation stores the sum of the two operands.
// - B: the second operand. After the operation stores the difference between the first and the
//   second operands.
//#define ADD_SUB(A, B) {register int temp = (B); B = ((A) - (B)); A = ((A) + (temp));}

// Quickly reduces an integer modulo 257.
//
// Parameters:
// - A: the input.
//#define Q_REDUCE(A) (((A) & 0xff) - ((A) >> 8))

// Since we need to do the setup only once, this is the indicator variable.
// It is private to this file (static) and starts out false; the code that performs the setup
// and updates the flag is not in this part of the file.
// NOTE(review): 'bool' and 'false' need <stdbool.h> or an equivalent definition. The stdbool.h
// include near the top of this file is commented out, so presumably one of the included
// project headers provides them -- confirm.
static bool wasSetupDone = false;

// This array stores the powers of omegas that correspond to the indices, which are the input
// values. Known also as the "outer FFT twiddle factors".
// It has N (= 64) entries and is aligned to a 64-byte boundary. There is no initializer here;
// presumably the table is filled in by the one-time setup code -- confirm.
swift_int16_t multipliers[N] __attribute__ ((aligned (64)));

// This array stores the powers of omegas, multiplied by the corresponding values.
// We store this table to save computation time.
//
// To calculate the intermediate value of the compression function (the first out of two
// stages), we multiply the k-th bit of x_i by w^[(2i + 1) * k]. {x_i} is the input to the
// compression function, i is between 0 and 31, x_i is a 64-bit value.
// One can see the formula for this (intermediate) stage in the SWIFFT FSE 2008 paper --
// formula (2), section 3, page 6.
//
// The table has 256 * EIGHTH_N (= 2048) entries and is aligned to a 64-byte boundary. There is
// no initializer here; presumably the table is filled in by the one-time setup code -- confirm.
swift_int16_t fftTable[256 * EIGHTH_N] __attribute__ ((aligned (64)));

// The A's we use in SWIFFTX shall be random elements of Z_257.
// We generated these A's from the decimal expansion of PI as follows: we converted each
// triple of digits into a decimal number d. If d < (257 * 3) we used (d % 257) for the next A
// element, otherwise we moved on to the next triple of digits in the expansion. This
// guarantees that the A's are random, provided that the digits of PI are.
//
// The table holds 3 * M * N entries, each of them a residue in the range 0..256, and it is
// aligned to a 64-byte boundary. These values are fixed constants of the hash function and
// must not be altered.
const swift_int16_t As[3 * M * N] __attribute__ ((aligned (64))) =
{141,  78, 139,  75, 238, 205, 129, 126,  22, 245, 197, 169, 142, 118, 105,  78,
  50, 149,  29, 208, 114,  34,  85, 117,  67, 148,  86, 256,  25,  49, 133,  93,
  95,  36,  68, 231, 211, 102, 151, 128, 224, 117, 193,  27, 102, 187,   7, 105,
  45, 130, 108, 124, 171, 151, 189, 128, 218, 134, 233, 165,  14, 201, 145, 134,
  52, 203,  91,  96, 197,  69, 134, 213, 136,  93,   3, 249, 141,  16, 210,  73,
   6,  92,  58,  74, 174,   6, 254,  91, 201, 107, 110,  76, 103,  11,  73,  16,
  34, 209,   7, 127, 146, 254,  95, 176,  57,  13, 108, 245,  77,  92, 186, 117,
 124,  97, 105, 118,  34,  74, 205, 122, 235,  53,  94, 238, 210, 227, 183,  11,
 129, 159, 105, 183, 142, 129,  86,  21, 137, 138, 224, 223, 190, 188, 179, 188,
 256,  25, 217, 176,  36, 176, 238, 127, 160, 210, 155, 148, 132,   0,  54, 127,
 145,   6,  46,  85, 243,  95, 173, 123, 178, 207, 211, 183, 224, 173, 146,  35,
  71, 114,  50,  22, 175,   1,  28,  19, 112, 129,  21,  34, 161, 159, 115,  52,
   4, 193, 211,  92, 115,  49,  59, 217, 218,  96,  61,  81,  24, 202, 198,  89,
  45, 128,   8,  51, 253,  87, 171,  35,   4, 188, 171,  10,   3, 137, 238,  73,
  19, 208, 124, 163, 103, 177, 155, 147,  46,  84, 253, 233, 171, 241, 211, 217,
 159,  48,  96,  79, 237,  18, 171, 226,  99,   1,  97, 195, 216, 163, 198,  95,
   0, 201,  65, 228,  21, 153, 124, 230,  44,  35,  44, 108,  85, 156, 249, 207,
  26, 222, 131,   1,  60, 242, 197, 150, 181,  19, 116, 213,  75,  98, 124, 240,
 123, 207,  62, 255,  60, 143, 187, 157, 139,   9,  12, 104,  89,  49, 193, 146,
 104, 196, 181,  82, 198, 253, 192, 191, 255, 122, 212, 104,  47,  20, 132, 208,
  46, 170,   2,  69, 234,  36,  56, 163,  28, 152, 104, 238, 162,  56,  24,  58,
  38, 150, 193, 254, 253, 125, 173,  35,  73, 126, 247, 239, 216,   6, 199,  15,
  90,  12,  97, 122,   9,  84, 207, 127, 219,  72,  58,  30,  29, 182,  41, 192,
 235, 248, 237,  74,  72, 176, 210, 252,  45,  64, 165,  87, 202, 241, 236, 223,
 151, 242, 119, 239,  52, 112, 169,  28,  13,  37, 160,  60, 158,  81, 133,  60,
  16, 145, 249, 192, 173, 217, 214,  93, 141, 184,  54,  34, 161, 104, 157,  95,
  38, 133, 218, 227, 211, 181,   9,  66, 137, 143,  77,  33, 248, 159,   4,  55,
 228,  48,  99, 219, 222, 184,  15,  36, 254, 256, 157, 237,  87, 139, 209, 113,
 232,  85, 126, 167, 197, 100, 103, 166,  64, 225, 125, 205, 117, 135,  84, 128,
 231, 112,  90, 241,  28,  22, 210, 147, 186,  49, 230,  21, 108,  39, 194,  47,
 123, 199, 107, 114,  30, 210, 250, 143,  59, 156, 131, 133, 221,  27,  76,  99,
 208, 250,  78,  12, 211, 141,  95,  81, 195, 106,   8, 232, 150, 212, 205, 221,
  11, 225,  87, 219, 126, 136, 137, 180, 198,  48,  68, 203, 239, 252, 194, 235,
 142, 137, 174, 172, 190, 145, 250, 221, 182, 204,   1, 195, 130, 153,  83, 241,
 161, 239, 211, 138,  11, 169, 155, 245, 174,  49,  10, 166,  16, 130, 181, 139,
 222, 222, 112,  99, 124,  94,  51, 243, 133, 194, 244, 136,  35, 248, 201, 177,
 178, 186, 129, 102,  89, 184, 180,  41, 149,  96, 165,  72, 225, 231, 134, 158,
 199,  28, 249,  16, 225, 195,  10, 210, 164, 252, 138,   8,  35, 152, 213, 199,
  82, 116,  97, 230,  63, 199, 241,  35,  79, 120,  54, 174,  67, 112,   1,  76,
  69, 222, 194,  96,  82,  94,  25, 228, 196, 145, 155, 136, 228, 234,  46, 101,
 246,  51, 103, 166, 246,  75,   9, 200, 161,   4, 108,  35, 129, 168, 208, 144,
  50,  14,  13, 220,  41, 132, 122, 127, 194,   9, 232, 234, 107,  28, 187,   8,
  51, 141,  97, 221, 225,   9, 113, 170, 166, 102, 135,  22, 231, 185, 227, 187,
 110, 145, 251, 146,  76,  22, 146, 228,   7,  53,  64,  25,  62, 198, 130, 190,
 221, 232, 169,  64, 188, 199, 237, 249, 173, 218, 196, 191,  48, 224,   5, 113,
 100, 166, 160,  21, 191, 197,  61, 162, 149, 171, 240, 183, 129, 231, 123, 204,
 192, 179, 134,  15,  47, 161, 142, 177, 239, 234, 186, 237, 231,  53, 208,  95,
 146,  36, 225, 231,  89, 142,  93, 248, 137, 124,  83,  39,  69,  77,  89, 208,
 182,  48,  85, 147, 244, 164, 246,  68,  38, 190, 220,  35, 202,  91, 157, 151,
 201, 240, 185, 218,   4, 152,   2, 132, 177,  88, 190, 196, 229,  74, 220, 135,
 137, 196,  11,  47,   5, 251, 106, 144, 163,  60, 222, 127,  52,  57, 202, 102,
  64, 140, 110, 206,  23, 182,  39, 245,   1, 163, 157, 186, 163,  80,   7, 230,
  44, 249, 176, 102, 164, 125, 147, 120,  18, 191, 186, 125,  64,  65, 198, 157,
 164, 213,  95,  61,  13, 181, 208,  91, 242, 197, 158,  34,  98, 169,  91,  14,
  17,  93, 157,  17,  65,  30, 183,   6, 139,  58, 255, 108, 100, 136, 209, 144,
 164,   6, 237,  33, 210, 110,  57, 126, 197, 136, 125, 244, 165, 151, 168,   3,
 143, 251, 247, 155, 136, 130,  88,  14,  74, 121, 250, 133,  21, 226, 185, 232,
 118, 132,  89,  64, 204, 161,   2,  70, 224, 159,  35, 204, 123, 180,  13,  52,
 231,  57,  25,  78,  66,  69,  97,  42, 198,  84, 176,  59,   8, 232, 125, 134,
 193,   2, 232, 109, 216,  69,  90, 142,  32,  38, 249,  37,  75, 180, 184, 188,
  19,  47, 120,  87, 146,  70, 232, 120, 191,  45,  33,  38,  19, 248, 110, 110,
  44,  64,   2,  84, 244, 228, 252, 228, 170, 123,  38, 144, 213, 144, 171, 212,
 243,  87, 189,  46, 128, 110,  84,  77,  65, 183,  61, 184, 101,  44, 168,  68,
  14, 106, 105,   8, 227, 211, 166,  39, 152,  43,  52, 254, 197,  55, 119,  89,
 168,  65,  53, 138, 177,  56, 219,   0,  58, 121, 148,  18,  44, 100, 215, 103,
 145, 229, 117, 196,  91,  89, 113, 143, 172, 239, 249, 184, 154,  39, 112,  65,
 204,  42,  84,  38, 155, 151, 151,  16, 100,  87, 174, 162, 145, 147, 149, 186,
 237, 145, 134, 144, 198, 235, 213, 163,  48, 230,  24,  47,  57,  71, 127,   0,
 150, 219,  12,  81, 197, 150, 131,  13, 169,  63, 175, 184,  48, 235,  65, 243,
 149, 200, 163, 254, 202, 114, 247,  67, 143, 250, 126, 228,  80, 130, 216, 214,
  36,   2, 230,  33, 119, 125,   3, 142, 237, 100,   3, 152, 197, 174, 244, 129,
 232,  30, 206, 199,  39, 210, 220,  43, 237, 221, 201,  54, 179,  42,  28, 133,
 246, 203, 198, 177,   0,  28, 194,  85, 223, 109, 155, 147, 221,  60, 133, 108,
 157, 254,  26,  75, 157, 185,  49, 142,  31, 137,  71,  43,  63,  64, 237, 148,
 237, 172, 159, 160, 155, 254, 234, 224, 140, 193, 114, 140,  62, 109, 136,  39,
 255,   8, 158, 146, 128,  49, 222,  96,  57, 209, 180, 249, 202, 127, 113, 231,
  78, 178,  46,  33, 228, 215, 104,  31, 207, 186,  82,  41,  42,  39, 103, 119,
 123, 133, 243, 254, 238, 156,  90, 186,  37, 212,  33, 107, 252,  51, 177,  36,
 237,  76, 159, 245,  93, 214,  97,  56, 190,  38, 160,  94, 105, 222, 220, 158,
  49,  16, 191,  52, 120,  87, 179,   2,  27, 144, 223, 230, 184,   6, 129, 227,
  69,  47, 215, 181, 162, 139,  72, 200,  45, 163, 159,  62,   2, 221, 124,  40,
 159, 242,  35, 208, 179, 166,  98,  67, 178,  68, 143, 225, 178, 146, 187, 159,
  57,  66, 176, 192, 236, 250, 168, 224, 122,  43, 159, 120, 133, 165, 122,  64,
  87,  74, 161, 241,   9,  87,  90,  24, 255, 113, 203, 220,  57, 139, 197, 159,
  31, 151,  27, 140,  77, 162,   7,  27,  84, 228, 187, 220,  53, 126, 162, 242,
  84, 181, 223, 103,  86, 177, 207,  31, 140,  18, 207, 256, 201, 166,  96,  23,
 233, 103, 197,  84, 161,  75,  59, 149, 138, 154, 119,  92,  16,  53, 116,  97,
 220, 114,  35,  45,  77, 209,  40, 196,  71,  22,  81, 178, 110,  14,   3, 180,
 110, 129, 112,  47,  18,  61, 134,  78,  73,  79, 254, 232, 125, 180, 205,  54,
 220, 119,  63,  89, 181,  52,  77, 109, 151,  77,  80, 207, 144,  25,  20,   6,
 208,  47, 201, 206, 192,  14,  73, 176, 256, 201, 207,  87, 216,  60,  56,  73,
  92, 243, 179, 113,  49,  59,  55, 168, 121, 137,  69, 154,  95,  57, 187,  47,
 129,   4,  15,  92,   6, 116,  69, 196,  48, 134,  84,  81, 111,  56,  38, 176,
 239,   6, 128,  72, 242, 134,  36, 221,  59,  48, 242,  68, 130, 110, 171,  89,
  13, 220,  48,  29,   5,  75, 104, 233,  91, 129, 105, 162,  44, 113, 163, 163,
  85, 147, 190, 111, 197,  80, 213, 153,  81,  68, 203,  33, 161, 165,  10,  61,
 120, 252,   0, 205,  28,  42, 193,  64,  39,  37,  83, 175,   5, 218, 215, 174,
 128, 121, 231,  11, 150, 145, 135, 197, 136,  91, 193,   5, 107,  88,  82,   6,
   4, 188, 256,  70,  40,   2, 167,  57, 169, 203, 115, 254, 215, 172,  84,  80,
 188, 167,  34, 137,  43, 243,   2,  79, 178,  38, 188, 135, 233, 194, 208,  13,
  11, 151, 231, 196,  12, 122, 162,  56,  17, 114, 191, 207,  90, 132,  64, 238,
 187,   6, 198, 176, 240,  88, 118, 236,  15, 226, 166,  22, 193, 229,  82, 246,
 213,  64,  37,  63,  31, 243, 252,  37, 156,  38, 175, 204, 138, 141, 211,  82,
 106, 217,  97, 139, 153,  56, 129, 218, 158,   9,  83,  26,  87, 112,  71,  21,
 250,   5,  65, 141,  68, 116, 231, 113,  10, 218,  99, 205, 201,  92, 157,   4,
  97,  46,  49, 220,  72, 139, 103, 171, 149, 129, 193,  19,  69, 245,  43,  31,
  58,  68,  36, 195, 159,  22,  54,  34, 233, 141, 205, 100, 226,  96,  22, 192,
  41, 231,  24,  79, 234, 138,  30, 120, 117, 216, 172, 197, 172, 107,  86,  29,
 181, 151,   0,   6, 146, 186,  68,  55,  54,  58, 213, 182,  60, 231,  33, 232,
  77, 210, 216, 154,  80,  51, 141, 122,  68, 148, 219, 122, 254,  48,  64, 175,
  41, 115,  62, 243, 141,  81, 119, 121,   5,  68, 121,  88, 239,  29, 230,  90,
 135, 159,  35, 223, 168, 112,  49,  37, 146,  60, 126, 134,  42, 145, 115,  90,
  73, 133, 211,  86, 120, 141, 122, 241, 127,  56, 130,  36, 174,  75,  83, 246,
 112,  45, 136, 194, 201, 115,   1, 156, 114, 167, 208,  12, 176, 147,  32, 170,
 251, 100, 102, 220, 122, 210,   6,  49,  75, 201,  38, 105, 132, 135, 126, 102,
  13, 121,  76, 228, 202,  20,  61, 213, 246,  13, 207,  42, 148, 168,  37, 253,
  34,  94, 141, 185,  18, 234, 157, 109, 104,  64, 250, 125,  49, 236,  86,  48,
 196,  77,  75, 237, 156, 103, 225,  19, 110, 229,  22,  68, 177,  93, 221, 181,
 152, 153,  61, 108, 101,  74, 247, 195, 127, 216,  30, 166, 168,  61,  83, 229,
 120, 156,  96, 120, 201, 124,  43,  27, 253, 250, 120, 143,  89, 235, 189, 243,
 150,   7, 127, 119, 149, 244,  84, 185, 134,  34, 128, 193, 236, 234, 132, 117,
 137,  32, 145, 184,  44, 121,  51,  76,  11, 228, 142, 251,  39,  77, 228, 251,
  41,  58, 246, 107, 125, 187,   9, 240,  35,   8,  11, 162, 242, 220, 158, 163,
   2, 184, 163, 227, 242,   2, 100, 101,   2,  78, 129,  34,  89,  28,  26, 157,
  79,  31, 107, 250, 194, 156, 186,  69, 212,  66,  41, 180, 139,  42, 211, 253,
 256, 239,  29, 129, 104, 248, 182,  68,   1, 189,  48, 226,  36, 229,   3, 158,
  41,  53, 241,  22, 115, 174,  16, 163, 224,  19, 112, 219, 177, 233,  42,  27,
 250, 134,  18,  28, 145, 122,  68,  34, 134,  31, 147,  17,  39, 188, 150,  76,
  45,  42, 167, 249,  12,  16,  23, 182,  13,  79, 121,   3,  70, 197, 239,  44,
  86, 177, 255,  81,  64, 171, 138, 131,  73, 110,  44, 201, 254, 198, 146,  91,
  48,   9, 104,  31,  29, 161, 101,  31, 138, 180, 231, 233,  79, 137,  61, 236,
 140,  15, 249, 218, 234, 119,  99, 195, 110, 137, 237, 207,   8,  31,  45,  24,
  90, 155, 203, 253, 192, 203,  65, 176, 210, 171, 142, 214, 220, 122, 136, 237,
 189, 186, 147,  40,  80, 254, 173,  33, 191,  46, 192,  26, 108, 255, 228, 205,
  61,  76,  39, 107, 225, 126, 228, 182, 140, 251, 143, 134, 252, 168, 221,   8,
 185,  85,  60, 233, 147, 244,  87, 137,   8, 140,  96,  80,  53,  45, 175, 160,
 124, 189, 112,  37, 144,  19,  70,  17, 170, 242,   2,   3,  28,  95, 120, 199,
 212,  43,   9, 117,  86, 151, 101, 241, 200, 145, 241,  19, 178,  69, 204, 197,
 227, 166,  94,   7, 193,  45, 247, 234,  19, 187, 212, 212, 236, 125,  33,  95,
 198, 121, 122, 103,  77, 155, 235,  49,  25, 237, 249,  11, 162,   7, 238,  24,
  16, 150, 129,  25, 152,  17,  42,  67, 247, 162,  77, 154,  31, 133,  55, 137,
  79, 119, 153,  10,  86,  28, 244, 186,  41, 169, 106,  44,  10,  49, 110, 179,
  32, 133, 155, 244,  61,  70, 131, 168, 170,  39, 231, 252,  32,  69,  92, 238,
 239,  35, 132, 136, 236, 167,  90,  32, 123,  88,  69,  22,  20,  89, 145, 166,
  30, 118,  75,   4,  49,  31, 225,  54,  11,  50,  56, 191, 246,   1, 187,  33,
 119, 107, 139,  68,  19, 240, 131,  55,  94, 113,  31, 252,  12, 179, 121,   2,
 120, 252,   0,  76,  41,  80, 185,  42,  62, 121, 105, 159, 121, 109, 111,  98,
   7, 118,  86,  29, 210,  70, 231, 179, 223, 229, 164,  70,  62,  47,   0, 206,
 204, 178, 168, 120, 224, 166,  99,  25, 103,  63, 246, 224, 117, 204,  75, 124,
 140, 133, 110, 110, 222,  88, 151, 118,  46,  37,  22, 143, 158,  40,   2,  50,
 153,  94, 190, 199,  13, 198, 127, 211, 180,  90, 183,  98,   0, 142, 210, 154,
 100, 187,  67, 231, 202, 100, 198, 235, 252, 160, 247, 124, 247,  14, 121, 221,
  57,  88, 253, 243, 185,  89,  45, 249, 221, 194, 108, 175, 193, 119,  50, 141,
 223, 133, 136,  64, 176, 250, 129, 100, 124,  94, 181, 159,  99, 185, 177, 240,
 135,  42, 103,  52, 202, 208, 143, 186, 193, 103, 154, 237, 102,  88, 225, 161,
  50, 188, 191, 109,  12,  87,  19, 227, 247, 183,  13,  52, 205, 170, 205, 146,
  89, 160,  18, 105, 192,  73, 231, 225, 184, 157, 252, 220,  61,  59, 169, 183,
 221,  20, 141,  20, 158, 101, 245,   7, 245, 225, 118, 137,  84,  55,  19,  27,
 164, 110,  35,  25, 202,  94, 150,  46,  91, 152, 130,   1,   7,  46,  16, 237,
 171, 109,  19, 200,  65,  38,  10, 213,  70,  96, 126, 226, 185, 225, 181,  46,
  10, 165,  11, 123,  53, 158,  22, 147,  64,  22, 227,  69, 182, 237, 197,  37,
  39,  49, 186, 223, 139, 128,  55,  36, 166, 178, 220,  20,  98, 172, 166, 253,
  45,   0, 120, 180, 189, 185, 158, 159, 196,   6, 214,  79, 141,  52, 156, 107,
   5, 109, 142, 159,  33,  64, 190, 133,  95, 132,  95, 202, 160,  63, 186,  23,
 231, 107, 163,  33, 234,  15, 244,  77, 108,  49,  51,   7, 164,  87, 142,  99,
 240, 202,  47, 256, 118, 190, 196, 178, 217,  42,  39, 153,  21, 192, 232, 202,
  14,  82, 179,  64, 233,   4, 219,  10, 133,  78,  43, 144, 146, 216, 202,  81,
  71, 252,   8, 201,  68, 256,  85, 233, 164,  88, 176,  30,   5, 152, 126, 179,
 249,  84, 140, 190, 159,  54, 118,  98,   2, 159,  27, 133,  74, 121, 239, 196,
  71, 149, 119, 135, 102,  20,  87, 112,  44,  75, 221,   3, 151, 158,   5,  98,
 152,  25,  97, 106,  63, 171, 240,  79, 234, 240, 230,  92,  76,  70, 173, 196,
  36, 225, 218, 133,  64, 240, 150,  41, 146,  66, 133,  51, 134,  73, 170, 238,
 140,  90,  45,  89,  46, 147,  96, 169, 174, 174, 244, 151,  90,  40,  32,  74,
  38, 154, 246,  57,  31,  14, 189, 151,  83, 243, 197, 183, 220, 185,  53, 225,
  51, 106, 188, 208, 222, 248,  93,  13,  93, 215, 131,  25, 142, 185, 113, 222,
 131, 215, 149,  50, 159,  85,  32,   5, 205, 192,   2, 227,  42, 214, 197,  42,
 126, 182,  68, 123, 109,  36, 237, 179, 170, 199,  77, 256,   5, 128, 214, 243,
 137, 177, 170, 253, 179, 180, 153, 236, 100, 196, 216, 231, 198,  37, 192,  80,
 121, 221, 246,   1,  16, 246,  29,  78,  64, 148, 124,  38,  96, 125,  28,  20,
  48,  51,  73, 187, 139, 208,  98, 253, 221, 188,  84, 129,   1, 205,  95, 205,
 117,  79,  71, 126, 134, 237,  19, 184, 137, 125, 129, 178, 223,  54, 188, 112,
  30,   7, 225, 228, 205, 184, 233,  87, 117,  22,  58,  10,   8,  42,   2, 114,
 254,  19,  17,  13, 150,  92, 233, 179,  63,  12,  60, 171, 127,  35,  50,   5,
 195, 113, 241,  25, 249, 184, 166,  44, 221,  35, 151, 116,   8,  54, 195,  89,
 218, 186, 132,   5,  41,  89, 226, 177,  11,  41,  87, 172,   5,  23,  20,  59,
 228,  94,  76,  33, 137,  43, 151, 221,  61, 232,   4, 120,  93, 217,  80, 228,
 228,   6,  58,  25,  62,  84,  91,  48, 209,  20, 247, 243,  55, 106,  80,  79,
 235,  34,  20, 180, 146,   2, 236,  13, 236, 206, 243, 222, 204,  83, 148, 213,
 214, 117, 237,  98,   0,  90, 204, 168,  32,  41, 126,  67, 191,  74,  27, 255,
  26,  75, 240, 113, 185, 105, 167, 154, 112,  67, 151,  63, 161, 134, 239, 176,
  42,  87, 249, 130,  45, 242,  17, 100, 107, 120, 212, 218, 237,  76, 231, 162,
 175, 172, 118, 155,  92,  36, 124,  17, 121,  71,  13,   9,  82, 126, 147, 142,
 218, 148, 138,  80, 163, 106, 164, 123, 140, 129,  35,  42, 186, 154, 228, 214,
  75,  73,   8, 253,  42, 153, 232, 164,  95,  24, 110,  90, 231, 197,  90, 196,
  57, 164, 252, 181,  31,   7,  97, 256,  35,  77, 200, 212,  99, 179,  92, 227,
  17, 180,  49, 176,   9, 188,  13, 182,  93,  44, 128, 219, 134,  92, 151,   6,
  23, 126, 200, 109,  66,  30, 140, 180, 146, 134,  67, 200,   7,   9, 223, 168,
 186, 221,   3, 154, 150, 165,  43,  53, 138,  27,  86, 213, 235, 160,  70,   2,
 240,  20,  89, 212,  84, 141, 168, 246, 183, 227,  30, 167, 138, 185, 253,  83,
  52, 143, 236,  94,  59,  65,  89, 218, 194, 157, 164, 156, 111,  95, 202, 168,
 245, 256, 151,  28, 222, 194,  72, 130, 217, 134, 253,  77, 246, 100,  76,  32,
 254, 174, 182, 193,  14, 237,  74,   1,  74,  26, 135, 216, 152, 208, 112,  38,
 181,  62,  25,  71,  61, 234, 254,  97, 191,  23,  92, 256, 190, 205,   6,  16,
 134, 147, 210, 219, 148,  59,  73, 185,  24, 247, 174, 143, 116, 220, 128, 144,
 111, 126, 101,  98, 130, 136, 101, 102,  69, 127,  24, 168, 146, 226, 226, 207,
 176, 122, 149, 254, 134, 196,  22, 151, 197,  21,  50, 205, 116, 154,  65, 116,
 177, 224, 127,  77, 177, 159, 225,  69, 176,  54, 100, 104, 140,   8,  11, 126,
  11, 188, 185, 159, 107,  16, 254, 142,  80,  28,   5, 157, 104,  57, 109,  82,
 102,  80, 173, 242, 238, 207,  57, 105, 237, 160,  59, 189, 189, 199,  26,  11,
 190, 156,  97, 118,  20,  12, 254, 189, 165, 147, 142, 199,   5, 213,  64, 133,
 108, 217, 133,  60,  94,  28, 116, 136,  47, 165, 125,  42, 183, 143,  14, 129,
 223,  70, 212, 205, 181, 180,   3, 201, 182,  46,  57, 104, 239,  60,  99, 181,
 220, 231,  45,  79, 156,  89, 149, 143, 190, 103, 153,  61, 235,  73, 136,  20,
  89, 243,  16, 130, 247, 141, 134,  93,  80,  68,  85,  84,   8,  72, 194,   4,
 242, 110,  19, 133, 199,  70, 172,  92, 132, 254,  67,  74,  36,  94,  13,  90,
 154, 184,   9, 109, 118, 243, 214,  71,  36,  95,   0,  90, 201, 105, 112, 215,
  69, 196, 224, 210, 236, 242, 155, 211,  37, 134,  69, 113, 157,  97,  68,  26,
 230, 149, 219, 180,  20,  76, 172, 145, 154,  40, 129,   8,  93,  56, 162, 124,
 207, 233, 105,  19,   3, 183, 155, 134,   8, 244, 213,  78, 139,  88, 156,  37,
  51, 152, 111, 102, 112, 250, 114, 252, 201, 241, 133,  24, 136, 153,   5,  90,
 210, 197, 216,  24, 131,  17, 147, 246,  13,  86,   3, 253, 179, 237, 101, 114,
 243, 191, 207,   2, 220, 133, 244,  53,  87, 125, 154, 158, 197,  20,   8,  83,
  32, 191,  38, 241, 204,  22, 168,  59, 217, 123, 162,  82,  21,  50, 130,  89,
 239, 253, 195,  56, 253,  74, 147, 125, 234, 199, 250,  28,  65, 193,  22, 237,
 193,  94,  58, 229, 139, 176,  69,  42, 179, 164, 150, 168, 246, 214,  86, 174,
  59, 117,  15,  19,  76,  37, 214, 238, 153, 226, 154,  45, 109, 114, 198, 107,
  45,  70, 238, 196, 142, 252, 244,  71, 123, 136, 134, 188,  99, 132,  25,  42,
 240,   0, 196,  33,  26, 124, 256, 145,  27, 102, 153,  35,  28, 132, 221, 167,
 138, 133,  41, 170,  95, 224,  40, 139, 239, 153,   1, 106, 255, 106, 170, 163,
 127,  44, 155, 232, 194, 119, 232, 117, 239, 143, 108,  41,   3,   9, 180, 256,
 144, 113, 133, 200,  79,  69, 128, 216,  31,  50, 102, 209, 249, 136, 150, 154,
 182,  51, 228,  39, 127, 142,  87,  15,  94,  92, 187, 245,  31, 236,  64,  58,
 114,  11,  17, 166, 189, 152, 218,  34, 123,  39,  58,  37, 153,  91,  63, 121,
  31,  34,  12, 254, 106,  96, 171,  14, 155, 247, 214,  69,  24,  98,   3, 204,
 202, 194, 207,  30, 253,  44, 119,  70,  14,  96,  82, 250,  63,   6, 232,  38,
  89, 144, 102, 191,  82, 254,  20, 222,  96, 162, 110,   6, 159,  58, 200, 226,
  98, 128,  42,  70,  84, 247, 128, 211, 136,  54, 143, 166,  60, 118,  99, 218,
  27, 193,  85,  81, 219, 223,  46,  41,  23, 233, 152, 222,  36, 236,  54, 181,
  56,  50,   4, 207, 129,  92,  78,  88, 197, 251, 131, 105,  31, 172,  38, 131,
  19, 204, 129,  47, 227, 106, 202, 183,  23,   6,  77, 224, 102, 147,  11, 218,
 131, 132,  60, 192, 208, 223, 236,  23, 103, 115,  89,  18, 185, 171,  70, 174,
 139,   0, 100, 160, 221,  11, 228,  60,  12, 122, 114,  12, 157, 235, 148,  57,
  83,  62, 173, 131, 169, 126,  85,  99,  93, 243,  81,  80,  29, 245, 206,  82,
 236, 227, 166,  14, 230, 213, 144,  97,  27, 111,  99, 164, 105, 150,  89, 111,
 252, 118, 140, 232, 120, 183, 137, 213, 232, 157, 224,  33, 134, 118, 186,  80,
 159,   2, 186, 193,  54, 242,  25, 237, 232, 249, 226, 213,  90, 149,  90, 160,
 118,  69,  64,  37,  10, 183, 109, 246,  30,  52, 219,  69, 189,  26, 116, 220,
  50, 244, 243, 243, 139, 137, 232,  98,  38,  45, 256, 143, 171, 101,  73, 238,
 123,  45, 194, 167, 250, 123,  12,  29, 136, 237, 141,  21,  89,  96, 199,  44,
   8, 214, 208,  17, 113,  41, 137,  26, 166, 155,  89,  85,  54,  58,  97, 160,
  50, 239,  58,  71,  21, 157, 139,  12,  37, 198, 182, 131, 149, 134,  16, 204,
 164, 181, 248, 166,  52, 216, 136, 201,  37, 255, 187, 240,   5, 101, 147, 231,
  14, 163, 253, 134, 146, 216,   8,  54, 224,  90, 220, 195,  75, 215, 186,  58,
  71, 204, 124, 105, 239,  53,  16,  85,  69, 163, 195, 223,  33,  38,  69,  88,
  88, 203,  99,  55, 176,  13, 156, 204, 236,  99, 194, 134,  75, 247, 126, 129,
 160, 124, 233, 206, 139, 144, 154,  45, 233,  51, 206,  61,  60,  55, 205, 107,
  84, 108,  96, 188, 203,  31,  89,  20, 115, 144, 137,  90, 237,  78, 231, 185,
 120, 217,   1, 176, 169,  30, 155, 176, 100, 113,  53,  42, 193, 108,  14, 121,
 176, 158, 137,  92, 178,  44, 110, 249, 108, 234,  94, 101, 128,  12, 250, 173,
  72, 202, 232,  66, 139, 152, 189,  18,  32, 197,   9, 238, 246,  55, 119, 183,
 196, 119, 113, 247, 191, 100, 200, 245,  46,  16, 234, 112, 136, 116, 232,  48,
 176, 108,  11, 237,  14, 153,  93, 177, 124,  72,  67, 121, 135, 143,  45,  18,
  97, 251, 184, 172, 136,  55, 213,   8, 103,  12, 221, 212,  13, 160, 116,  91,
 237, 127, 218, 190, 103, 131,  77,  82,  36, 100,  22, 252,  79,  69,  54,  26,
  65, 182, 115, 142, 247,  20,  89,  81, 188, 244,  27, 120, 240, 248,  13, 230,
  67, 133,  32, 201, 129,  87,   9, 245,  66,  88, 166,  34,  46, 184, 119, 218,
 144, 235, 163,  40, 138, 134, 127, 217,  64, 227, 116,  67,  55, 202, 130,  48,
 199,  42, 251, 112, 124, 153, 123, 194, 243,  49, 250,  12,  78, 157, 167, 134,
 210,  73, 156, 102,  21,  88, 216, 123,  45,  11, 208,  18,  47, 187,  20,  43,
   3, 180, 124,   2, 136, 176,  77, 111, 138, 139,  91, 225, 126,   8,  74, 255,
  88, 192, 193, 239, 138, 204, 139, 194, 166, 130, 252, 184, 140, 168,  30, 177,
 121,  98, 131, 124,  69, 171,  75,  49, 184,  34,  76, 122, 202, 115, 184, 253,
 120, 182,  33, 251,   1,  74, 216, 217, 243, 168,  70, 162, 119, 158, 197, 198,
  61,  89,   7,   5,  54, 199, 211, 170,  23, 226,  44, 247, 165, 195,   7, 225,
  91,  23,  50,  15,  51, 208, 106,  94,  12,  31,  43, 112, 146, 139, 246, 182,
 113,   1,  97,  15,  66,   2,  51,  76, 164, 184, 237, 200, 218, 176,  72,  98,
  33, 135,  38, 147, 140, 229,  50,  94,  81, 187, 129,  17, 238, 168, 146, 203,
 181,  99, 164,   3, 104,  98, 255, 189, 114, 142,  86, 102, 229, 102,  80, 129,
  64,  84,  79, 161,  81, 156, 128, 111, 164, 197,  18,  15,  55, 196, 198, 191,
  28, 113, 117,  96, 207, 253,  19, 158, 231,  13,  53, 130, 252, 211,  58, 180,
 212, 142,   7, 219,  38,  81,  62, 109, 167, 113,  33,  56,  97, 185, 157, 130,
 186, 129, 119, 182, 196,  26,  54, 110,  65, 170, 166, 236,  30,  22, 162,   0,
 106,  12, 248,  33,  48,  72, 159,  17,  76, 244, 172, 132,  89, 171, 196,  76,
 254, 166,  76, 218, 226,   3,  52, 220, 238, 181, 179, 144, 225,  23,   3, 166,
 158,  35, 228, 154, 204,  23, 203,  71, 134, 189,  18, 168, 236, 141, 117, 138,
   2, 132,  78,  57, 154,  21, 250, 196, 184,  40, 161,  40,  10, 178, 134, 120,
 132, 123, 101,  82, 205, 121,  55, 140, 231,  56, 231,  71, 206, 246, 198, 150,
 146, 192,  45, 105, 242,   1, 125,  18, 176,  46, 222, 122,  19,  80, 113, 133,
 131, 162,  81,  51,  98, 168, 247, 161, 139,  39,  63, 162,  22, 153, 170,  92,
  91, 130, 174, 200,  45, 112,  99, 164, 132, 184, 191, 186, 200, 167,  86, 145,
 167, 227, 130,  44,  12, 158, 172, 249, 204,  17,  54, 249,  16, 200,  21, 174,
  67, 223, 105, 201,  50,  36, 133, 203, 244, 131, 228,  67,  29, 195,  91,  91,
  55, 107, 167, 154, 170, 137, 218, 183, 169,  61,  99, 175, 128,  23, 142, 183,
  66, 255,  59, 187,  66,  85, 212, 109, 168,  82,  16,  43,  67, 139, 114, 176,
 216, 255, 130,  94, 152,  79, 183,  64, 100,  23, 214,  82,  34, 230,  48,  15,
 242, 130,  50, 241,  81,  32,   5, 125, 183, 182, 184,  99, 248, 109, 159, 210,
 226,  61, 119, 129,  39, 149,  78, 214, 107,  78, 147, 124, 228,  18, 143, 188,
  84, 180, 233, 119,  64,  39, 158, 133, 177, 168,   6, 150,  80, 117, 150,  56,
  49,  72,  49,  37,  30, 242,  49, 142,  33, 156,  34,  44,  44,  72,  58,  22,
 249,  46, 168,  80,  25, 196,  64, 174,  97, 179, 244, 134, 213, 105,  63, 151,
  21,  90, 168,  90, 245,  28, 157,  65, 250, 232, 188,  27,  99, 160, 156, 127,
  68, 193,  10,  80, 205,  36, 138, 229,  12, 223,  70, 169, 251,  41,  48,  94,
  41, 177,  99, 256, 158,   0,   6,  83, 231, 191, 120, 135, 157, 146, 218, 213,
 160,   7,  47, 234,  98, 211,  79, 225, 179,  95, 175, 105, 185,  79, 115,   0,
 104,  14,  65, 124,  15, 188,  52,   9, 253,  27, 132, 137,  13, 127,  75, 238,
 185, 253,  33,   8,  52, 157, 164,  68, 232, 188,  69,  28, 209, 233,   5, 129,
 216,  90, 252, 212,  33, 200, 222,   9, 112,  15,  43,  36, 226, 114,  15, 249,
 217,   8, 148,  22, 147,  23, 143,  67, 222, 116, 235, 250, 212, 210,  39, 142,
 108,  64, 209,  83,  73,  66,  99,  34,  17,  29,  45, 151, 244, 114,  28, 241,
 144, 208, 146, 179, 132,  89, 217, 198, 252, 219, 205, 165,  75, 107,  11, 173,
  76,   6, 196, 247, 152, 216, 248,  91, 209, 178,  57, 250, 174,  60,  79, 123,
  18, 135,   9, 241, 230, 159, 184,  68, 156, 251, 215,   9, 113, 234,  75, 235,
 103, 194, 205, 129, 230,  45,  96,  73, 157,  20, 200, 212, 212, 228, 161,   7,
 231, 228, 108,  43, 198,  87, 140, 140,   4, 182, 164,   3,  53, 104, 250, 213,
  85,  38,  89,  61,  52, 187,  35, 204,  86, 249, 100,  71, 248, 213, 163, 215,
  66, 106, 252, 129,  40, 111,  47,  24, 186, 221,  85, 205, 199, 237, 122, 181,
  32,  46, 182, 135,  33, 251, 142,  34, 208, 242, 128, 255,   4, 234,  15,  33,
 167, 222,  32, 186, 191,  34, 255, 244,  98, 240, 228, 204,  30, 142,  32,  70,
  69,  83, 110, 151,  10, 243, 141,  21, 223,  69,  61,  37,  59, 209, 102, 114,
 223,  33, 129, 254, 255, 103,  86, 247, 235,  72, 126, 177, 102, 226, 102,  30,
 149, 221,  62, 247, 251, 120, 163, 173,  57, 202, 204,  24,  39, 106, 120, 143,
 202, 176, 191, 147,  37,  38,  51, 133,  47, 245, 157, 132, 154,  71, 183, 111,
  30, 180,  18, 202,  82,  96, 170,  91, 157, 181, 212, 140, 256,   8, 196, 121,
 149,  79,  66, 127, 113,  78,   4, 197,  84, 256, 111, 222, 102,  63, 228, 104,
 136, 223,  67, 193,  93, 154, 249,  83, 204, 101, 200, 234,  84, 252, 230, 195,
  43, 140, 120, 242,  89,  63, 166, 233, 209,  94,  43, 170, 126,   5, 205,  78,
 112,  80, 143, 151, 146, 248, 137, 203,  45, 183,  61,   1, 155,   8, 102,  59,
  68, 212, 230,  61, 254, 191, 128, 223, 176, 123, 229,  27, 146, 120,  96, 165,
 213,  12, 232,  40, 186, 225,  66, 105, 200, 195, 212, 110, 237, 238, 151,  19,
  12, 171, 150,  82,   7, 228,  79,  52,  15,  78,  62,  43,  21, 154, 114,  21,
  12, 212, 256, 232, 125, 127,   5,  51,  37, 252, 136,  13,  47, 195, 168, 191,
 231,  55,  57, 251, 214, 116,  15,  86, 210,  41, 249, 242, 119,  27, 250, 203,
 107,  69,  90,  43, 206, 154, 127,  54, 100,  78, 187,  54, 244, 177, 234, 167,
 202, 136, 209, 171,  69, 114, 133, 173,  26, 139,  78, 141, 128,  32, 124,  39,
  45, 218,  96,  68,  90,  44,  67,  62,  83, 190, 188, 256, 103,  42, 102,  64,
 249,   0, 141,  11,  61,  69,  70,  66, 233, 237,  29, 200, 251, 157,  71,  51,
  64, 133, 113,  76,  35, 125,  76, 137, 217, 145,  35,  69, 226, 180,  56, 249,
 156, 163, 176, 237,  81,  54,  85, 169, 115, 211, 129,  70, 248,  40, 252, 192,
 194, 101, 247,   8, 181, 124, 217, 191, 194,  93,  99, 127, 117, 177, 144, 151,
 228, 121,  32,  11,  89,  81,  26,  29, 183,  76, 249, 132, 179,  70,  34, 102,
  20,  66,  87,  63, 124, 205, 174, 177,  87, 219,  73, 218,  91,  87, 176,  72,
  15, 211,  47,  61, 251, 165,  39, 247, 146,  70, 150,  57,   1, 212,  36, 162,
  39,  38,  16, 216,   3,  50, 116, 200,  32, 234,  77, 181, 155,  19,  90, 188,
  36,   6, 254,  46,  46, 203,  25, 230, 181, 196,   4, 151, 225,  65, 122, 216,
 168,  86, 158, 131, 136,  16,  49, 102, 233,  64, 154,  88, 228,  52, 146,  69,
  93, 157, 243, 121,  70, 209, 126, 213,  88, 145, 236,  65,  70,  96, 204,  47,
  10, 200,  77,   8, 103, 150,  48, 153,   5,  37,  52, 235, 209,  31, 181, 126,
  83, 142, 224, 140,   6,  32, 200, 171, 160, 179, 115, 229,  75, 194, 208,  39,
  59, 223,  52, 247,  38, 197, 135,   1,   6, 189, 106, 114, 168,   5, 211, 222,
  44,  63,  90, 160, 116, 172, 170, 133, 125, 138,  39, 131,  23, 178,  10, 214,
  36,  93,  28,  59,  68,  17, 123,  25, 255, 184, 204, 102, 194, 214, 129,  94,
 159, 245, 112, 141,  62,  11,  61, 197, 124, 221, 205,  11,  79,  71, 201,  54,
  58, 150,  29, 121,  87,  46, 240, 201,  68,  20, 194, 209,  47, 152, 158, 174,
 193, 164, 120, 255, 216, 165, 247,  58,  85, 130, 220,  23, 122, 223, 188,  98,
  21,  70,  72, 170, 150, 237,  76, 143, 112, 238, 206, 146, 215, 110,   4, 250,
  68,  44, 174, 177,  30,  98, 143, 241, 180, 127, 113,  48,   0,   1, 179, 199,
  59, 106, 201, 114,  29,  86, 173, 133, 217,  44, 200, 141, 107, 172,  16,  60,
  82,  58, 239,  94, 141, 234, 186, 235, 109, 173, 249, 139, 141,  59, 100, 248,
  84, 144,  49, 160,  51, 207, 164, 103,  74,  97, 146, 202, 193, 125, 168, 134,
 236, 111, 135, 121,  59, 145, 168, 200, 181, 173, 109,   2, 255,   6,   9, 245,
  90, 202, 214, 143, 121,  65,  85, 232, 132,  77, 228,  84,  26,  54, 184,  15,
 161,  29, 177,  79,  43,   0, 156, 184, 163, 165,  62,  90, 179,  93,  45, 239,
   1,  16, 120, 189, 127,  47,  74, 166,  20, 214, 233, 226,  89, 217, 229,  26,
 156,  53, 162,  60,  21,   3, 192,  72, 111,  51,  53, 101, 181, 208,  88,  82,
 179, 160, 219, 113, 240, 108,  43, 224, 162, 147,  62,  14,  95,  81, 205,   4,
 160, 177, 225, 115,  29,  69, 235, 168, 148,  29, 128, 114, 124, 129, 172, 165,
 215, 231, 214,  86, 160,  44, 157,  91, 248, 183,  73, 164,  56, 181, 162,  92,
 141, 118, 127, 240, 196,  77,   0,   9, 244,  79, 250, 100, 195,  25, 255,  85,
  94,  35, 212, 137, 107,  34, 110,  20, 200, 104,  17,  32, 231,  43, 150, 159,
 231, 216, 223, 190, 226, 109, 162, 197,  87,  92, 224,  11, 111,  73,  60, 225,
 238,  73, 246, 169,  19, 217, 119,  38, 121, 118,  70,  82,  99, 241, 110,  67,
  31,  76, 146, 215, 124, 240,  31, 103, 139, 224,  75, 160,  31,  78,  93,   4,
  64,   9, 103, 223,   6, 227, 119,  85, 116,  81,  21,  43,  46, 206, 234, 132,
  85,  99,  22, 131, 135,  97,  86,  13, 234, 188,  21,  14,  89, 169, 207, 238,
 219, 177, 190,  72, 157,  41, 114, 140,  92, 141, 186,   1,  63, 107, 225, 184,
 118, 150, 153, 254, 241, 106, 120, 210, 104, 144, 151, 161,  88, 206, 125, 164,
  15, 211, 173,  49, 146, 241,  71,  36,  58, 201,  46,  27,  33, 187,  91, 162,
 117,  19, 210, 213, 187,  97, 193,  50, 190, 114, 217,  60,  61, 167, 207, 213,
 213,  53, 135,  34, 156,  91, 115, 119,  46,  99, 242,   1,  90,  52, 198, 227,
 201,  91, 216, 146, 210,  82, 121,  38,  73, 133, 182, 193, 132, 148, 246,  75,
 109, 157, 179, 113, 176, 134, 205, 159, 148,  58, 103, 171, 132, 156, 133, 147,
 161, 231,  39, 100, 175,  97, 125,  28, 183, 129, 135, 191, 202, 181,  29, 218,
  43, 104, 148, 203, 189, 204,   4, 182, 169,   1, 134, 122, 141, 202,  13, 187,
 177, 112, 162,  35, 231,   6,   8, 241,  99,   6, 191,  45, 113, 113, 101, 104};

// The S-Box we use for further linearity breaking.
// We created it by taking the digits of decimal expansion of e.
// The code that created it can be found in 'ProduceRandomSBox.c'.
//
// The table is indexed by a byte value: in the listing below the row label (trailing
// comment) is the high nibble of the index and the column label is the low nibble.
// ComputeSingleSWIFFTX substitutes every byte b of its intermediate buffer with SBox[b].
unsigned char SBox[256] = {
// 0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
0x7d, 0xd1, 0x70, 0x0b, 0xfa, 0x39, 0x18, 0xc3, 0xf3, 0xbb, 0xa7, 0xd4, 0x84, 0x25, 0x3b, 0x3c,   // 0
0x2c, 0x15, 0x69, 0x9a, 0xf9, 0x27, 0xfb, 0x02, 0x52, 0xba, 0xa8, 0x4b, 0x20, 0xb5, 0x8b, 0x3a,   // 1
0x88, 0x8e, 0x26, 0xcb, 0x71, 0x5e, 0xaf, 0xad, 0x0c, 0xac, 0xa1, 0x93, 0xc6, 0x78, 0xce, 0xfc,   // 2
0x2a, 0x76, 0x17, 0x1f, 0x62, 0xc2, 0x2e, 0x99, 0x11, 0x37, 0x65, 0x40, 0xfd, 0xa0, 0x03, 0xc1,   // 3
0xca, 0x48, 0xe2, 0x9b, 0x81, 0xe4, 0x1c, 0x01, 0xec, 0x68, 0x7a, 0x5a, 0x50, 0xf8, 0x0e, 0xa3,   // 4
0xe8, 0x61, 0x2b, 0xa2, 0xeb, 0xcf, 0x8c, 0x3d, 0xb4, 0x95, 0x13, 0x08, 0x46, 0xab, 0x91, 0x7b,   // 5
0xea, 0x55, 0x67, 0x9d, 0xdd, 0x29, 0x6a, 0x8f, 0x9f, 0x22, 0x4e, 0xf2, 0x57, 0xd2, 0xa9, 0xbd,   // 6
0x38, 0x16, 0x5f, 0x4c, 0xf7, 0x9e, 0x1b, 0x2f, 0x30, 0xc7, 0x41, 0x24, 0x5c, 0xbf, 0x05, 0xf6,   // 7
0x0a, 0x31, 0xa5, 0x45, 0x21, 0x33, 0x6b, 0x6d, 0x6c, 0x86, 0xe1, 0xa4, 0xe6, 0x92, 0x9c, 0xdf,   // 8
0xe7, 0xbe, 0x28, 0xe3, 0xfe, 0x06, 0x4d, 0x98, 0x80, 0x04, 0x96, 0x36, 0x3e, 0x14, 0x4a, 0x34,   // 9
0xd3, 0xd5, 0xdb, 0x44, 0xcd, 0xf5, 0x54, 0xdc, 0x89, 0x09, 0x90, 0x42, 0x87, 0xff, 0x7e, 0x56,   // A
0x5d, 0x59, 0xd7, 0x23, 0x75, 0x19, 0x97, 0x73, 0x83, 0x64, 0x53, 0xa6, 0x1e, 0xd8, 0xb0, 0x49,   // B
0x3f, 0xef, 0xbc, 0x7f, 0x43, 0xf0, 0xc9, 0x72, 0x0f, 0x63, 0x79, 0x2d, 0xc0, 0xda, 0x66, 0xc8,   // C
0x32, 0xde, 0x47, 0x07, 0xb8, 0xe9, 0x1d, 0xc4, 0x85, 0x74, 0x82, 0xcc, 0x60, 0x51, 0x77, 0x0d,   // D
0xaa, 0x35, 0xed, 0x58, 0x7c, 0x5b, 0xb9, 0x94, 0x6e, 0x8d, 0xb1, 0xc5, 0xb7, 0xee, 0xb6, 0xae,   // E
0x10, 0xe0, 0xd6, 0xd9, 0xe5, 0x4f, 0xf1, 0x12, 0x00, 0xd0, 0xf4, 0x1a, 0x6f, 0x8a, 0xb3, 0xb2 }; // F

///////////////////////////////////////////////////////////////////////////////////////////////
//
//	Helper functions definition portion.
//
///////////////////////////////////////////////////////////////////////////////////////////////

// Translates an input array with values in base 257 to output array with values in base 256.
// Returns the carry bit.
//
// Parameters:
// - input: the input array of size EIGHTH_N. Each value in the array is a number in Z_257.
//          The MSB is assumed to be the last one in the array.
// - output: receives the EIGHTH_N base-256 digits (bytes) of the same number, least
//           significant byte first.
//
// Returns:
// - The carry bit (MSB): whatever is left above the EIGHTH_N output bytes.
swift_int16_t TranslateToBase256(swift_int32_t input[EIGHTH_N], unsigned char output[EIGHTH_N]);

// Translates an input integer into the range (-FIELD_SIZE / 2) <= result <= (FIELD_SIZE / 2).
// With FIELD_SIZE = 257 that is the range [-128, 128].
//
// Parameters:
// - x: the input integer (may be negative).
//
// Returns:
// - The result, which is congruent to x modulo FIELD_SIZE, such that
//   |result| <= (FIELD_SIZE / 2).
int Center(int x);

// Calculates bit reversal permutation.
//
// Parameters:
// - input: the input to reverse.
// - numOfBits: despite its name this is NOT a bit count. It is OR-ed into the input as a
//   sentinel bit that marks where the reversal stops, so it must be a power of two that is
//   larger than the input: pass (1 << b) to reverse b bits. The callers in this file pass
//   W and N / W (both 8), i.e. they reverse 3 bits.
//
// Returns:
// - The resulting number, which is obtained from the input by reversing its bits.
int ReverseBits(int input, int numOfBits);

// Initializes the FFT fast lookup tables ('multipliers' and 'fftTable').
// The work needs to be done only once: the function sets the 'wasSetupDone' flag when it
// finishes and returns immediately on every later call.
void InitializeSWIFFTX();

// Calculates the FFT.
//
// Parameters:
// - input: the input to the FFT, EIGHTH_N bytes.
// - output: the resulting output; N values are written.
void FFT(const unsigned char input[EIGHTH_N], swift_int32_t *output);

///////////////////////////////////////////////////////////////////////////////////////////////
// Helper functions implementation portion.
///////////////////////////////////////////////////////////////////////////////////////////////

// Re-encodes a number given as EIGHTH_N base-257 digits (least significant digit first)
// into EIGHTH_N base-256 bytes (least significant byte first) plus a carry.
//
// The digits are first fused two at a time into "pairs", so that the number becomes a sum
// of pairs[p] * (257^2)^p. Since 257^2 = 65536 + 513, each pair weight is then converted to
// base 65536 by repeatedly folding pairs[p + 1] * 513 into pairs[p] and pushing everything
// above 16 bits up into pairs[p + 1].
//
// Parameters:
// - input: EIGHTH_N values, each a number in Z_257.
// - output: receives EIGHTH_N bytes.
//
// Returns:
// - The bits of the top pair above bit 15 (the carry).
swift_int16_t TranslateToBase256(swift_int32_t input[EIGHTH_N], unsigned char output[EIGHTH_N])
{
	swift_int32_t pairs[EIGHTH_N / 2];
	const int last = (EIGHTH_N / 2) - 1;
	int p, start;

	// pairs[p] = input[2p] + 257 * input[2p + 1]
	for (p = 0; p <= last; ++p)
	{
		const swift_int32_t high = input[2 * p + 1];
		pairs[p] = input[2 * p] + high + (high << 8);
	}

	// Sweep from the most significant pair downwards; every sweep restarts one pair lower
	// and carries upwards to the top.
	for (start = last - 1; start >= 0; --start)
	{
		for (p = start; p < last; ++p)
		{
			// pairs[p] + 513 * pairs[p + 1], because 257^2 = 513 % 256^2.
			const swift_int32_t upper = pairs[p + 1];
			const swift_int32_t wide = pairs[p] + upper + (upper << 9);

			pairs[p] = wide & 0xffff;
			pairs[p + 1] = upper + (wide >> 16);
		}
	}

	// Every pair yields two output bytes, low byte first.
	for (p = 0; p <= last; ++p)
	{
		output[2 * p] = (unsigned char) (pairs[p] & 0xff);
		output[2 * p + 1] = (unsigned char) ((pairs[p] >> 8) & 0xff);
	}

	return (pairs[last] >> 16);
}

// Maps x to its representative modulo FIELD_SIZE that is closest to zero.
//
// Parameters:
// - x: any integer (the C remainder of a negative x is negative, which is handled below).
//
// Returns:
// - A value congruent to x modulo FIELD_SIZE with |value| <= FIELD_SIZE / 2.
int Center(int x)
{
	const int half = FIELD_SIZE / 2;
	const int remainder = x % FIELD_SIZE;

	// Too far on the positive side: step down by one modulus.
	if (remainder > half)
		return remainder - FIELD_SIZE;

	// Too far on the negative side: step up by one modulus.
	if (remainder < -half)
		return remainder + FIELD_SIZE;

	return remainder;
}

// Reverses the low bits of 'input'.
//
// 'numOfBits' is OR-ed into the input and acts as a sentinel: bits are consumed from the
// least significant end until only the sentinel is left. To reverse b bits the caller
// therefore passes (1 << b), not b, and 'input' must be smaller than that value.
//
// Returns:
// - The bits of 'input' below the sentinel, in reversed order.
int ReverseBits(int input, int numOfBits)
{
	int remaining = input | numOfBits;
	int result = 0;

	while (remaining > 1)
	{
		// Move the current lowest bit onto the low end of the result.
		result = (result << 1) | (remaining & 1);
		remaining >>= 1;
	}

	return result;
}

// Fills the two lookup tables used by FFT():
// - multipliers[8 * i + j] = OMEGA^(ReverseBits(i) * (2j + 1)), for i < N / W and j < W;
// - fftTable[8 * x + j], for every byte value x and j < 8: the sum, over the set bits k
//   of x, of OMEGA^((EIGHTH_N * (2j + 1) * ReverseBits(k)) mod 2N).
// All powers and sums are taken modulo FIELD_SIZE and centered with Center().
//
// The tables are computed on the first call only; once 'wasSetupDone' is set, later calls
// return immediately.
void InitializeSWIFFTX()
{
	// omegaPowers[p] holds the centered value of OMEGA^p.
	int omegaPowers[2 * N];
	int power, row, col, byteValue, bit;

	if (wasSetupDone)
		return;

	omegaPowers[0] = 1;
	for (power = 1; power < (2 * N); ++power)
		omegaPowers[power] = Center(omegaPowers[power - 1] * OMEGA);

	for (row = 0; row < (N / W); ++row)
	{
		for (col = 0; col < W; ++col)
		{
			const int exponent = ReverseBits(row, N / W) * (2 * col + 1);
			multipliers[(row << 3) + col] = omegaPowers[exponent];
		}
	}

	for (byteValue = 0; byteValue < 256; ++byteValue)
	{
		for (col = 0; col < 8; ++col)
		{
			int total = 0;

			// Only the set bits of the byte contribute a power of OMEGA.
			for (bit = 0; bit < 8; ++bit)
			{
				const int exponent = (EIGHTH_N * (2 * col + 1) * ReverseBits(bit, W)) % (2 * N);
				total += omegaPowers[exponent] * ((byteValue >> bit) & 1);
			}

			fftTable[(byteValue << 3) + col] = Center(total);
		}
	}

	wasSetupDone = true;
}

// In the original code the F matrix is rotated, so it was not arranged
// the same way as the other data. Rearranging F made vectorizing up to
// 256 bits possible.
// Also, the custom 16-bit data types of the original code are now all
// aliased to the 32-bit int32_t.

// Computes one N-value (64) transform of EIGHTH_N (8) input bytes.
//
// For every input byte i the 8 precomputed entries fftTable[input[i] * 8 + j] (j = 0..7)
// are multiplied element-wise by multipliers[8 * i + j]. For every column j the 8 products
// (one per input byte) are then combined by three add/subtract (butterfly) stages, with
// left shifts by 2, 4 and 6 bits applied to some operands between the stages, and every
// result is passed through Q_REDUCE. The result of column j, butterfly slot k is stored
// at output[8 * k + j]; all three code paths below produce this same layout.
//
// Q_REDUCE(a) = (a & 0xff) - (a >> 8). Since 256 == -1 (mod 257) the result is congruent
// to a modulo 257, but it is only a partial reduction: the value is not brought into a
// canonical range here (SWIFFTSum applies the final '% FIELD_SIZE').
//
// Three implementations are selected at compile time:
// - __AVX2__: one 256-bit vector (8 columns) per input byte;
// - __SSE4_1__ or __ARM_NEON: two 128-bit vectors (4 columns each) per input byte;
// - otherwise: portable scalar code, fully unrolled.
// NOTE(review): the vector paths read fftTable / multipliers and write 'output' through
// vector-typed pointers, so they presumably rely on these buffers being suitably
// aligned - confirm at the definitions and call sites.
//
// Parameters:
// - input: EIGHTH_N bytes.
// - output: receives N values.
void FFT( const unsigned char input[EIGHTH_N], swift_int32_t *output )
{
#if defined(__AVX2__)

   // Fi holds the 8 column values that belong to input byte i.
   __m256i F0, F1, F2, F3, F4, F5, F6, F7;
   // Viewed as vectors, table[b] is the 8-entry row of fftTable for byte value b and
   // mul[i] is the 8-entry row of multipliers for input position i.
   __m256i *table = (__m256i*)fftTable;
   __m256i tbl = table[ input[0] ];
   __m256i *mul = (__m256i*)multipliers;
   __m256i *out = (__m256i*)output;

   F0 = _mm256_mullo_epi32( mul[0], tbl );
   tbl = table[ input[1] ];
   F1 = _mm256_mullo_epi32( mul[1], tbl );
   tbl = table[ input[2] ];
   F2 = _mm256_mullo_epi32( mul[2], tbl );
   tbl = table[ input[3] ];
   F3 = _mm256_mullo_epi32( mul[3], tbl );
   tbl = table[ input[4] ];
   F4 = _mm256_mullo_epi32( mul[4], tbl );
   tbl = table[ input[5] ];
   F5 = _mm256_mullo_epi32( mul[5], tbl );
   tbl = table[ input[6] ];
   F6 = _mm256_mullo_epi32( mul[6], tbl );
   tbl = table[ input[7]  ];
   F7 = _mm256_mullo_epi32( mul[7], tbl );

   // Butterfly: a becomes a + b, b becomes a - b (both from the old values).
   #define ADD_SUB( a, b ) \
   { \
      __m256i tmp = b; \
      b = _mm256_sub_epi32( a, b ); \
      a = _mm256_add_epi32( a, tmp ); \
   }
   
   // Stage 1: neighbouring pairs.
   ADD_SUB( F0, F1 );
   ADD_SUB( F2, F3 );
   ADD_SUB( F4, F5 );
   ADD_SUB( F6, F7 );
   F3 = _mm256_slli_epi32( F3, 4 );
   F7 = _mm256_slli_epi32( F7, 4 );
   // Stage 2: distance 2.
   ADD_SUB( F0, F2 );
   ADD_SUB( F1, F3 );
   ADD_SUB( F4, F6 );
   ADD_SUB( F5, F7 );  
   F6 = _mm256_slli_epi32( F6, 4 );
   F7 = _mm256_slli_epi32( F7, 6 );
   F5 = _mm256_slli_epi32( F5, 2 );
   // Stage 3: distance 4.
   ADD_SUB( F0, F4 );
   ADD_SUB( F1, F5 );
   ADD_SUB( F2, F6 );
   ADD_SUB( F3, F7 );

   #undef ADD_SUB

#if defined(VL256)   

   // The byte mask 0x11111111 keeps byte 0 of every 32-bit lane and zeroes the other
   // three, which is the same as (a & 0xff) per lane.
   #define Q_REDUCE( a ) \
       _mm256_sub_epi32( _mm256_maskz_mov_epi8( 0x11111111, a ), \
                         _mm256_srai_epi32( a, 8 ) )
         
#else

   const __m256i mask = _mm256_set1_epi32( 0x000000ff );

   #define Q_REDUCE( a ) \
       _mm256_sub_epi32( _mm256_and_si256( a, mask ), \
                         _mm256_srai_epi32( a, 8 ) )
   
#endif

   // out[k] covers output[8 * k .. 8 * k + 7].
   out[0] = Q_REDUCE( F0 );  
   out[1] = Q_REDUCE( F1 );                        
   out[2] = Q_REDUCE( F2 );                        
   out[3] = Q_REDUCE( F3 );                        
   out[4] = Q_REDUCE( F4 );                        
   out[5] = Q_REDUCE( F5 );                        
   out[6] = Q_REDUCE( F6 );                        
   out[7] = Q_REDUCE( F7 );

   #undef Q_REDUCE

#elif defined(__SSE4_1__) || defined(__ARM_NEON)

   // Same computation as the AVX2 path with every 8-column row split into two 128-bit
   // halves: F[2 * i] holds columns 0..3 and F[2 * i + 1] columns 4..7 of input byte i.
   v128_t F[16] __attribute__ ((aligned (64)));
   v128_t *mul = (v128_t*)multipliers;
   v128_t *out = (v128_t*)output;
   v128_t *tbl = (v128_t*)&( fftTable[ input[0] << 3 ] );

   F[ 0] = v128_mul32( mul[ 0], tbl[0] );
   F[ 1] = v128_mul32( mul[ 1], tbl[1] );
   tbl = (v128_t*)&( fftTable[ input[1] << 3 ] );
   F[ 2] = v128_mul32( mul[ 2], tbl[0] );
   F[ 3] = v128_mul32( mul[ 3], tbl[1] );
   tbl = (v128_t*)&( fftTable[ input[2] << 3 ] );
   F[ 4] = v128_mul32( mul[ 4], tbl[0] );
   F[ 5] = v128_mul32( mul[ 5], tbl[1] );
   tbl = (v128_t*)&( fftTable[ input[3] << 3 ] );
   F[ 6] = v128_mul32( mul[ 6], tbl[0] );
   F[ 7] = v128_mul32( mul[ 7], tbl[1] );
   tbl = (v128_t*)&( fftTable[ input[4] << 3 ] );
   F[ 8] = v128_mul32( mul[ 8], tbl[0] );
   F[ 9] = v128_mul32( mul[ 9], tbl[1] );
   tbl = (v128_t*)&( fftTable[ input[5] << 3 ] );
   F[10] = v128_mul32( mul[10], tbl[0] );
   F[11] = v128_mul32( mul[11], tbl[1] );
   tbl = (v128_t*)&( fftTable[ input[6] << 3 ] );
   F[12] = v128_mul32( mul[12], tbl[0] );
   F[13] = v128_mul32( mul[13], tbl[1] );
   tbl = (v128_t*)&( fftTable[ input[7] << 3 ] );
   F[14] = v128_mul32( mul[14], tbl[0] );
   F[15] = v128_mul32( mul[15], tbl[1] );

   // Butterfly: a becomes a + b, b becomes a - b (both from the old values).
   #define ADD_SUB( a, b ) \
   { \
      v128_t tmp = b; \
      b = v128_sub32( a, b ); \
      a = v128_add32( a, tmp ); \
   }

   // Stage 1: neighbouring input bytes (index distance 2 because of the half split).
   ADD_SUB( F[ 0], F[ 2] );
   ADD_SUB( F[ 1], F[ 3] );
   ADD_SUB( F[ 4], F[ 6] );
   ADD_SUB( F[ 5], F[ 7] );
   ADD_SUB( F[ 8], F[10] );
   ADD_SUB( F[ 9], F[11] );
   ADD_SUB( F[12], F[14] );
   ADD_SUB( F[13], F[15] );
   F[ 6] = v128_sl32( F[ 6], 4 );
   F[ 7] = v128_sl32( F[ 7], 4 );
   F[14] = v128_sl32( F[14], 4 );
   F[15] = v128_sl32( F[15], 4 );
   // Stage 2.
   ADD_SUB( F[ 0], F[ 4] );
   ADD_SUB( F[ 1], F[ 5] );
   ADD_SUB( F[ 2], F[ 6] );
   ADD_SUB( F[ 3], F[ 7] );
   ADD_SUB( F[ 8], F[12] );
   ADD_SUB( F[ 9], F[13] );
   ADD_SUB( F[10], F[14] );
   ADD_SUB( F[11], F[15] );
   F[10] = v128_sl32( F[10], 2 );
   F[11] = v128_sl32( F[11], 2 );
   F[12] = v128_sl32( F[12], 4 );
   F[13] = v128_sl32( F[13], 4 );
   F[14] = v128_sl32( F[14], 6 );
   F[15] = v128_sl32( F[15], 6 );
   // Stage 3.
   ADD_SUB( F[ 0], F[ 8] );
   ADD_SUB( F[ 1], F[ 9] );
   ADD_SUB( F[ 2], F[10] );
   ADD_SUB( F[ 3], F[11] );
   ADD_SUB( F[ 4], F[12] );
   ADD_SUB( F[ 5], F[13] );
   ADD_SUB( F[ 6], F[14] );
   ADD_SUB( F[ 7], F[15] );

   #undef ADD_SUB

   const v128_t mask = v128_32( 0x000000ff );

   #define Q_REDUCE( a ) \
      v128_sub32( v128_and( a, mask ), v128_sra32( a, 8 ) ) 

   // out[n] covers output[4 * n .. 4 * n + 3].
   out[ 0] = Q_REDUCE( F[ 0] );
   out[ 1] = Q_REDUCE( F[ 1] );
   out[ 2] = Q_REDUCE( F[ 2] );
   out[ 3] = Q_REDUCE( F[ 3] );
   out[ 4] = Q_REDUCE( F[ 4] );
   out[ 5] = Q_REDUCE( F[ 5] );
   out[ 6] = Q_REDUCE( F[ 6] );
   out[ 7] = Q_REDUCE( F[ 7] );
   out[ 8] = Q_REDUCE( F[ 8] );
   out[ 9] = Q_REDUCE( F[ 9] );
   out[10] = Q_REDUCE( F[10] );
   out[11] = Q_REDUCE( F[11] );
   out[12] = Q_REDUCE( F[12] );
   out[13] = Q_REDUCE( F[13] );
   out[14] = Q_REDUCE( F[14] );
   out[15] = Q_REDUCE( F[15] );

   #undef Q_REDUCE

#else   // AVX256 elif SSE4_1
   
   // Scalar path. F is stored column-major here: F[i + 8 * j] is the product for input
   // byte i and table column j, so the 8 values of one column are contiguous.
   swift_int16_t *mult = multipliers;
	swift_int16_t *table = &( fftTable[ input[0] << 3 ] );
   swift_int32_t F[64];

   // Rolled form of the unrolled multiplications below, kept for reference only.
   /*
   for (int i = 0; i < 8; i++)
   {
      int j = i<<3;
      swift_int16_t *table = &(fftTable[input[i] << 3]);
      F[i   ] = mult[j+0] * table[0];
      F[i+ 8] = mult[j+1] * table[1];
      F[i+16] = mult[j+2] * table[2];
      F[i+24] = mult[j+3] * table[3];
      F[i+32] = mult[j+4] * table[4];
      F[i+40] = mult[j+5] * table[5];
      F[i+48] = mult[j+6] * table[6];
      F[i+56] = mult[j+7] * table[7];
   }
*/

	// Input byte 0.
	F[ 0] = mult[ 0] * table[0];
	F[ 8] = mult[ 1] * table[1];
	F[16] = mult[ 2] * table[2];
	F[24] = mult[ 3] * table[3];
	F[32] = mult[ 4] * table[4];
	F[40] = mult[ 5] * table[5];
	F[48] = mult[ 6] * table[6];
	F[56] = mult[ 7] * table[7];

	// Input byte 1.
	table = &(fftTable[input[1] << 3]);

	F[ 1] = mult[ 8] * table[0];
	F[ 9] = mult[ 9] * table[1];
	F[17] = mult[10] * table[2];
	F[25] = mult[11] * table[3];
	F[33] = mult[12] * table[4];
	F[41] = mult[13] * table[5];
	F[49] = mult[14] * table[6];
	F[57] = mult[15] * table[7];

	// Input byte 2.
	table = &(fftTable[input[2] << 3]);

	F[ 2] = mult[16] * table[0];
	F[10] = mult[17] * table[1];
	F[18] = mult[18] * table[2];
	F[26] = mult[19] * table[3];
	F[34] = mult[20] * table[4];
	F[42] = mult[21] * table[5];
	F[50] = mult[22] * table[6];
	F[58] = mult[23] * table[7];

	// Input byte 3.
	table = &(fftTable[input[3] << 3]);

	F[ 3] = mult[24] * table[0];
	F[11] = mult[25] * table[1];
	F[19] = mult[26] * table[2];
	F[27] = mult[27] * table[3];
	F[35] = mult[28] * table[4];
	F[43] = mult[29] * table[5];
	F[51] = mult[30] * table[6];
	F[59] = mult[31] * table[7];

	// Input byte 4.
	table = &(fftTable[input[4] << 3]);

	F[ 4] = mult[32] * table[0];
	F[12] = mult[33] * table[1];
	F[20] = mult[34] * table[2];
	F[28] = mult[35] * table[3];
	F[36] = mult[36] * table[4];
	F[44] = mult[37] * table[5];
	F[52] = mult[38] * table[6];
	F[60] = mult[39] * table[7];

	// Input byte 5.
	table = &(fftTable[input[5] << 3]);

	F[ 5] = mult[40] * table[0];
	F[13] = mult[41] * table[1];
	F[21] = mult[42] * table[2];
	F[29] = mult[43] * table[3];
	F[37] = mult[44] * table[4];
	F[45] = mult[45] * table[5];
	F[53] = mult[46] * table[6];
	F[61] = mult[47] * table[7];

	// Input byte 6.
	table = &(fftTable[input[6] << 3]);

	F[ 6] = mult[48] * table[0];
	F[14] = mult[49] * table[1];
	F[22] = mult[50] * table[2];
	F[30] = mult[51] * table[3];
	F[38] = mult[52] * table[4];
	F[46] = mult[53] * table[5];
	F[54] = mult[54] * table[6];
	F[62] = mult[55] * table[7];

	// Input byte 7.
	table = &(fftTable[input[7] << 3]);

	F[ 7] = mult[56] * table[0];
	F[15] = mult[57] * table[1];
	F[23] = mult[58] * table[2];
	F[31] = mult[59] * table[3];
	F[39] = mult[60] * table[4];
	F[47] = mult[61] * table[5];
	F[55] = mult[62] * table[6];
	F[63] = mult[63] * table[7];

   // Butterfly: a becomes a + b, b becomes a - b (both from the old values).
   #define ADD_SUB( a, b ) \
   { \
      int temp = b; \
      b = a - b; \
      a = a + temp; \
   }
   
   // Partial reduction modulo 257 (256 == -1 mod 257).
   #define Q_REDUCE( a ) \
      ( ( (a) & 0xff ) - ( (a) >> 8 ) )
   
// Rolled form of the eight unrolled "iterations" below (one per table column), kept for
// reference only.
/*

   for ( int i = 0; i < 8; i++ )
   {
      int j = i<<3;
      ADD_SUB( F[j  ], F[j+1] );
      ADD_SUB( F[j+2], F[j+3] );
      ADD_SUB( F[j+4], F[j+5] );
      ADD_SUB( F[j+6], F[j+7] );

      F[j+3] <<= 4;
      F[j+7] <<= 4;

      ADD_SUB( F[j  ], F[j+2] );
      ADD_SUB( F[j+1], F[j+3] );
      ADD_SUB( F[j+4], F[j+6] );
      ADD_SUB( F[j+5], F[j+7] );

      F[j+5] <<= 2;
      F[j+6] <<= 4;
      F[j+7] <<= 6;

      ADD_SUB( F[j  ], F[j+4] );
      ADD_SUB( F[j+1], F[j+5] );
      ADD_SUB( F[j+2], F[j+6] );
      ADD_SUB( F[j+3], F[j+7] );

      output[i   ] = Q_REDUCE( F[j  ] );
      output[i+ 8] = Q_REDUCE( F[j+1] );
      output[i+16] = Q_REDUCE( F[j+2] );
      output[i+24] = Q_REDUCE( F[j+3] );
      output[i+32] = Q_REDUCE( F[j+4] );
      output[i+40] = Q_REDUCE( F[j+5] );
      output[i+48] = Q_REDUCE( F[j+6] );
      output[i+56] = Q_REDUCE( F[j+7] );
   }
*/

	// Iteration 0:
	ADD_SUB( F[ 0], F[ 1] );
	ADD_SUB( F[ 2], F[ 3] );
	ADD_SUB( F[ 4], F[ 5] );
	ADD_SUB( F[ 6], F[ 7] );
	F[ 3] <<= 4;
	F[ 7] <<= 4;
	ADD_SUB( F[ 0], F[ 2] );
	ADD_SUB( F[ 1], F[ 3] );
	ADD_SUB( F[ 4], F[ 6] );
	ADD_SUB( F[ 5], F[ 7] );
	F[ 5] <<= 2;
	F[ 6] <<= 4;
	F[ 7] <<= 6;
	ADD_SUB( F[ 0], F[ 4] );
	ADD_SUB( F[ 1], F[ 5] );
	ADD_SUB( F[ 2], F[ 6] );
	ADD_SUB( F[ 3], F[ 7] );

   output[ 0] = Q_REDUCE( F[ 0] );
	output[ 8] = Q_REDUCE( F[ 1] );
	output[16] = Q_REDUCE( F[ 2] );
	output[24] = Q_REDUCE( F[ 3] );
	output[32] = Q_REDUCE( F[ 4] );
	output[40] = Q_REDUCE( F[ 5] );
	output[48] = Q_REDUCE( F[ 6] );
	output[56] = Q_REDUCE( F[ 7] );

	// Iteration 1:
	ADD_SUB( F[ 8], F[ 9] );
	ADD_SUB( F[10], F[11] );
	ADD_SUB( F[12], F[13] );
	ADD_SUB( F[14], F[15] );
	F[11] <<= 4;
	F[15] <<= 4;
	ADD_SUB( F[ 8], F[10] );
	ADD_SUB( F[ 9], F[11] );
	ADD_SUB( F[12], F[14] );
	ADD_SUB( F[13], F[15] );
	F[13] <<= 2;
	F[14] <<= 4;
	F[15] <<= 6;
	ADD_SUB( F[ 8], F[12] );
	ADD_SUB( F[ 9], F[13] );
	ADD_SUB( F[10], F[14] );
	ADD_SUB( F[11], F[15] );

	output[ 1] = Q_REDUCE( F[ 8] );
	output[ 9] = Q_REDUCE( F[ 9] );
	output[17] = Q_REDUCE( F[10] );
	output[25] = Q_REDUCE( F[11] );
	output[33] = Q_REDUCE( F[12] );
	output[41] = Q_REDUCE( F[13] );
	output[49] = Q_REDUCE( F[14] );
	output[57] = Q_REDUCE( F[15] );

	// Iteration 2:
	ADD_SUB( F[16], F[17] );
	ADD_SUB( F[18], F[19] );
	ADD_SUB( F[20], F[21] );
	ADD_SUB( F[22], F[23] );
	F[19] <<= 4;
	F[23] <<= 4;
	ADD_SUB( F[16], F[18]);
	ADD_SUB( F[17], F[19]);
	ADD_SUB( F[20], F[22]);
	ADD_SUB( F[21], F[23]);
	F[21] <<= 2;
	F[22] <<= 4;
	F[23] <<= 6;
	ADD_SUB( F[16], F[20] );
	ADD_SUB( F[17], F[21] );
	ADD_SUB( F[18], F[22] );
	ADD_SUB( F[19], F[23] );

	output[ 2] = Q_REDUCE( F[16] );
	output[10] = Q_REDUCE( F[17] );
	output[18] = Q_REDUCE( F[18] );
	output[26] = Q_REDUCE( F[19] );
	output[34] = Q_REDUCE( F[20] );
	output[42] = Q_REDUCE( F[21] );
	output[50] = Q_REDUCE( F[22] );
	output[58] = Q_REDUCE( F[23] );

	// Iteration 3:
	ADD_SUB( F[24], F[25] );
	ADD_SUB( F[26], F[27] );
	ADD_SUB( F[28], F[29] );
	ADD_SUB( F[30], F[31] );
 	F[27] <<= 4;
 	F[31] <<= 4;
	ADD_SUB( F[24], F[26] );
	ADD_SUB( F[25], F[27] );
	ADD_SUB( F[28], F[30] );
	ADD_SUB( F[29], F[31] );
	F[29] <<= 2;
	F[30] <<= 4;
	F[31] <<= 6;
	ADD_SUB( F[24], F[28] );
	ADD_SUB( F[25], F[29] );
	ADD_SUB( F[26], F[30] );
	ADD_SUB( F[27], F[31] );

	output[ 3] = Q_REDUCE( F[24] );
	output[11] = Q_REDUCE( F[25] );
	output[19] = Q_REDUCE( F[26] );
	output[27] = Q_REDUCE( F[27] );
	output[35] = Q_REDUCE( F[28] );
	output[43] = Q_REDUCE( F[29] );
	output[51] = Q_REDUCE( F[30] );
	output[59] = Q_REDUCE( F[31] );

	// Iteration 4:
	ADD_SUB( F[32], F[33] );
	ADD_SUB( F[34], F[35] );
	ADD_SUB( F[36], F[37] );
	ADD_SUB( F[38], F[39] );
	F[35] <<= 4;
	F[39] <<= 4;
	ADD_SUB( F[32], F[34] );
	ADD_SUB( F[33], F[35] );
	ADD_SUB( F[36], F[38] );
	ADD_SUB( F[37], F[39] );
	F[37] <<= 2;
	F[38] <<= 4;
	F[39] <<= 6;
	ADD_SUB( F[32], F[36] );
	ADD_SUB( F[33], F[37] );
	ADD_SUB( F[34], F[38] );
	ADD_SUB( F[35], F[39] );

	output[ 4] = Q_REDUCE( F[32] );
	output[12] = Q_REDUCE( F[33] );
	output[20] = Q_REDUCE( F[34] );
	output[28] = Q_REDUCE( F[35] );
	output[36] = Q_REDUCE( F[36] );
	output[44] = Q_REDUCE( F[37] );
	output[52] = Q_REDUCE( F[38] );
	output[60] = Q_REDUCE( F[39] );

	// Iteration 5:
	ADD_SUB( F[40], F[41] );
	ADD_SUB( F[42], F[43] );
	ADD_SUB( F[44], F[45] );
	ADD_SUB( F[46], F[47] );
	F[43] <<= 4;
	F[47] <<= 4;
	ADD_SUB( F[40], F[42] );
	ADD_SUB( F[41], F[43] );
	ADD_SUB( F[44], F[46] );
	ADD_SUB( F[45], F[47] );
	F[45] <<= 2;
	F[46] <<= 4;
	F[47] <<= 6;
	ADD_SUB( F[40], F[44] );
	ADD_SUB( F[41], F[45] );
	ADD_SUB( F[42], F[46] );
	ADD_SUB( F[43], F[47] );

	output[ 5] = Q_REDUCE( F[40] );
	output[13] = Q_REDUCE( F[41] );
	output[21] = Q_REDUCE( F[42] );
	output[29] = Q_REDUCE( F[43] );
	output[37] = Q_REDUCE( F[44] );
	output[45] = Q_REDUCE( F[45] );
	output[53] = Q_REDUCE( F[46] );
	output[61] = Q_REDUCE( F[47] );

	// Iteration 6:
	ADD_SUB( F[48], F[49] );
	ADD_SUB( F[50], F[51] );
	ADD_SUB( F[52], F[53] );
	ADD_SUB( F[54], F[55] );
	F[51] <<= 4;
	F[55] <<= 4;
	ADD_SUB( F[48], F[50] );
	ADD_SUB( F[49], F[51] );
	ADD_SUB( F[52], F[54] );
	ADD_SUB( F[53], F[55] );
	F[53] <<= 2;
	F[54] <<= 4;
	F[55] <<= 6;
	ADD_SUB( F[48], F[52] );
	ADD_SUB( F[49], F[53] );
	ADD_SUB( F[50], F[54] );
	ADD_SUB( F[51], F[55] );

	output[ 6] = Q_REDUCE( F[48] );
	output[14] = Q_REDUCE( F[49] );
	output[22] = Q_REDUCE( F[50] );
	output[30] = Q_REDUCE( F[51] );
	output[38] = Q_REDUCE( F[52] );
	output[46] = Q_REDUCE( F[53] );
	output[54] = Q_REDUCE( F[54] );
	output[62] = Q_REDUCE( F[55] );

	// Iteration 7:
	ADD_SUB( F[56], F[57] );
	ADD_SUB( F[58], F[59] );
	ADD_SUB( F[60], F[61] );
	ADD_SUB( F[62], F[63] );
	F[59] <<= 4;
	F[63] <<= 4;
	ADD_SUB( F[56], F[58] );
	ADD_SUB( F[57], F[59] );
	ADD_SUB( F[60], F[62] );
	ADD_SUB( F[61], F[63] );
	F[61] <<= 2;
	F[62] <<= 4;
	F[63] <<= 6;
	ADD_SUB( F[56], F[60] );
	ADD_SUB( F[57], F[61] );
	ADD_SUB( F[58], F[62] );
	ADD_SUB( F[59], F[63] );

	output[ 7] = Q_REDUCE( F[56] );
	output[15] = Q_REDUCE( F[57] );
	output[23] = Q_REDUCE( F[58] );
	output[31] = Q_REDUCE( F[59] );
	output[39] = Q_REDUCE( F[60] );
	output[47] = Q_REDUCE( F[61] );
	output[55] = Q_REDUCE( F[62] );
	output[63] = Q_REDUCE( F[63] );

   #undef ADD_SUB
   #undef Q_REDUCE

#endif  // AVX2 elif SSE4.1 else
}

// Runs the FFT stage of SWIFFT over a whole input buffer.
// The SWIFFT computation is split into this FFT stage and SWIFFTSum() because the first
// three SWIFFTs of SWIFFTX differ only in their coefficients (the A's), so the FFT results
// can be computed once and shared.
//
// Parameters:
// - input: m consecutive groups of EIGHTH_N bytes.
// - m: the number of FFTs to perform (the input size divided by 8).
// - output: receives m consecutive groups of N values, one group per FFT.
void SWIFFTFFT(const unsigned char *input, int m, swift_int32_t *output)
{
	int block = 0;

	while ( block < m )
	{
		FFT( input + block * EIGHTH_N, output + block * N );
		++block;
	}
}

// Computes the 'sum' stage of SWIFFT, including the final change of base.
// The SWIFFT computation is split into SWIFFTFFT() and this stage because the first three
// SWIFFTs of SWIFFTX differ only in their coefficients (the A's), so one FFT result can be
// combined with several coefficient sets.
//
// For every column c < N the products input[r * N + c] * a[r * N + c] are summed over the
// m rows and reduced into [0, FIELD_SIZE). The N reduced values are then converted, eight
// at a time, from base 257 to base 256.
//
// Parameters:
// - input: the FFT results, 64 * m values.
// - m: the number of rows (the input size divided by 64).
// - output: receives N + 1 bytes: N result bytes followed by one byte that collects the
//   eight carry bits (bit g belongs to the g-th group of eight values).
// - a: the coefficients of the sum, 64 * m values.
void SWIFFTSum( const swift_int32_t *input, int m, unsigned char *output,
                const swift_int16_t *a )
{
	swift_int32_t result[N] __attribute__ ((aligned (64)));
	swift_int16_t carry = 0;
	int col, row, group;

	// Step 1: unreduced column sums; every path leaves them in result[0 .. N - 1].
#if defined(SIMD512)

   __m512i *acc = (__m512i*)result;
   for ( col = 0; col < N/16; ++col )
   {
      const __m512i *fft  = (__m512i*)input + col;
      const __m512i *coef = (__m512i*)a + col;
      __m512i total = _mm512_setzero_si512();

      // One row is N/16 vectors wide.
      for ( row = 0; row < m; ++row )
         total = _mm512_add_epi32( total,
                    _mm512_mullo_epi32( fft[ row * (N/16) ], coef[ row * (N/16) ] ) );
      acc[col] = total;
   }

#elif defined(__AVX2__)

   __m256i *acc = (__m256i*)result;
   for ( col = 0; col < N/8; ++col )
   {
      const __m256i *fft  = (__m256i*)input + col;
      const __m256i *coef = (__m256i*)a + col;
      __m256i total = _mm256_setzero_si256();

      // One row is N/8 vectors wide.
      for ( row = 0; row < m; ++row )
         total = _mm256_add_epi32( total,
                    _mm256_mullo_epi32( fft[ row * (N/8) ], coef[ row * (N/8) ] ) );
      acc[col] = total;
   }

#elif defined(__SSE4_1__)

   v128_t *acc = (v128_t*)result;
   for ( col = 0; col < N/4; ++col )
   {
      const v128_t *fft  = (v128_t*)input + col;
      const v128_t *coef = (v128_t*)a + col;
      v128_t total = v128_zero;

      // One row is N/4 vectors wide.
      for ( row = 0; row < m; ++row )
         total = v128_add32( total,
                    v128_mul32( fft[ row * (N/4) ], coef[ row * (N/4) ] ) );
      acc[col] = total;
   }

#else

	for (col = 0; col < N; ++col)
	{
		swift_int32_t total = 0;

		for (row = 0; row < m; ++row)
			total += input[row * N + col] * a[row * N + col];
		result[col] = total;
	}

#endif

	// Step 2: reduce modulo FIELD_SIZE. (FIELD_SIZE << 22) is a multiple of FIELD_SIZE that
	// is added first so that the operand of % is not negative for the sums expected here.
	for (col = 0; col < N; ++col)
		result[col] = ((FIELD_SIZE << 22) + result[col]) % FIELD_SIZE;

	// Step 3: change of base, eight values at a time; each group contributes one carry bit.
	for (group = 0; group < 8; ++group)
	{
		const int offset = group << 3;
		int carryBit = TranslateToBase256(result + offset, output + offset);

		carry |= carryBit << group;
	}

	output[N] = carry;
}

/*
void ComputeSingleSWIFFTX_smooth(unsigned char input[SWIFFTX_INPUT_BLOCK_SIZE],
                          unsigned char output[SWIFFTX_OUTPUT_BLOCK_SIZE],
						  bool doSmooth)
{
	int i;
	// Will store the result of the FFT parts:
	swift_int32_t fftOut[N * M] __attribute__ ((aligned (64)));
	unsigned char intermediate[N * 3 + 8] __attribute__ ((aligned (64)));
	unsigned char carry0,carry1,carry2;

	// Do the three SWIFFTS while remembering the three carry bytes (each carry byte gets
	// overriden by the following SWIFFT):

	// 1. Compute the FFT of the input - the common part for the first 3 SWIFFTs:
	SWIFFTFFT(input, M, fftOut);

	// 2. Compute the sums of the 3 SWIFFTs, each using a different set of coefficients:

	// 2a. The first SWIFFT:
	SWIFFTSum(fftOut, M, intermediate, As);
	// Remember the carry byte:
	carry0 = intermediate[N];

	// 2b. The second one:
	SWIFFTSum(fftOut, M, intermediate + N, As + (M * N));
	carry1 = intermediate[2 * N];

	// 2c. The third one:
	SWIFFTSum(fftOut, M, intermediate + (2 * N), As + 2 * (M * N));
	carry2 = intermediate[3 * N];

	//2d. Put three carry bytes in their place
	intermediate[3 * N] = carry0;
	intermediate[(3 * N) + 1] = carry1;
	intermediate[(3 * N) + 2] = carry2;

	// Padding  intermediate output with 5 zeroes.
	memset(intermediate + (3 * N) + 3, 0, 5);

	// Apply the S-Box:
	for ( i = 0; i < (3 * N) + 8; ++i )
		intermediate[i] = SBox[intermediate[i]];

	// 3. The final and last SWIFFT:
	SWIFFTFFT(intermediate, 3 * (N/8) + 1, fftOut);
	SWIFFTSum(fftOut,       3 * (N/8) + 1, output, As);

	if (doSmooth)
	{
		unsigned char sum[N];
		register int i, j;
		memset(sum, 0, N);

		for (i = 0; i < (N + 1) * 8; ++i)
		{
			register const swift_int16_t *AsRow;
			register int AShift;

			if ( !( output[i >> 3] & ( 1 << (i&7) ) ) )
				continue;

			AsRow = As + N * M + (i & ~(N - 1)) ;
			AShift = i & 63;

			for ( j = AShift; j < N; ++j )
				sum[j] += AsRow[j - AShift];

			for( j = 0; j < AShift; ++j )
				sum[j] -= AsRow[N - AShift + j];
		}

		for ( i = 0; i < N; ++i )
			output[i] = sum[i];

		output[N] = 0;
	}
}
*/

void ComputeSingleSWIFFTX( unsigned char *input, unsigned char *output )
{
   int i;
   // Will store the result of the FFT parts:
   swift_int32_t fftOut[N * M] __attribute__ ((aligned (64)));
   unsigned char sum[ N*3 + 8 ] __attribute__ ((aligned (64)));
   unsigned char carry0,carry1,carry2;

   // Do the three SWIFFTS while remembering the three carry bytes (each carry byte gets
   // overriden by the following SWIFFT):

   // 1. Compute the FFT of the input - the common part for the first 3 SWIFFTs:
   SWIFFTFFT( input, M, fftOut );

   // 2. Compute the sums of the 3 SWIFFTs, each using a different set of coefficients:

   // 2a. The first SWIFFT:
   SWIFFTSum( fftOut, M, sum,       As         );
   carry0 = sum[N];

   // 2b. The second one:
   SWIFFTSum( fftOut, M, sum + N,   As +   M*N );
   carry1 = sum[ 2*N ];

   // 2c. The third one:
   SWIFFTSum( fftOut, M, sum + 2*N, As + 2*M*N );
   carry2 = sum[ 3*N ];

   //2d. Put three carry bytes in their place
   sum[ 3*N     ] = carry0;
   sum[ 3*N + 1 ] = carry1;
   sum[ 3*N + 2 ] = carry2;

   // Padding  intermediate output with 5 zeroes.
   memset( sum + 3*N + 3, 0, 5 );

   // Apply the S-Box:
   for ( i = 0; i < (3 * N) + 8; ++i )
      sum[i] = SBox[ sum[i] ];

   // 3. The final and last SWIFFT:
   SWIFFTFFT( sum, 3 * (N/8) + 1, fftOut );
   SWIFFTSum( fftOut,       3 * (N/8) + 1, sum, As );
   memcpy( output, sum, SWIFFTX_OUTPUT_BLOCK_SIZE - 1 );
}
