/*
 * =====================================================================================
 *
 *       Filename:  build-table.cpp
 *
 *    Description:  
 *
 *        Version:  1.0
 *        Created:  03/04/2011 22:11:22
 *       Revision:  none
 *       Compiler:  gcc
 *
 *         Author:  John Ryland (jryland), jryland@xiaofrog.com
 *        Company:  InvertedLogic
 *
 * =====================================================================================
 */
#include <QApplication>
#include <QPixmap>
#include <QImage>
#include <stdio.h>
#include "dct.h"

unsigned char uclamp(int x)
{
    if (x < 0)
        return 0;
    if (x > 255)
        return 255;
    return x;
}
signed char sclamp(int x)
{
    if (x < -127)
        return -127;
    if (x > 128)
        return 128;
    return x;
}



void output(FILE *f, unsigned char *image)
{
    int probabilitiesB[65536];
    int *probabilitiesA = (int*)malloc(64*65536*sizeof(int));
    memset(probabilitiesA, 0, 64*65536*sizeof(int));
    memset(probabilitiesB, 0, 65536*sizeof(int));
    int prevBlockDCT[64];
    memset(prevBlockDCT, 0, 64*sizeof(int));

/*
    for (int j = 0; j < 32; j++) {
        for (int i = 0; i < 32; i++) {
            unsigned int val = 0;
            for (int x = 0; x < 16; x++)
                for (int y = 0; y < 16; y++)
                    val += image[512*(j*16+y)+i*16+x];
            unsigned int b = val / 256;
            fwrite(&b, 4, 1, f);
        }
    }
*/

    for (int j = 0; j < 32; j++) {
        for (int i = 0; i < 32; i++) {

            unsigned char min = 255;
            unsigned char max = 0;
            unsigned char count[256];
            memset(count, 0, 256);
            for (int x = 0; x < 16; x++)
                for (int y = 0; y < 16; y++) {
                    unsigned char pix = image[512*(j*16+y)+i*16+x];
                    if ( pix < min )
                        min = pix;
                    if ( pix > max )
                        max = pix;
                    count[pix]++;
                }

            // perhaps scan for outliers?
            // could code for outliers with position
            // (two nibbles for the 16x16 coord, and value, total 2 bytes per outlier)
            // need heuristic to determine the number of outliers to code this way

            //printf("min: %i, max: %i, range: %i\n", min, max, max-min);
            //unsigned int b = val / 256;
            unsigned char scale = 0;
            unsigned char range = max - min;

            if ( range > 16 ) {
                unsigned char r = range >> 4;
                while (r) {
                    scale += 1;
                    r >>= 1;
                }
            }

            unsigned char distinctValueCount = 0;
            for (int i = 0; i < 256; i++) {
                if ( count[i] )
                    distinctValueCount++;
            }

            unsigned char min_exclude_outter = 255;
            unsigned char max_exclude_outter = 0;
            for (int i = 0; i < 256; i++) {
                if ( count[i] ) {
                    count[i]--;
                    for (; i < 256; i++) {
                        if ( count[i] ) {
                            count[i]--;
                            break;
                        }
                    }
                }
            }
            for (int i = 255; i >= 0; i--) {
                if ( count[i] ) {
                    count[i]--;
                    for (; i >= 0; i--) {
                        if ( count[i] ) {
                            count[i]--;
                            break;
                        }
                    }
                }
            }
            for (int i = 0; i < 256; i++) {
                if ( count[i] ) {
                    min_exclude_outter = i;
                    break;
                }
            }
            for (int i = 255; i >= 0; i--) {
                if ( count[i] ) {
                    max_exclude_outter = i;
                    break;
                }
            }

            unsigned char scale2 = 0;
            unsigned char range2 = max_exclude_outter - min_exclude_outter;

            if ( range2 > 16 ) {
                unsigned char r = range2 >> 4;
                while (r) {
                    scale2 += 1;
                    r >>= 1;
                }
            }
/*
            printf("min: %i, max: %i, range: %i, scale: %i, distincts: %i, mi2: %i, ma2: %i, r2: %i, s2: %i\n", min, 
                max, range, scale, distinctValueCount, min_exclude_outter, max_exclude_outter,
                range2, scale2);
*/

/* 
            for (int i = min; i < max; i++) {
                
            }
*/
            /*
                If very small range - subtract min, write min and write with the lower bpp needed
                If small distinct values - write a small LUT and write with the lower bpp needed
                If small range excluding outliers - write out outliers, subtract min and write with lower bpp

                Last case is if many different values over large range (kind of random image) - quantize? 
                Or do deltas, idea being that the values are spread over a wide range but if the image
                has any coherency to it, then the deltas might be consistently low provided following a
                pattern over the area that corresponds with the consistency in the image.

                Perhaps need to use a cost equation to find optimal method
            */

/*
            2 bits - method
            3 bits - bpp
            3 bits - LUT/outliers count
*/

            unsigned char method = 0;
            unsigned char bpp = 0;
            unsigned char lut_count = 0;

            // Sometimes it is better to use distinctValues instead, even if range is low.
            // Also range might be low, but removing outliers can save space too with a smaller range
            // can do a cost calculation:
            //
            //       r1 = range
            //       r2 = range after removing outliers
            //       dv = distinct values
            //
            //       c1 = cost 1 =     bpp for r1 * 256 bits
            //       c2 = cost 2 =     bpp for r2 * (256 - outliers) + outliers * 16 bits
            //       c3 = cost 3 =     bpp for dv * 256 + dv * 8 bits
            //


            // Other method is doing the delta between each pixel and calculate the range of deltas
            // (and also perhaps the distinct values of deltas)
            // Perhaps need some various zig-zag patterns to follow to find the pattern that has
            // the smallest range or set of delta values
            // bpp for r1 * 256 bits + 16 bits for pattern (assuming 65536 different patterns)

            // with the entropy coding that goes over everything as a final step, having runs of
            // similar values might be better, so a reorder of the 16x16 cell values with a pattern
            // might be useful in some cases (c1, c2, c3)


            // find lowest cost method

            enum {
                RAW = -1,
                RANGE, LUT, LUT_OUT
            };

            unsigned int range_bpp = 0;
            unsigned int lut_bpp = 0;
            unsigned int lut_out_bpp = 0;

            unsigned int lowest_cost = 8 * 256; // raw coding
            unsigned int lc_meth = RAW; // raw coding
            unsigned int c1 = range_bpp * 256;
            if ( c1 < lowest_cost ) {
                lowest_cost = c1;
                lc_meth = RANGE;
            }
            unsigned int c2 = distinctValueCount * 8 + lut_bpp * 256;
            if ( c2 < lowest_cost ) {
                lowest_cost = c2;
                lc_meth = LUT;
            }
            



            range = 32;
            distinctValueCount = 32;

            if ( range < 16 ) {
                method = 0;
                if ( range == 0 ) {
                    bpp = 0;
                } else if ( range == 1 ) {
                    bpp = 1;
                } else if ( range <= 3 ) {
                    bpp = 2;
                //} else if ( range <= 7 ) {
                  //  bpp = 3;
                } else {
                    bpp = 4;
                }
                printf("method: %i, bpp: %i, ", method, bpp);
                printf("min: %i, max: %i, range: %i, scale: %i, distincts: %i, mi2: %i, ma2: %i, r2: %i, s2: %i\n", min, 
                    max, range, scale, distinctValueCount, min_exclude_outter, max_exclude_outter,
                range2, scale2);
                method = (method << 6) | (bpp << 3) | lut_count;
                fwrite(&method, 1, 1, f);
                fwrite(&min, 1, 1, f);
                //fwrite(&scale, 1, 1, f);
                if ( bpp == 0 ) {
                } else {
                    for (int y = 0; y < 16; y++)
                        for (int x = 0; x < 16; x+=(8/bpp)) {
                            unsigned char pix = 0;
                            for (int k = 0; k < (8/bpp); k++)
                                pix = (pix << bpp) | (image[512*(j*16+y)+i*16+x+k] - min);
                            fwrite(&pix, 1, 1, f);
                        } 
                }
            } else if ( distinctValueCount < 16 ) {

                method = 1;
                if ( distinctValueCount == 0 ) {
                    bpp = 0;
                } else if ( distinctValueCount == 1 ) {
                    bpp = 1;
                } else if ( distinctValueCount <= 3 ) {
                    bpp = 2;
                //} else if ( distinctValueCount <= 7 ) {
                  //  bpp = 3;
                } else {
                    bpp = 4;
                }
                printf("method: %i, bpp: %i, ", method, bpp);
                printf("min: %i, max: %i, range: %i, scale: %i, distincts: %i, mi2: %i, ma2: %i, r2: %i, s2: %i\n", min, 
                    max, range, scale, distinctValueCount, min_exclude_outter, max_exclude_outter,
                range2, scale2);
                //lut_count = (distinctValueCount + 1) / 2;
                method = (method << 6) | (bpp << 3) | lut_count;
                fwrite(&method, 1, 1, f);
                fwrite(&distinctValueCount, 1, 1, f);
                for (int x = 0; x < 16; x++)
                    for (int y = 0; y < 16; y++) {
                        unsigned char pix = image[512*(j*16+y)+i*16+x];
                        count[pix]++;
                    }
                unsigned char lut[256];
                unsigned char t = 0;
                for (unsigned int c = 0; c <= 255; c++) {
                    if ( count[c] ) {
                        unsigned char ch = c;
                        fwrite(&ch, 1, 1, f);
                        lut[c] = t;
                        t++;
                    }
                }
                if ( bpp == 0 ) {
                } else {
                    for (int y = 0; y < 16; y++)
                        for (int x = 0; x < 16; x+=(8/bpp)) {
                            unsigned char pix = 0;
                            for (int k = 0; k < (8/bpp); k++)
                                pix = (pix << bpp) | lut[image[512*(j*16+y)+i*16+x+k]];
                            fwrite(&pix, 1, 1, f);
                        }
                }
            } else /* quantize */ {

                method = 2;
                method = (method << 6) | (bpp << 3) | lut_count;
                fwrite(&method, 1, 1, f);

                for (int s1 = 0; s1 < 2; s1++) {
                    for (int t1 = 0; t1 < 2; t1++) {
                        int pixels[64];
                        int blockDCT[64];
                        for (int y = 0; y < 8; y++)
                            for (int x = 0; x < 8; x++) 
                                pixels[y*8+x] = image[512*(j*16+y+s1*8)+i*16+x+t1*8];
                        encodeDCT(pixels, blockDCT);
                        int zeroCount = 0;
                        int ti = 63;
                        while ( !blockDCT[ti] ) {
                            zeroCount++;
                            ti--;
                        }
                        for (int x = 1; x < 64; x++) {
                            signed short s = blockDCT[x];
                            unsigned short us = (unsigned short)s;
                            probabilitiesA[x*65536 + us]++;
                            if (s > -200) 
                            probabilitiesB[s + 200]++;
                        }
/*
                        unsigned long long signs = 0;
                        for (int x = 0; x < 64; x++) {
                            signs <<= 1;
                            if ( blockDCT[x] < 0 ) {
                                signs |= 1;
                                blockDCT[x] = -blockDCT[x];
                            }
                        }
                        fwrite(&signs, 8, 1, f);
*/
/*
                        unsigned long long plane;
                        for (int y = 0; y < 7; y++) {
                            plane = 0;
                            for (int x = 0; x < 64; x++) {
                                plane <<= 1;
                                if ( blockDCT[x] & (0x40>>y) )
                                    plane |= 1;
                            }
                            fwrite(&plane, 8, 1, f);
                        }
*/

/*
unsigned int bits[256] = {
    0xF | -127, // -127
    0xF | -5, // -5
    0x0E, // -4
    0x0C, // -3
    0x04, // -2
    0x00, // -1
    0x01, //  1
    0x05, //  2
    0x0D, //  3
    0xF | 4, // 4
    0xF | 128, // 128
};
unsigned int count[256] = {
    12, // -127
    12, // -5
    4,  // -4
    4,  // -3
    3,  // -2
    2,  // -1
    2,  //  1
    3,  //  2
    4,  //  3
    12, //  4
    12, //  128
};
*/
                        // 1          -1
                        // 01          1
                        // 001        -2
                        // 0001        2
                        // 00001      -3
                        // 000001      3
                        // 0000001    -4
                        // 00000001    4
                        // 00000000 x  x


                        //  00        -1
                        //  01         1
                        //  10        moreA
                        //  11        moreB

                        // moreA
                        //  100       -2
                        //  101        2

                        // moreB
                        //  1100      -3
                        //  1101       3
                        //  1110      moreE
                        //  1111      moreD

                        // moreE
                        //  1110 1        -4
                        //  1110 01        4
                        //  1110 001      -5
                        //  1110 0001      5
                        //  1110 00001    -6
                        //  1110 000001    6
                        //  1110 0000001  -7
                        //  1110 00000001  7

                        // moreD
                        //  1111 x


                        //  1010 0000   4
                        //       0001   5
                        //       0010   6
                        //       0011   7
                        //       0100   8
                        //       0101   9
                        //       0110  10
                        //       0111  11
                        //       1000  12
                        //       1001  13
                        //       1010  14
                        //       1011  15
                        //       1100  16
                        //       1101  17
                        //       1110  18
                        //       1111  19
                        // moreD
                        //  1011 0000  20
                        //       0001  21
                        //       0010  22
                        //       0011  23
                        //       0100  24
                        //       0101  25
                        //       0110  26
                        //       0111  27
                        //       1000  28
                        //       1001  29
                        //       1010  30
                        //       1011  31
                        //       1100  32
                        //       1101  33
                        //       1110  34
                        //       1111  35
                        // moreE
                        //  1110 0000  -4
                        //       0001
                        //       0010
                        //       0011
                        //       0100
                        //       0101
                        //       0110
                        //       0111
                        //       1000
                        //       1001
                        //       1010
                        //       1011
                        //       1100
                        //       1101
                        //       1110
                        //       1111
                        // moreF
                        //  1111 0000
                        //       0001
                        //       0010
                        //       0011
                        //       0100
                        //       0101
                        //       0110
                        //       0111
                        //       1000
                        //       1001
                        //       1010
                        //       1011
                        //       1100
                        //       1101
                        //       1110
                        //       1111  moreG
                        
                        // moreG
                        //  1111 1111   0000 0000   x
                        //              0000 0001   x
                        //              0000 0010   x



                        //  000        -1
                        //  001         1
                        //  010        -2
                        //  011         2
                        //  100        -3
                        //  101         3
                        //  110        moreA
                        //  111        moreB
                        
                        // moreA
                        //  110000      4
                        //  110001     -4
                        //  110010      5
                        //  110011     -5
                        //  110100      6
                        //  110101     -6
                        //  110110      7
                        //  110111     -7

                        // moreB
                        //  11100000    8
                        //  11100001   -8
                        //  11100010    9
                        //  11100011   -9
                        //  11100100   10
                        //  11100101  -10
                        //  11100110   11
                        //  11100111  -11
                        //  11101000   12
                        //  11101001  -12
                        //  11101010   13
                        //  11101011  -13
                        //  11101100   14
                        //  11101101  -14
                        //  11101110   15
                        //  11101111  -15
                        //  11110000   16
                        //  11110001  -16
                        //  11110010   17
                        //  11110011  -17
                        //  11110100   18
                        //  11110101  -18
                        //  11110110   19
                        //  11110111  -19
                        //  11111000   20
                        //  11111001  -20
                        //  11111010   21
                        //  11111011  -21
                        //  11111100   22
                        //  11111101  -22
                        //  11111110   23
                        //  11111111  -23
  
                        int bitsBuffered = 0;
                        unsigned long bits = 0;

                        signed int blockDCTcode[64];
                        blockDCTcode[0] = blockDCT[0] - prevBlockDCT[0];
                        blockDCTcode[1] = blockDCT[1];
                        for (int x = 2; x < 3; x++)
                            //blockDCTcode[x] = blockDCT[x];
                            //blockDCTcode[x] = blockDCT[x] - prevBlockDCT[x];
                            blockDCTcode[x] = blockDCT[x] - blockDCT[x-1];
                        for (int x = 3; x < 64; x++) {
                            blockDCTcode[x] = blockDCT[x];
                            //blockDCTcode[x] = blockDCT[x] - blockDCT[x-1];
                            //blockDCTcode[x] = blockDCT[x] - prevBlockDCT[x];
                        }

                        unsigned long long zeros = 0;
                        for (int x = 0; x < 64; x++) {
                            zeros <<= 1;
                            if ( blockDCTcode[x] == 0 )
                                zeros |= 1;
                        }
                        fwrite(&zeros, 8, 1, f);

                        unsigned char count = 64 - zeroCount;
                        //fwrite(&count, 1, 1, f);
                        if ( count > 32 )
                            printf("count = %i\n", count);

                        printf("blockDCT = ");

                        for (int x = 0; x < 64; x++) {
                        //for (int x = 0; x < 64; x++) {
                            signed int c2 = blockDCTcode[x];
                            signed int ch = blockDCTcode[x];
                            //if (x > 10)
                            //    ch -= blockDCT[x-1];
                            //if ( ch > 32 || ch < -32 )
                            //    printf("blockDCT[x] = %i\n", ch);
                            //if (x == 0) {//5 && x < 8) {
                                //ch = (ch - blockDCT[x-1]) - (prevBlockDCT[x] - prevBlockDCT[x-1]);
                              //  ch = ch - prevBlockDCT[x];
                            //}
                            /*
                            if (x > 1) {
                                ch = ch - blockDCT[x-1];
                            }
                            */
                            if (c2 != 0)
                                printf("%i,", ch);
                            signed char c = ch;

                            if (c2 != 0) {
                                if ( c > -4 && c <= 4 ) {
                                    int llut[9] = { 7,5,3,1,0,2,4,6,8 };
                                    int t = llut[c + 4];
                                    bitsBuffered += t;
                                    bits <<= t;
                                    bits |= 1;
                                } else {
                                    bitsBuffered += 16;
                                    bits <<= 16;
                                    bits |= (unsigned char)(c & 0xFF);
                                }
                                /*
                                switch (c) {
                                    case -1:
                                        bitsBuffered++;
                                        bits <<= 1;
                                        bits |= 1;
                                        break;
                                    case 1:
                                        bitsBuffered += 2;
                                        bits <<= 2;
                                        bits |= 1;
                                        break;
                                    case -2:
                                        bitsBuffered += 3;
                                        bits <<= 3;
                                        bits |= 1;
                                        break;
                                    case 2:
                                        bitsBuffered += 4;
                                        bits <<= 4;
                                        bits |= 1;
                                        break;
                                    case -3:
                                        bitsBuffered += 5;
                                        bits <<= 5;
                                        bits |= 1;
                                        break;
                                    case 3:
                                        bitsBuffered += 6;
                                        bits <<= 6;
                                        bits |= 1;
                                        break;
                                    case -4:
                                        bitsBuffered += 7;
                                        bits <<= 7;
                                        bits |= 1;
                                        break;
                                    case 4:
                                        bitsBuffered += 8;
                                        bits <<= 8;
                                        bits |= 1;
                                        break;
                                    default:
                                        bitsBuffered += 16;
                                        bits <<= 16;
                                        bits |= (unsigned char)(c & 0xFF);
                                        break;
                                }
                                 */
                            //    fwrite(&c, 1, 1, f);
                                while ( bitsBuffered >= 8 ) {
                                    unsigned long long b = (bits >> (bitsBuffered - 8)) & 0xFF;
                                    unsigned char b2 = (unsigned char)b;
                                    fwrite(&b2, 1, 1, f);
                                    bitsBuffered -= 8;
                                    //bits >>= 8;
                                }
                            }
                        // 1          -1
                        // 01          1
                        // 001        -2
                        // 0001        2
                        // 00001      -3
                        // 000001      3
                        // 0000001    -4
                        // 00000001    4
                        // 00000000 x  x
                        }
                        printf("\n");

                        while ( bitsBuffered > 0 ) {
                            unsigned long long b = (bits >> (bitsBuffered - 8)) & 0xFF;
                            if ( bitsBuffered < 8 ) {
                                b = (bits << (8 - bitsBuffered)) & 0xFF;
                                b |= 0xFF >> (bitsBuffered - 8);
                                //b = (bits & (0xFF >> (8 - bitsBuffered)));
                                //b <<= 8 - bitsBuffered;
                            }
                            unsigned char b2 = (unsigned char)b;
                            fwrite(&b2, 1, 1, f);
                            bitsBuffered -= 8;
                            //bits >>= 8;
                        }
                        /*
                        unsigned char b2 = 0xFF;
                        fwrite(&b2, 1, 1, f);
                        b2 = 0xAB;
                        fwrite(&b2, 1, 1, f);
                        */
                        for (int x = 0; x < 64; x++)
                            prevBlockDCT[x] = blockDCT[x];
                    }
                }

/*
                fwrite(&min, 1, 1, f);
                fwrite(&scale, 1, 1, f);
                for (int y = 0; y < 16; y++)
                    for (int x = 0; x < 16; x+=2) {
                        unsigned char pix1 = image[512*(j*16+y)+i*16+x+0];
                        unsigned char pix2 = image[512*(j*16+y)+i*16+x+1];
                        pix1 -= min;
                        pix1 >>= scale;
                        pix2 -= min;
                        pix2 >>= scale;
                        pix1 = (pix1 << 4) | pix2;
                        fwrite(&pix1, 1, 1, f);
                    }
*/
            }

        }
    }

    for (int x = 0; x < 64; x++)
    {
        int highest = 0;
        int hy = 0;
        for (int y = 0; y < 65536; y++) {
            if ( probabilitiesA[x*65536 + y] > highest) {
                highest = probabilitiesA[x*65536 + y];
                hy = y;
            }
        }
        printf("Highest prob value at [%i] is %i (%i)\n", x, hy, highest);
    }
    for (int x = 0; x < 400; x++) {
        printf("prob value at [%i] is %i (%i)\n", x-200, probabilitiesB[x]);
    }
}

int main(int argc, char *argv[])
{
    QApplication app(argc, argv);

printf("starting\n");

    initTables();

//    testDCT();
//    exit(0);

    const int size = 512*512;
    unsigned char *image = (unsigned char *)malloc(size);
printf("alloced mem\n");
    memset(image, 0, size);
printf("zeroed mem\n");

    QImage pix( QString("test02.png") );
    int w = pix.width();
    int h = pix.height();

    for (int y = 0; y < h; y++) {
        for (int x = 0; x < w; x++) {
            image[y*w+x] = pix.pixel(x,y);
        }
    }

    FILE *f = fopen("blah.out", "w");
printf("opened output\n");
    output(f, image);
printf("written output\n");
    fclose(f);
printf("closed output\n");

    QImage outputImage(512, 512, QImage::Format_RGB888);
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < w; x++) {
            unsigned char p = image[y*w+x];
            unsigned int p2 = p;
            p2 = (p2 << 16) | (p2 << 8) | p2;
            outputImage.setPixel(x, y, p2);
        }
    }
    outputImage.save("output.png");
    outputImage.save("output.jpg");

    return 0;//app.exec();
}


