Newer
Older
Import / research / signals-slots / src / gui / jpeg / mjpegdec.h
/*
 * MJPEG decoder
 * Copyright (c) 2000, 2001 Fabrice Bellard.
 * Copyright (c) 2003 Alex Beregszaszi
 * Copyright (c) 2003-2004, 2006 Michael Niedermayer <michaelni@gmx.at>
 *
 * Support for external huffman table, various fixes (AVID workaround),
 * aspecting, new decode_frame mechanism and apple mjpeg-b support
 *                                  by Alex Beregszaszi
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file mjpegdec.c
 * MJPEG decoder.
 */

//#define DEBUG
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <limits.h>
#include <stdarg.h>
#include <inttypes.h>
#include <stdlib.h>
#include <sys/types.h> /* size_t */

/* Larger of two values. Each argument is evaluated twice: avoid side effects. */
#define FFMAX(a,b) ((a) > (b) ? (a) : (b))
/* Pack four byte values into a 32-bit tag, first argument in the least
 * significant byte. Arguments are parenthesised so that expressions such as
 * MKTAG(x, y+1, ...) bind correctly; the top byte is shifted as unsigned so
 * values >= 0x80 do not overflow a signed int. */
#define MKTAG(a,b,c,d) ((a) | ((b) << 8) | ((c) << 16) | ((unsigned)(d) << 24))
/* Same as MKTAG, but with the first argument in the most significant byte. */
#define MKBETAG(a,b,c,d) ((d) | ((c) << 8) | ((b) << 16) | ((unsigned)(a) << 24))

/**
 * Build a fourcc from the first four bytes of s, with s[0] in the least
 * significant byte.
 *
 * The bytes are read as unsigned values: plain char may be signed, in which
 * case bytes >= 0x80 would sign-extend and corrupt the result.
 *
 * @param s pointer to at least four readable bytes (no terminator required)
 * @return the packed 32-bit tag
 */
static inline int ff_get_fourcc(const char *s){
    const unsigned char *b = (const unsigned char *)s;

    return (int)( (uint32_t)b[0]        |
                 ((uint32_t)b[1] <<  8) |
                 ((uint32_t)b[2] << 16) |
                 ((uint32_t)b[3] << 24));
}

/**
 * Reverse the byte order of a 32-bit value.
 *
 * One implementation is selected at compile time: inline assembly when one of
 * ARCH_X86, ARCH_SH4, ARCH_ARM or ARCH_BFIN is defined, otherwise a portable
 * shift-and-mask version.
 *
 * @param x value to swap
 * @return x with its four bytes in reverse order
 */
static
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
__attribute__((always_inline))
#endif
inline uint32_t bswap_32(uint32_t x)
{
#if defined(ARCH_X86)
#if __CPU__ != 386
 __asm("bswap   %0":
      "=r" (x)     :
#else
 __asm("xchgb   %b0,%h0\n"
      "         rorl    $16,%0\n"
      "         xchgb   %b0,%h0":
# ifdef ARCH_X86_64
        "=Q" (x)            :
# else
        "=q" (x)            :
# endif
#endif
      "0" (x));
#elif defined(ARCH_SH4)
        __asm__(
        "swap.b %0,%0\n"
        "swap.w %0,%0\n"
        "swap.b %0,%0\n"
        :"=r"(x):"0"(x));
#elif defined(ARCH_ARM)
    /* NOTE(review): t is declared without an initialiser yet is bound as a
     * read-write ("+r") operand; confirm whether a write-only early-clobber
     * operand was intended. */
    uint32_t t;
    __asm__ (
      "eor %1, %0, %0, ror #16 \n\t"
      "bic %1, %1, #0xFF0000   \n\t"
      "mov %0, %0, ror #8      \n\t"
      "eor %0, %0, %1, lsr #8  \n\t"
      : "+r"(x), "+r"(t));
#elif defined(ARCH_BFIN)
    unsigned tmp;
    asm("%1 = %0 >> 8 (V);\n\t"
        "%0 = %0 << 8 (V);\n\t"
        "%0 = %0 | %1;\n\t"
        "%0 = PACK(%0.L, %0.H);\n\t"
        : "+d"(x), "=&d"(tmp));
#else
    /* portable fallback: swap the bytes inside each 16-bit half, then swap the halves */
    x= ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF);
    x= (x>>16) | (x<<16);
#endif
    return x;
}

// be2me ... BigEndian to MachineEndian
// With WORDS_BIGENDIAN defined both macros are the identity; otherwise the
// 32-bit variant byte-swaps through bswap_32().
// NOTE(review): be2me_16 is only defined in the WORDS_BIGENDIAN branch.
#ifdef WORDS_BIGENDIAN
#define be2me_16(x) (x)
#define be2me_32(x) (x)
#else
#define be2me_32(x) bswap_32(x)
#endif

/* av_log API */
/* Severity levels passed as the second argument of av_log(). */
#define AV_LOG_ERROR    16
#define AV_LOG_WARNING  24
#define AV_LOG_INFO     32
#define AV_LOG_DEBUG    48

/* Logging is compiled out: every av_log(...) call expands to nothing.
 * Because the macro is defined here, the #ifndef av_log sections in this
 * file (the prototype below and the function body further down) are
 * inactive; remove this #define to enable them. */
#define av_log(a, ...)

#ifndef av_log
#ifdef __GNUC__
static void av_log(void*, int level, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 3, 4)));
#else
static void av_log(void*, int level, const char *fmt, ...);
#endif
#endif

/* Pixel format identifiers. The FF_PF guard prevents a second definition if
 * the enum has already been declared. The numeric values are used as indices
 * into tables sized by PIX_FMT_NB, so the order of the enumerators matters. */
#ifndef FF_PF
#define FF_PF
enum FF_PixelFormat {
    PIX_FMT_NONE= -1,
    PIX_FMT_RGB24,     ///< Packed RGB 8:8:8, 24bpp, RGBRGB...
    PIX_FMT_RGB32,     ///< Packed RGB 8:8:8, 32bpp, (msb)8A 8R 8G 8B(lsb), in cpu endianness
    PIX_FMT_GRAY8,     ///<        Y        ,  8bpp
    PIX_FMT_MONOBLACK, ///<        Y        ,  1bpp, 0 is black, 1 is white
    PIX_FMT_PAL8,      ///< 8 bit with PIX_FMT_RGB32 palette
    PIX_FMT_GRAY16BE,  ///<        Y        , 16bpp, big-endian
    PIX_FMT_YUVJ420P,  ///< Planar YUV 4:2:0, 12bpp, full scale (jpeg)
    PIX_FMT_YUVJ422P,  ///< Planar YUV 4:2:2, 16bpp, full scale (jpeg)
    PIX_FMT_YUVJ444P,  ///< Planar YUV 4:4:4, 24bpp, full scale (jpeg)
    PIX_FMT_YUVJ440P,  ///< Planar YUV 4:4:0 full scale (jpeg)
    PIX_FMT_YUV420P,   ///< Planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
    PIX_FMT_YUV422P,   ///< Planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
    PIX_FMT_YUV444P,   ///< Planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
    PIX_FMT_YUYV422,   ///< Packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
    PIX_FMT_GRAY16,    ///<        Y        , 16bpp, little-endian
    PIX_FMT_YUV440P,   ///< Planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
    PIX_FMT_NB,        ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
};
#endif


/**
 * Read an unaligned 32-bit big-endian value from the four bytes at x.
 *
 * Each byte is widened to uint32_t before shifting: shifting the promoted
 * (signed) int left by 24 overflows when the first byte is >= 0x80.
 * The result has type uint32_t. The argument is evaluated four times.
 */
#define AV_RB32(x)  (((uint32_t)((const uint8_t*)(x))[0] << 24) | \
                     ((uint32_t)((const uint8_t*)(x))[1] << 16) | \
                     ((uint32_t)((const uint8_t*)(x))[2] <<  8) | \
                      (uint32_t)((const uint8_t*)(x))[3])

/**
 * Required number of additionally allocated bytes at the end of the input bitstream for decoding.
 * This is mainly needed because some optimized bitstream readers read
 * 32 or 64 bit at once and could read over the end.<br>
 * Note: If the first 23 bits of the additional bytes are not 0, then damaged
 * MPEG bitstreams could cause overread and segfault.
 */
#define FF_INPUT_BUFFER_PADDING_SIZE 8
#define CODEC_FLAG_EXTERN_HUFF     0x1000   ///< Use external Huffman table (for MJPEG).


/**
 * Audio Video Frame.
 * Reduced to the two members this decoder needs: up to four plane pointers
 * and one line size per plane.
 */
typedef struct AVFrame {
    uint8_t *data[4];   ///< plane pointers
    int linesize[4];    ///< line size of each plane
} AVFrame;

/**
 * main external API structure.
 * New fields can be added to the end with minor version bumps.
 * Removal, reordering and changes to existing fields require a major
 * version bump.
 * sizeof(AVCodecContext) must not be used outside libav*.
 */
typedef struct AVCodecContext {
    int flags;                   ///< codec flags, e.g. CODEC_FLAG_EXTERN_HUFF
    /**
     * The allocated memory should be FF_INPUT_BUFFER_PADDING_SIZE bytes larger
     * than extradata_size to avoid problems if it is read with the bitstream reader.
     */
    uint8_t *extradata;
    int extradata_size;
    int width, height;           ///< picture size after the lowres reduction (see avcodec_set_dimensions)
    enum FF_PixelFormat pix_fmt;
    void *priv_data;             ///< codec private context (an MJpegDecodeContext for this decoder)

    /**
     * low resolution decoding, 1-> 1/2 size, 2->1/4 size
     * - encoding: unused
     * - decoding: Set by user.
     */
    int lowres;
    int coded_width, coded_height; ///< picture size before the lowres reduction
} AVCodecContext;

/**
 * four components are given, that's all.
 * the last component is alpha
 */
typedef struct AVPicture {
    uint8_t *data[4];      ///< plane pointers
    int linesize[4];       ///< number of bytes per line
    int width, height;     ///< NOTE(review): not written by picture_alloc(); presumably set by the caller
} AVPicture;

/*
 * DSP utils
 * Copyright (c) 2000, 2001, 2002 Fabrice Bellard.
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 */

/* dct code */
/* Element type of the coefficient blocks handed to the inverse DCT. */
typedef short DCTELEM;

/* Inverse DCT routines operating in place on a coefficient block; they are
 * defined outside this section of the file.
 * NOTE(review): the numeric suffix presumably gives the reduced block size
 * (4x4, 2x2, 1x1) used for low-resolution decoding -- confirm against the
 * implementation. */
void j_rev_dct(DCTELEM *data);
void j_rev_dct4(DCTELEM *data);
void j_rev_dct2(DCTELEM *data);
void j_rev_dct1(DCTELEM *data);

/* encoding scans */
/* Zigzag scan order of an 8x8 block: entry i is the raster index
 * (row * 8 + column) of the i-th coefficient in scan order. */
static const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};


/* pixel operations */
/* Lookup table used by the IDCT put/add routines, which index it through a
 * pointer offset by MAX_NEG_CROP so that indices from -MAX_NEG_CROP to
 * 255 + MAX_NEG_CROP are in range.
 * NOTE(review): the table is zero-initialised here; it is presumably filled
 * with clamp-to-0..255 values elsewhere before decoding -- confirm. */
#define MAX_NEG_CROP 1024
static uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };


//int idct_permutation_type;
/* Identifiers for the coefficient permutation an IDCT implementation expects.
 * No code in this section of the file reads them. */
#define FF_NO_IDCT_PERM 1
#define FF_LIBMPEG2_IDCT_PERM 2
#define FF_SIMPLE_IDCT_PERM 3
#define FF_TRANSPOSE_IDCT_PERM 4
#define FF_PARTTRANS_IDCT_PERM 5

/* Declare variable v of type t with an alignment request.
 * Despite the "_8" in the name, both variants ask for 16-byte alignment.
 * The non-GNU branch uses the __declspec(align()) spelling. */
#ifdef __GNUC__
  #define DECLARE_ALIGNED_8(t,v)       t v __attribute__ ((aligned (16)))
#else
  #define DECLARE_ALIGNED_8(t,v)       __declspec(align(16)) t v
#endif


/* NEG_USR32(a, s): unsigned right shift of the 32-bit value a by (32 - s),
 * i.e. it keeps the top s bits of a.
 * The generic macro shifts by 32 when s is 0, which C leaves undefined, so
 * callers should pass s in 1..32 there. */
#if defined(ARCH_X86)
// avoid +32 for shift optimization (gcc should do that ...)
static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
    asm ("shrl %1, %0\n\t"
         : "+r" (a)
         : "ic" ((uint8_t)(-s))
    );
    return a;
}
#else
#    define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s)))
#endif

/* bit output */

/* buf and buf_end must be present and used by every alternative writer. */
typedef struct PutBitContext {
    uint32_t bit_buf;   /* bits accumulated but not yet stored to the buffer */
    int bit_left;       /* number of free bits left in bit_buf (32 when it is empty) */
    uint8_t *buf, *buf_ptr, *buf_end; /* start of buffer, next write position, one past the end */
} PutBitContext;


/**
 * Prepare a bit writer for output into buffer.
 *
 * @param s           writer state to initialise
 * @param buffer      destination memory
 * @param buffer_size size of buffer in bytes; a negative value is treated as
 *                    "no buffer" (NULL pointers, zero length)
 */
static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
{
    const int usable     = buffer_size >= 0;
    uint8_t *const start = usable ? buffer : NULL;
    const int length     = usable ? buffer_size : 0;

    /* empty accumulator: all 32 bits are free */
    s->bit_buf  = 0;
    s->bit_left = 32;

    s->buf     = start;
    s->buf_ptr = start;
    s->buf_end = start + length;
}


/**
 * Write out the bits still held in the accumulator, padding the last byte
 * with zero bits, and leave the accumulator empty.
 *
 * Nothing is stored when no bits are pending. That case is tested up front
 * because it would otherwise shift the 32-bit accumulator by 32, which C
 * leaves undefined.
 */
static inline void flush_put_bits(PutBitContext *s)
{
    if (s->bit_left < 32) {
        /* move the pending bits to the top of the accumulator */
        s->bit_buf <<= s->bit_left;
        while (s->bit_left < 32) {
            /* XXX: should test end of buffer */
            *s->buf_ptr++ = s->bit_buf >> 24;
            s->bit_buf <<= 8;
            s->bit_left += 8;
        }
    }
    s->bit_left = 32;
    s->bit_buf  = 0;
}


/* bit input */
/* buffer, buffer_end and size_in_bits must be present and used by every reader */
typedef struct GetBitContext {
    const uint8_t *buffer, *buffer_end; /* start of the input and one past its last byte */
    int index;                          /* current read position, in bits from buffer */
    int size_in_bits;                   /* number of valid bits in buffer */
} GetBitContext;

/* Multi-level lookup table for variable-length codes, built by build_table().
 * An entry with positive bits holds a decoded symbol and its code length.
 * An entry with negative bits refers to a sub-table: its code field is the
 * sub-table's start index and -bits is the sub-table's index width. */
typedef struct VLC {
    int bits;            /* index width of the first-level table */
    int16_t (*table)[2]; ///< code, bits
    int table_size, table_allocated; /* entries in use / entries allocated */
} VLC;

/* Run/level VLC table element. It is not referenced in this section of the file. */
typedef struct RL_VLC_ELEM {
    int16_t level;
    int8_t len;
    uint8_t run;
} RL_VLC_ELEM;

/* On these architectures put_bits() stores byte by byte whenever the write
 * pointer is not 4-byte aligned, instead of issuing one 32-bit store. */
#if defined(ARCH_SPARC) || defined(ARCH_ARMV4L) || defined(ARCH_MIPS) || defined(ARCH_BFIN)
#define UNALIGNED_STORES_ARE_BAD
#endif

/**
 * Append the n low bits of value to the output stream, most significant bit
 * first.
 *
 * Bits collect in a 32-bit accumulator; each time it fills up, four bytes are
 * stored to the buffer in big-endian order.
 *
 * Differences from a plain "shift and store through a uint32_t pointer":
 *  - when the accumulator is empty (bit_left == 32) and n == 32, the old
 *    content is discarded explicitly rather than shifted by 32, which C
 *    leaves undefined and which can merge stale bits into the output;
 *  - the four bytes are stored individually, so the write needs no alignment
 *    and no byte-order conversion.
 *
 * @param s     writer state; its buffer must have room for 4 more bytes
 * @param n     number of bits to write, 0..32
 * @param value bits to write; must be below 1 << n unless n is 32
 */
static inline void put_bits(PutBitContext *s, int n, unsigned int value)
{
    unsigned int bit_buf = s->bit_buf;
    int bit_left = s->bit_left;

    assert(n == 32 || value < (1U << n));

    /* XXX: optimize */
    if (n < bit_left) {
        /* everything fits into the accumulator */
        bit_buf = (bit_buf << n) | value;
        bit_left -= n;
    } else {
        /* top up the accumulator with the leading bits of value ... */
        bit_buf = (bit_left < 32) ? (bit_buf << bit_left) : 0;
        bit_buf |= value >> (n - bit_left);

        /* ... store it big-endian ... */
        s->buf_ptr[0] = (uint8_t)(bit_buf >> 24);
        s->buf_ptr[1] = (uint8_t)(bit_buf >> 16);
        s->buf_ptr[2] = (uint8_t)(bit_buf >>  8);
        s->buf_ptr[3] = (uint8_t)(bit_buf      );
        s->buf_ptr += 4;

        /* ... and keep the remaining low bits of value for later; bits above
         * them are stale and get shifted out before the next store */
        bit_left += 32 - n;
        bit_buf = value;
    }

    s->bit_buf = bit_buf;
    s->bit_left = bit_left;
}



/**
 * Read a 16-bit big-endian value at the current position and advance by
 * 16 bits. The position is rounded down to a byte boundary for the read.
 *
 * Segment lengths in the stream are attacker-controlled, so each byte is
 * fetched only if it lies inside the size_in_bits recorded at
 * initialisation; bytes outside that range read as 0.
 *
 * @param s reader state
 * @return the 16-bit value (0..65535)
 */
static inline unsigned int get_word(GetBitContext *s)
{
    /* number of bytes that hold at least one valid bit */
    const int nbytes = s->size_in_bits > 0 ? ((s->size_in_bits - 1) >> 3) + 1 : 0;
    uint32_t tmp = 0;

    if (s->buffer && s->index >= 0) {
        const int pos = s->index >> 3;

        if (pos < nbytes)
            tmp = (uint32_t)s->buffer[pos] << 8;
        if (pos + 1 < nbytes)
            tmp |= s->buffer[pos + 1];
    }
    s->index += 16;
    return tmp;
}

/**
 * Read the byte at the current position and advance by 8 bits.
 * The position is rounded down to a byte boundary for the read.
 *
 * Segment lengths in the stream are attacker-controlled, so the byte is
 * fetched only if it lies inside the size_in_bits recorded at
 * initialisation; a position outside that range reads as 0.
 *
 * @param s reader state
 * @return the byte value (0..255)
 */
static inline unsigned int get_byte(GetBitContext *s)
{
    /* number of bytes that hold at least one valid bit */
    const int nbytes = s->size_in_bits > 0 ? ((s->size_in_bits - 1) >> 3) + 1 : 0;
    uint32_t tmp = 0;

    if (s->buffer && s->index >= 0 && (s->index >> 3) < nbytes)
        tmp = s->buffer[s->index >> 3];
    s->index += 8;
    return tmp;
}

/**
 * Move the read position forward by n bits without reading anything.
 * No range check is performed.
 */
static inline void skip_bits(GetBitContext *s, int n)
{
    const int position = s->index;

    s->index = position + n;
}

/**
 * @return the current read position, in bits from the start of the buffer
 */
static inline int get_bits_count(GetBitContext *s)
{
    const int position = s->index;

    return position;
}


/**
 * init GetBitContext.
 * @param s the reader state to initialise
 * @param buffer bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE bytes larger than the actual read bits
 * because some optimized bitstream readers read 32 or 64 bit at once and could read over the end
 * @param bit_size the size of the buffer in bits
 *
 * A negative bit_size, or one so large that rounding it up to whole bytes
 * would overflow an int, selects an empty reader (NULL buffer, zero size).
 */
static inline void init_get_bits(GetBitContext *s,
                   const uint8_t *buffer, int bit_size)
{
    int buffer_size;

    /* validate before rounding up: bit_size + 7 must not overflow */
    if (bit_size < 0 || bit_size > INT_MAX - 7) {
        buffer_size = bit_size = 0;
        buffer = NULL;
    } else {
        buffer_size = (bit_size + 7) >> 3;
    }

    s->buffer= buffer;
    s->size_in_bits= bit_size;
    s->buffer_end= buffer + buffer_size;
    s->index=0;
}

/**
 * Advance the read position to the next byte boundary; a position that is
 * already byte-aligned is left unchanged.
 */
static inline void align_get_bits(GetBitContext *s)
{
    /* bits missing up to the next multiple of eight (0..7) */
    const int padding = (-s->index) & 7;

    s->index += padding;
}


/**
 * Decode one variable-length code.
 *
 * The macro expands to a brace-enclosed block that relies on two variables
 * in the caller's scope: re_index (current bit position) and re_cache0
 * (32-bit cache of upcoming bits). It declares its own locals n and index.
 *
 * It loads 32 bits at the current position and uses the top 9 bits as an
 * index into table. If that entry's bit count is negative, the entry refers
 * to a sub-table: 9 bits are consumed and the next -n bits, added to the
 * entry's code value, select the final entry. Finally re_index and re_cache0
 * advance by the bit count of the selected entry.
 *
 * @param code  lvalue receiving the first element of the selected entry
 * @param gb    pointer to the GetBitContext supplying the buffer
 * @param table table of two-element entries: [0] code, [1] bits
 */
#define GET_VLC(code, gb, table)\
{\
    int n, index;\
\
    re_cache0 = AV_RB32( (gb)->buffer+(re_index>>3) ) << (re_index&0x07);\
    index = ((uint32_t)re_cache0)>>(32-9); \
    code = table[index][0];\
    n    = table[index][1];\
\
    if (n < 0){\
        re_index += 9;\
        re_cache0 <<= 9;\
        index = (((uint32_t)re_cache0)>>(32+n)) + code; \
        code = table[index][0];\
        n    = table[index][1];\
    }\
    re_cache0 <<= n;\
    re_index += n;\
}



#include "jrevdct.h"

/**
 * Inverse-transform an 8x8 coefficient block in place and store the result
 * as pixels, overwriting the destination.
 *
 * Each sample is mapped through ff_cropTbl (offset by MAX_NEG_CROP so that
 * negative samples index valid entries) before it is written.
 *
 * @param pixels    top-left destination pixel
 * @param line_size distance in bytes between two destination rows
 * @param block     64 coefficients; overwritten by the inverse DCT
 */
static void ff_jref_idct_put(uint8_t *pixels, int line_size, DCTELEM *block)
{
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int row, col;

    j_rev_dct(block);

    /* write the pixels, one row of eight samples at a time */
    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = cm[block[col]];

        pixels += line_size;
        block  += 8;
    }
}

/**
 * Inverse-transform an 8x8 coefficient block in place and add the result to
 * the pixels already present in the destination.
 *
 * Each sum is mapped through ff_cropTbl (offset by MAX_NEG_CROP) before it
 * is written back.
 *
 * @param pixels    top-left destination pixel
 * @param line_size distance in bytes between two destination rows
 * @param block     64 coefficients; overwritten by the inverse DCT
 */
static void ff_jref_idct_add(uint8_t *pixels, int line_size, DCTELEM *block)
{
    uint8_t *const crop = ff_cropTbl + MAX_NEG_CROP;
    uint8_t *dst = pixels;
    DCTELEM *src = block;
    int row;

    j_rev_dct(block);

    /* update the pixels row by row */
    for (row = 0; row < 8; row++, dst += line_size, src += 8) {
        int col;

        for (col = 0; col < 8; col++)
            dst[col] = crop[dst[col] + src[col]];
    }
}

/**
 * Scantable.
 * permutated[i] is the position in a coefficient block of the i-th
 * coefficient in scan order; it is filled from ff_zigzag_direct by
 * ff_mjpeg_decode_init().
 */
typedef struct ScanTable{
    uint8_t permutated[64];
} ScanTable;

#define MAX_COMPONENTS 4

/**
 * Private state of the MJPEG decoder. It is reached through
 * AVCodecContext::priv_data and set up by ff_mjpeg_decode_init().
 */
typedef struct MJpegDecodeContext {
    AVCodecContext *avctx;
    GetBitContext gb;   /* bit reader over the segment currently being parsed */

    int start_code; /* current start code */
    int buffer_size;
    uint8_t *buffer;

    int16_t quant_matrixes[4][64]; /* quantisation tables, stored in scantable order (see ff_mjpeg_decode_dqt) */
    VLC vlcs[2][4];     /* Huffman decoders, indexed [class][table index]; class 0 = DC, 1 = AC */
    int qscale[4];      ///< quantizer scale calculated from quant_matrixes

    int org_height;  /* size given at codec init */
    int interlaced;     /* true if interlaced */
    int bottom_field;   /* true if bottom field */
    int lossless;
    int ls;
    int progressive;
    int rgb;
    int rct;            /* standard rct */
    int pegasus_rct;    /* pegasus reversible colorspace transform */
    int bits;           /* bits per component */

    int maxval;
    int near;         ///< near lossless bound (0 if lossless)
    int t1,t2,t3;
    int reset;        ///< context halving interval (?rename)

    int width, height;
    int mb_width, mb_height;
    int nb_components;
    int component_id[MAX_COMPONENTS];
    int h_count[MAX_COMPONENTS]; /* horizontal and vertical count for each component */
    int v_count[MAX_COMPONENTS];
    int comp_index[MAX_COMPONENTS];
    int dc_index[MAX_COMPONENTS];
    int ac_index[MAX_COMPONENTS];
    int nb_blocks[MAX_COMPONENTS];
    int h_scount[MAX_COMPONENTS];
    int v_scount[MAX_COMPONENTS];
    int h_max, v_max; /* maximum h and v counts */
    int quant_index[4];   /* quant table index for each component */
    int last_dc[MAX_COMPONENTS]; /* last DEQUANTIZED dc (XXX: am I right to do that ?) */
    AVFrame picture; /* picture structure */
    int linesize[MAX_COMPONENTS];                   ///< linesize << interlaced
    int8_t *qscale_table;
    DECLARE_ALIGNED_8(DCTELEM, block[64]);
    ScanTable scantable;

    int restart_interval;
    int restart_count;

    int buggy_avid;
    int cs_itu601;
    int interlace_polarity;

    int mjpb_skiptosod;

    int cur_scan; /* current scan, used by JPEG-LS */
} MJpegDecodeContext;

/* Forward declarations of the marker-segment parsers. Each one reads its
 * segment from s->gb; the DQT and DHT parsers defined below return 0 on
 * success and -1 on error. */
static int ff_mjpeg_decode_dqt(MJpegDecodeContext *s);
static int ff_mjpeg_decode_dht(MJpegDecodeContext *s);
static int ff_mjpeg_decode_sof(MJpegDecodeContext *s);
static int ff_mjpeg_decode_sos(MJpegDecodeContext *s);


/* JPEG marker codes */
/* Each enumerator is the second byte of a two-byte marker; in the stream it
 * follows a 0xff prefix byte. */
typedef enum {
    /* start of frame */
    SOF0  = 0xc0,       /* baseline */
    SOF1  = 0xc1,       /* extended sequential, huffman */
    SOF2  = 0xc2,       /* progressive, huffman */
    SOF3  = 0xc3,       /* lossless, huffman */

    SOF5  = 0xc5,       /* differential sequential, huffman */
    SOF6  = 0xc6,       /* differential progressive, huffman */
    SOF7  = 0xc7,       /* differential lossless, huffman */
    JPG   = 0xc8,       /* reserved for JPEG extension */
    SOF9  = 0xc9,       /* extended sequential, arithmetic */
    SOF10 = 0xca,       /* progressive, arithmetic */
    SOF11 = 0xcb,       /* lossless, arithmetic */

    SOF13 = 0xcd,       /* differential sequential, arithmetic */
    SOF14 = 0xce,       /* differential progressive, arithmetic */
    SOF15 = 0xcf,       /* differential lossless, arithmetic */

    DHT   = 0xc4,       /* define huffman tables */

    DAC   = 0xcc,       /* define arithmetic-coding conditioning */

    /* restart with modulo 8 count "m" */
    RST0  = 0xd0,
    RST1  = 0xd1,
    RST2  = 0xd2,
    RST3  = 0xd3,
    RST4  = 0xd4,
    RST5  = 0xd5,
    RST6  = 0xd6,
    RST7  = 0xd7,

    SOI   = 0xd8,       /* start of image */
    EOI   = 0xd9,       /* end of image */
    SOS   = 0xda,       /* start of scan */
    DQT   = 0xdb,       /* define quantization tables */
    DNL   = 0xdc,       /* define number of lines */
    DRI   = 0xdd,       /* define restart interval */
    DHP   = 0xde,       /* define hierarchical progression */
    EXP   = 0xdf,       /* expand reference components */

    /* application-specific segments */
    APP0  = 0xe0,
    APP1  = 0xe1,
    APP2  = 0xe2,
    APP3  = 0xe3,
    APP4  = 0xe4,
    APP5  = 0xe5,
    APP6  = 0xe6,
    APP7  = 0xe7,
    APP8  = 0xe8,
    APP9  = 0xe9,
    APP10 = 0xea,
    APP11 = 0xeb,
    APP12 = 0xec,
    APP13 = 0xed,
    APP14 = 0xee,
    APP15 = 0xef,

    JPG0  = 0xf0,
    JPG1  = 0xf1,
    JPG2  = 0xf2,
    JPG3  = 0xf3,
    JPG4  = 0xf4,
    JPG5  = 0xf5,
    JPG6  = 0xf6,
    SOF48 = 0xf7,       ///< JPEG-LS
    LSE   = 0xf8,       ///< JPEG-LS extension parameters
    JPG9  = 0xf9,
    JPG10 = 0xfa,
    JPG11 = 0xfb,
    JPG12 = 0xfc,
    JPG13 = 0xfd,

    COM   = 0xfe,       /* comment */

    TEM   = 0x01,       /* temporary private use for arithmetic coding */

    /* 0x02 -> 0xbf reserved */
} JPEG_MARKER;

/**
 * Compute a sample prediction from three neighbouring samples.
 *
 * @param ret       lvalue receiving the prediction
 * @param topleft   sample above and to the left
 * @param top       sample above
 * @param left      sample to the left
 * @param predictor selection value 1..7; any other value behaves like 7
 *
 * Every argument is parenthesised in the expansion so that compound
 * expressions (for example a sum passed as topleft) are evaluated as a unit.
 * The macro expands to a bare switch statement, so do not use it as the
 * unbraced body of an if/else.
 */
#define PREDICT(ret, topleft, top, left, predictor)\
    switch(predictor){\
        case 1: (ret)= (left); break;\
        case 2: (ret)= (top); break;\
        case 3: (ret)= (topleft); break;\
        case 4: (ret)= (left)   +   (top) - (topleft); break;\
        case 5: (ret)= (left)   + (((top) - (topleft))>>1); break;\
        case 6: (ret)= (top) + (((left)   - (topleft))>>1); break;\
        default:\
        case 7: (ret)= ((left) + (top))>>1; break;\
    }


/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */
/* IMPORTANT: these are only valid for 8-bit data precision! */
/* Layout shared by all four table pairs, as consumed by build_vlc():
 * bits[i] (i = 1..16) is the number of codes of length i (index 0 is unused);
 * val[] lists the symbols in order of increasing code length. */
static const uint8_t ff_mjpeg_bits_dc_luminance[17] =
{ /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
static const uint8_t ff_mjpeg_val_dc_luminance[] =
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };

static const uint8_t ff_mjpeg_bits_dc_chrominance[17] =
{ /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
static const uint8_t ff_mjpeg_val_dc_chrominance[] =
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };

/* Standard AC luminance table: 162 codes in total (the bits[] entries sum to
 * 162); the largest symbol value is 0xfa. */
static const uint8_t ff_mjpeg_bits_ac_luminance[17] =
{ /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
static const uint8_t ff_mjpeg_val_ac_luminance[] =
{ 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
  0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
  0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
  0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
  0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
  0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
  0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
  0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
  0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
  0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
  0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
  0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
  0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
  0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
  0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
  0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
  0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
  0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
  0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
  0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
  0xf9, 0xfa
};

/* Standard AC chrominance table: 162 codes in total (the bits[] entries sum
 * to 162); the largest symbol value is 0xfa. */
static const uint8_t ff_mjpeg_bits_ac_chrominance[17] =
{ /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };

static const uint8_t ff_mjpeg_val_ac_chrominance[] =
{ 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
  0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
  0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
  0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
  0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
  0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
  0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
  0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
  0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
  0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
  0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
  0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
  0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
  0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
  0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
  0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
  0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
  0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
  0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
  0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
  0xf9, 0xfa
};




/* Verbosity threshold: the av_log() function drops messages whose level is
 * above this value. With the initial value of 100 every AV_LOG_* level
 * defined in this file passes.
 * NOTE(review): this is a non-static definition; if the file is included
 * from more than one translation unit it will be defined more than once. */
int av_log_level = 100;//AV_LOG_INFO;

/*
 * log functions
 * Copyright (c) 2003 Michel Bardiaux
 */
#ifndef av_log
/**
 * Print a printf-style message to stderr unless its level is above
 * av_log_level.
 *
 * The level test comes before va_start(): returning between va_start() and
 * va_end() is undefined behaviour.
 *
 * @param ptr   logging context (unused)
 * @param level one of the AV_LOG_* severities
 * @param fmt   printf-style format string, followed by its arguments
 */
static void av_log(void* ptr, int level, const char *fmt, ...)
{
    va_list vl;

    (void)ptr;
    if(level>av_log_level)
        return;

    va_start(vl, fmt);
    vfprintf(stderr, fmt, vl);
    va_end(vl);
}
#endif


/* VLC decoding */

//#define DEBUG_VLC

/**
 * Release the lookup table owned by vlc and clear the pointer, so the
 * function can safely be called again on the same object.
 */
static void free_vlc(VLC *vlc)
{
    int16_t (*const entries)[2] = vlc->table;

    vlc->table = NULL;
    free(entries);
}


/**
 * Append one lookup table of 2^table_nb_bits entries to vlc and fill it with
 * every code that starts with code_prefix; longer codes are delegated to
 * sub-tables built by recursive calls.
 *
 * Entry layout: table[i][1] > 0 means a leaf (table[i][0] is the symbol,
 * table[i][1] the number of bits used at this level); table[i][1] < 0 means
 * a link (table[i][0] is the sub-table's start index, -table[i][1] its index
 * width); table[i][1] == 0 means no code maps here.
 *
 * @param vlc           table set being built; its storage may be reallocated
 * @param table_nb_bits index width of the table to create
 * @param nb_codes      number of entries in bits[] and codes[]
 * @param bits          code length per symbol, 0 for unused symbols
 * @param codes         code value per symbol
 * @param code_prefix   bits already consumed by the parent tables
 * @param n_prefix      number of bits in code_prefix
 * @return start index of the new table inside vlc->table, or -1 on
 *         allocation failure or overlapping codes
 *
 * On allocation failure the previous storage stays owned by vlc (so the
 * caller's free_vlc() releases it) and the size bookkeeping is rolled back.
 */
static int build_table(VLC *vlc, int table_nb_bits,
                       int nb_codes,
                       const uint8_t *bits,
                       const uint16_t *codes,
                       uint32_t code_prefix, int n_prefix)
{
    int i, j, k, n, table_size, table_index, nb, n1, index, symbol;
    uint32_t code, code_prefix2;
    int16_t (*table)[2];

    table_size = 1 << table_nb_bits;

    table_index = vlc->table_size;
    vlc->table_size += table_size;
    if (vlc->table_size > vlc->table_allocated) {
        int16_t (*grown)[2];

        vlc->table_allocated += (1 << vlc->bits);
        assert( (sizeof(int16_t) * 2 * vlc->table_allocated) < 10000);
        /* keep the old pointer until realloc() has succeeded */
        grown = (int16_t (*)[2])realloc(vlc->table,
                sizeof(int16_t) * 2 * vlc->table_allocated);
        if (grown) {
            vlc->table = grown;
        } else {
            vlc->table_allocated -= (1 << vlc->bits);
            vlc->table_size -= table_size;
            table_index = -1;
        }
    }

#ifdef DEBUG_VLC
    av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d code_prefix=%x n=%d\n",
           table_index, table_size, code_prefix, n_prefix);
#endif
    if (table_index < 0)
        return -1;

    table = &vlc->table[table_index];

    for(i=0;i<table_size;i++) {
        table[i][1] = 0; //bits
        table[i][0] = -1; //codes
    }

    /* first pass: map codes and compute auxiliary table sizes */
    for(i=0;i<nb_codes;i++) {

        n = bits[i];
        code = codes[i];

        /* we accept tables with holes */
        if (n <= 0)
            continue;
        symbol = i;
#if defined(DEBUG_VLC) && 0
        av_log(NULL,AV_LOG_DEBUG,"i=%d n=%d code=0x%x\n", i, n, code);
#endif
        /* if code matches the prefix, it is in the table */
        n -= n_prefix;
        /* a code no longer than the prefix cannot belong below it; testing
         * this first also keeps the shift count positive */
        if (n <= 0)
            continue;
        code_prefix2 = code >> n;
        if (code_prefix2 != code_prefix)
            continue;

        if (n <= table_nb_bits) {
            /* no need to add another table */
            j = (code << (table_nb_bits - n)) & (table_size - 1);
            nb = 1 << (table_nb_bits - n);
            for(k=0;k<nb;k++) {
                if (table[j][1] /*bits*/ != 0) {
                    av_log(NULL, AV_LOG_ERROR, "incorrect codes\n");
                    return -1;
                }
                table[j][1] = n; //bits
                table[j][0] = symbol;
                j++;
            }
        } else {
            n -= table_nb_bits;
            j = (code >> n) & ((1 << table_nb_bits) - 1);
#ifdef DEBUG_VLC
            av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n",
                   j, n);
#endif
            /* compute table size: remember the longest remainder seen */
            n1 = -table[j][1]; //bits
            if (n > n1)
                n1 = n;
            table[j][1] = -n1; //bits
        }
    }

    /* second pass : fill auxiliary tables recursively */
    for(i=0;i<table_size;i++) {
        n = table[i][1]; //bits
        if (n < 0) {
            n = -n;
            if (n > table_nb_bits) {
                n = table_nb_bits;
                table[i][1] = -n; //bits
            }
            index = build_table(vlc, n, nb_codes,
                                bits,
                                codes,
                                ((code_prefix << table_nb_bits) | i),
                                n_prefix + table_nb_bits);
            if (index < 0)
                return -1;
            /* note: realloc has been done, so reload tables */
            table = &vlc->table[table_index];
            table[i][0] = index; //code
        }
    }
    return table_index;
}

/**
 * Build the decoding table for one Huffman table given in JPEG form.
 *
 * @param vlc        destination; its previous contents are overwritten
 *                   without being freed, so release any old table first
 * @param bits_table bits_table[i], i = 1..16, is the number of codes of
 *                   length i
 * @param val_table  symbols in order of increasing code length
 * @param nb_codes   one more than the largest symbol value, at most 256
 * @param is_ac      non-zero to move every symbol up by 16, so that the
 *                   symbol stored in the table is the stream value + 16
 * @return 0 on success, -1 on failure (vlc then holds no table)
 *
 * Both scratch arrays are cleared up front: build_table() reads the code of
 * every symbol below nb_codes, including symbols that never occur in
 * val_table.
 */
static int build_vlc(VLC *vlc, const uint8_t *bits_table, const uint8_t *val_table,
                      int nb_codes, int is_ac)
{
    uint8_t huff_size[256+16];
    uint16_t huff_code[256+16];
    int i, j, k, nb, code, sym;

    assert(nb_codes <= 256);

    memset(huff_size, 0, sizeof(huff_size));
    memset(huff_code, 0, sizeof(huff_code));

    /* isn't this nicer than the one in the libjpeg ? */
    /* assign consecutive code values, shortest codes first */
    code = 0;
    k = 0;
    for(i=1;i<=16;i++) {
        nb = bits_table[i];
        for(j=0;j<nb;j++) {
            sym = val_table[k++];
            huff_size[sym] = i;
            huff_code[sym] = code;
            code++;
        }
        code <<= 1;
    }

    if(is_ac){
        /* shift all symbols up by 16 and leave the first 16 slots unused */
        memmove(huff_size+16, huff_size, sizeof(uint8_t)*nb_codes);
        memmove(huff_code+16, huff_code, sizeof(uint16_t)*nb_codes);
        memset(huff_size, 0, sizeof(uint8_t)*16);
        memset(huff_code, 0, sizeof(uint16_t)*16);
        nb_codes += 16;
    }

    vlc->bits = 9;
    vlc->table = NULL;
    vlc->table_allocated = 0;
    vlc->table_size = 0;
    if (build_table(vlc, 9, nb_codes,
                    huff_size,
                    huff_code,
                    0, 0) < 0) {
        free_vlc(vlc);
        return -1;
    }
    return 0;
}


/**
 * Install the four standard Huffman tables: DC and AC, each for luminance
 * (table index 0) and chrominance (table index 1).
 * The result of the individual build_vlc() calls is not checked.
 */
static void build_basic_mjpeg_vlc(MJpegDecodeContext * s)
{
    /* indexed [class][table index]; class 0 = DC, class 1 = AC */
    static const struct {
        const uint8_t *bits;
        const uint8_t *vals;
        int            nb_codes;
    } standard[2][2] = {
        {
            { ff_mjpeg_bits_dc_luminance,   ff_mjpeg_val_dc_luminance,    12 },
            { ff_mjpeg_bits_dc_chrominance, ff_mjpeg_val_dc_chrominance,  12 },
        },
        {
            { ff_mjpeg_bits_ac_luminance,   ff_mjpeg_val_ac_luminance,   251 },
            { ff_mjpeg_bits_ac_chrominance, ff_mjpeg_val_ac_chrominance, 251 },
        },
    };
    int cls, idx;

    for (cls = 0; cls < 2; cls++) {
        for (idx = 0; idx < 2; idx++) {
            build_vlc(&s->vlcs[cls][idx],
                      standard[cls][idx].bits,
                      standard[cls][idx].vals,
                      standard[cls][idx].nb_codes,
                      cls);
        }
    }
}

/**
 * Check that a picture size is usable: both dimensions must be positive when
 * viewed as int, and (w + 128) * (h + 128) must stay below INT_MAX / 4.
 *
 * @param av_log_ctx context handed to av_log() for the error message
 * @param w          picture width
 * @param h          picture height
 * @return 0 if the size is acceptable, -1 otherwise
 */
static inline int avcodec_check_dimensions(void *av_log_ctx, unsigned int w, unsigned int h)
{
    const int positive = (int)w>0 && (int)h>0;

    /* the product is only evaluated once both dimensions are known positive */
    if (!positive || (w+128)*(uint64_t)(h+128) >= INT_MAX/4) {
        av_log(av_log_ctx, AV_LOG_ERROR, "picture size invalid (%ux%u)\n", w, h);
        return -1;
    }

    return 0;
}

/**
 * Store the coded picture size in the codec context and derive the output
 * size from it by shifting with s->lowres; negating before and after the
 * shift rounds the result up.
 */
static inline void avcodec_set_dimensions(AVCodecContext *s, int width, int height)
{
    const int shift = s->lowres;

    s->coded_width  = width;
    s->coded_height = height;

    s->width  = -((-width)  >> shift);
    s->height = -((-height) >> shift);
}

/**
 * Parse a DHT segment: decode huffman tables and build VLC decoders.
 *
 * The segment starts with a 16-bit length and holds one or more tables. Each
 * table consists of a class/index byte, sixteen code-length counts and the
 * symbol values. The VLC stored for that class and index is freed and
 * rebuilt from the new data.
 *
 * @return 0 on success, -1 if the segment is malformed or a table cannot be
 *         built
 */
static int ff_mjpeg_decode_dht(MJpegDecodeContext *s)
{
    uint8_t code_counts[17];
    uint8_t symbols[256];
    int remaining = get_word(&s->gb) - 2;

    while (remaining > 0) {
        int table_class, table_id, total, highest, i;
        uint8_t header;

        /* every table needs at least the header byte and sixteen counts */
        if (remaining < 17)
            return -1;

        header      = get_byte(&s->gb);
        table_class = header >> 4;
        table_id    = header & 0xF;
        if (table_class >= 2 || table_id >= 4)
            return -1;

        total = 0;
        for (i = 1; i <= 16; i++) {
            code_counts[i] = get_byte(&s->gb);
            total += code_counts[i];
        }
        remaining -= 17;
        if (remaining < total || total > 256)
            return -1;

        /* read the symbols and remember the largest one */
        highest = 0;
        for (i = 0; i < total; i++) {
            const int symbol = get_byte(&s->gb);

            symbols[i] = symbol;
            if (symbol > highest)
                highest = symbol;
        }
        remaining -= total;

        /* build VLC and flush previous vlc if present */
        free_vlc(&s->vlcs[table_class][table_id]);
        av_log(s->avctx, AV_LOG_DEBUG, "class=%d index=%d nb_codes=%d\n",
               table_class, table_id, highest + 1);
        if (build_vlc(&s->vlcs[table_class][table_id], code_counts, symbols,
                      highest + 1, table_class > 0) < 0)
            return -1;
    }
    return 0;
}

/**
 * Initialise the decoder state stored in avctx->priv_data.
 *
 * Fills the scan table, clears the buffer bookkeeping and start code, records
 * the coded height and builds the four standard Huffman decoders. The
 * external-Huffman-table and QuickTime field-order handling is compiled out
 * (#if 0).
 *
 * @param avctx codec context; priv_data must point to an MJpegDecodeContext
 * @return always 0
 */
static int ff_mjpeg_decode_init(AVCodecContext *avctx)
{
    av_log(0, AV_LOG_DEBUG, "mjpeg: init\n");

    MJpegDecodeContext *s = (MJpegDecodeContext *)avctx->priv_data;
    s->avctx = avctx;

    int i;
    for(i=0; i<64; i++){
        int j = ff_zigzag_direct[i];
        /* keep the row bits (3..5) and rotate the three column bits:
         * bits 1..2 move down to 0..1 and bit 0 moves up to bit 2.
         * NOTE(review): presumably the coefficient order expected by
         * j_rev_dct() -- confirm against the IDCT implementation. */
        s->scantable.permutated[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
    }

    s->buffer_size = 0;
    s->buffer = NULL;
    s->start_code = -1;
    s->org_height = avctx->coded_height;

    /* the return values of the table builds are not checked */
    build_basic_mjpeg_vlc(s);

#if 0
    if (avctx->flags & CODEC_FLAG_EXTERN_HUFF)
    {
        av_log(avctx, AV_LOG_INFO, "mjpeg: using external huffman table\n");
        init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8);
        if (ff_mjpeg_decode_dht(s)) {
            av_log(avctx, AV_LOG_ERROR, "mjpeg: error using external huffman table, switching back to internal\n");
            build_basic_mjpeg_vlc(s);
        }
    }
#define AV_RL32(x) ((((uint8_t*)(x))[3] << 24) | \
                    (((uint8_t*)(x))[2] << 16) | \
                    (((uint8_t*)(x))[1] <<  8) | \
                     ((uint8_t*)(x))[0])
    if (avctx->extradata_size > 9 && AV_RL32(avctx->extradata + 4) == MKTAG('f','i','e','l')) {
        if (avctx->extradata[9] == 6) { /* quicktime icefloe 019 */
            s->interlace_polarity = 1; /* bottom field first */
        }
    }
#endif

    av_log(avctx, AV_LOG_DEBUG, "mjpeg: init done\n");
    return 0;
}


/**
 * Parse a DQT segment (quantize tables).
 *
 * The segment starts with a 16-bit length and holds any number of 65-byte
 * tables: one precision/index byte followed by 64 values in scan order. Each
 * value is stored at the position given by the scan table, and a quantizer
 * scale is derived from the second and the ninth coefficient in scan order.
 * Bytes left over after the last complete table are ignored.
 *
 * @return 0 on success, -1 for a 16-bit table or a table index above 3
 */
static int ff_mjpeg_decode_dqt(MJpegDecodeContext *s)
{
    const uint8_t *const perm = s->scantable.permutated;
    int remaining;

    for (remaining = get_word(&s->gb) - 2; remaining >= 65; remaining -= 65) {
        const uint8_t header = get_byte(&s->gb);
        const int precision  = header >> 4;
        const int table_id   = header & 0xF;
        int16_t *matrix;
        int pos, second, ninth;

        /* only 8 bit precision handled */
        if (precision != 0) {
            av_log(s->avctx, AV_LOG_ERROR, "dqt: 16bit precision\n");
            return -1;
        }
        if (table_id >= 4)
            return -1;
        av_log(s->avctx, AV_LOG_DEBUG, "index=%d\n", table_id);

        /* read quant table */
        matrix = s->quant_matrixes[table_id];
        for (pos = 0; pos < 64; pos++)
            matrix[perm[pos]] = get_byte(&s->gb);

        //XXX FIXME finetune, and perhaps add dc too
        second = matrix[perm[1]];
        ninth  = matrix[perm[8]];
        s->qscale[table_id] = FFMAX(second, ninth) >> 1;
        av_log(s->avctx, AV_LOG_DEBUG, "qscale[%d]: %d\n", table_id, s->qscale[table_id]);
    }

    return 0;
}


/* Static description of one pixel format; see the pix_fmt_info table. */
typedef struct PixFmtInfo {
    const char *name;        /**< format name; left unset for formats the table does not describe */
    uint8_t nb_channels;     /**< number of channels (including alpha) */
    uint8_t depth;           /**< bit depth of the color components */
    uint8_t x_chroma_shift;  /**< X chroma subsampling factor is 2 ^ shift */
    uint8_t y_chroma_shift;  /**< Y chroma subsampling factor is 2 ^ shift */
} PixFmtInfo;

/* this table gives more information about formats */
/* Indexed by enum FF_PixelFormat, so the entries must stay in enum order.
 * Formats without a description are zero-filled (name == NULL); they use
 * {0} because empty braces are not valid C before C23. */
static const PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
    /* PIX_FMT_RGB24, PIX_FMT_RGB32, PIX_FMT_GRAY8 */
    {0},{0},{0},
    /* PIX_FMT_MONOBLACK, PIX_FMT_PAL8, PIX_FMT_GRAY16BE */
    {0},{0},{0},

    /* JPEG YUV */
    /*[PIX_FMT_YUVJ420P] =*/ { "yuvj420p", 3, 8, 1, 1, },
    /*[PIX_FMT_YUVJ422P] =*/ { "yuvj422p", 3, 8, 1, 0, },
    /*[PIX_FMT_YUVJ444P] =*/ { "yuvj444p", 3, 8, 0, 0, },
    /*[PIX_FMT_YUVJ440P] =*/ { "yuvj440p", 3, 8, 0, 1, },

    /* YUV formats */
    /*[PIX_FMT_YUV420P] =*/ { "yuv420p", 3, 8, 1, 1, },
    /*[PIX_FMT_YUV422P] =*/ { "yuv422p", 3, 8, 1, 0, },
    /*[PIX_FMT_YUV444P] =*/ { "yuv444p", 3, 8, 0, 0, },

    /* PIX_FMT_YUYV422, PIX_FMT_GRAY16, PIX_FMT_YUV440P */
    {0},{0},{0}
};


/**
 * Allocate the three planes of a decoded picture.
 *
 * Plane 0 is width x height bytes with a stride of width. Planes 1 and 2
 * are sized from the chroma subsampling implied by pix_fmt: 4:4:4, 4:2:2
 * and 4:4:0 (both the YUVJ and YUV variants) are recognised explicitly,
 * every other format keeps the 4:2:0 layout (both dimensions halved,
 * rounded up). Previously everything except PIX_FMT_YUVJ444P was treated
 * as 4:2:0, which under-allocated the chroma planes and gave them the
 * wrong stride for the other layouts.
 *
 * Each plane is followed by the same fixed number of slack bytes as before.
 * NOTE(review): the slack presumably absorbs whole-block writes past the
 * picture edge; its size is inherited, not derived - confirm.
 * NOTE(review): packed outputs (RGB24/RGB32/GRAY16) still get a one byte
 * per pixel plane 0, exactly as before.
 *
 * @param picture  receives plane pointers and strides; entry 3 is cleared.
 *                 Whatever it pointed to before is overwritten, not freed.
 * @param pix_fmt  PIX_FMT_* value selecting the chroma layout
 * @param width    picture width in pixels, must be > 0
 * @param height   picture height in pixels, must be > 0
 * @return combined size of the three planes (without slack), or -1 on
 *         invalid dimensions or allocation failure; on failure all plane
 *         pointers are NULL and all strides 0
 */
static int picture_alloc(AVPicture *picture, int pix_fmt, int width, int height)
{
    const int luma_slack   = 1025 + 4096*5;
    const int chroma_slack = 4096*10;
    int x_shift = 1, y_shift = 1;   /* default: 4:2:0 */
    int luma_size, chroma_w, chroma_h, chroma_size;
    int i;

    for (i = 0; i < 4; i++) {
        picture->data[i] = NULL;
        picture->linesize[i] = 0;
    }

    /* the bound keeps width * height and the returned total inside int */
    if (width <= 0 || height <= 0 || width > (INT_MAX / 4) / height)
        return -1;

    if (pix_fmt == PIX_FMT_YUVJ444P || pix_fmt == PIX_FMT_YUV444P) {
        x_shift = 0;
        y_shift = 0;
    } else if (pix_fmt == PIX_FMT_YUVJ422P || pix_fmt == PIX_FMT_YUV422P) {
        y_shift = 0;
    } else if (pix_fmt == PIX_FMT_YUVJ440P || pix_fmt == PIX_FMT_YUV440P) {
        x_shift = 0;
    }

    luma_size   = width * height;
    chroma_w    = (width  + (1 << x_shift) - 1) >> x_shift;
    chroma_h    = (height + (1 << y_shift) - 1) >> y_shift;
    chroma_size = chroma_w * chroma_h;

    picture->data[0] = (uint8_t *)malloc(luma_size + luma_slack);
    picture->data[1] = (uint8_t *)malloc(chroma_size + chroma_slack);
    picture->data[2] = (uint8_t *)malloc(chroma_size + chroma_slack);
    if (!picture->data[0] || !picture->data[1] || !picture->data[2]) {
        for (i = 0; i < 3; i++) {
            free(picture->data[i]);
            picture->data[i] = NULL;
        }
        return -1;
    }

    picture->linesize[0] = width;
    picture->linesize[1] = chroma_w;
    picture->linesize[2] = chroma_w;
    return luma_size + 2 * chroma_size;
}


/**
 * Parse a SOFn (start of frame) segment and prepare the output picture.
 *
 * Reads sample precision, picture size and the per-component sampling
 * factors / quantization table indices, derives s->h_max / s->v_max,
 * handles a change of picture size (including the interlaced-field
 * heuristic based on s->org_height), selects s->avctx->pix_fmt from the
 * sampling factors and allocates s->picture.
 *
 * @return 0 on success (also when the second field of an interlaced frame
 *         is skipped), -1 on unsupported or invalid header values or on
 *         allocation failure
 */
static int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
{
    int len, nb_components, i, width, height, pix_fmt_id;

    /* XXX: verify len field validity */
    len = get_word(&s->gb);
    s->bits= get_byte(&s->gb);

    if(s->pegasus_rct) s->bits=9;
    if(s->bits==9 && !s->pegasus_rct) s->rct=1;    //FIXME ugly

    if (s->bits != 8 && !s->lossless){
        av_log(s->avctx, AV_LOG_ERROR, "only 8 bits/component accepted\n");
        return -1;
    }

    height = get_word(&s->gb);
    width = get_word(&s->gb);

    //HACK for odd_height.mov
    if(s->interlaced && s->width == width && s->height == height + 1)
        height= s->height;

    av_log(s->avctx, AV_LOG_DEBUG, "sof0: picture: %dx%d\n", width, height);
    if(avcodec_check_dimensions(s->avctx, width, height))
        return -1;

    nb_components = get_byte(&s->gb);
    if (nb_components <= 0 ||
        nb_components > MAX_COMPONENTS)
        return -1;
    if (s->ls && !(s->bits <= 8 || nb_components == 1)){
        av_log(s->avctx, AV_LOG_ERROR, "only <= 8 bits/component or 16-bit gray accepted for JPEG-LS\n");
        return -1;
    }
    s->nb_components = nb_components;
    s->h_max = 1;
    s->v_max = 1;
    for(i=0;i<nb_components;i++) {
        uint8_t sampling;

        /* component id */
        s->component_id[i] = get_byte(&s->gb) - 1;
        sampling = get_byte(&s->gb);
        s->h_count[i] = sampling >> 4;
        s->v_count[i] = sampling & 0xF;
        /* a zero factor is invalid and would later be used as a divisor
           when the scan header computes the macroblock grid */
        if (!s->h_count[i] || !s->v_count[i]) {
            av_log(s->avctx, AV_LOG_ERROR, "invalid sampling factor %d:%d for component %d\n",
                   s->h_count[i], s->v_count[i], i);
            return -1;
        }
        /* compute hmax and vmax (only used in interleaved case) */
        if (s->h_count[i] > s->h_max)
            s->h_max = s->h_count[i];
        if (s->v_count[i] > s->v_max)
            s->v_max = s->v_count[i];
        s->quant_index[i] = get_byte(&s->gb);
        if (s->quant_index[i] >= 4)
            return -1;
        av_log(s->avctx, AV_LOG_DEBUG, "component %d %d:%d id: %d quant:%d\n", i, s->h_count[i],
               s->v_count[i], s->component_id[i], s->quant_index[i]);
    }
    /* the format id below always looks at three components; clear the
       unused ones so values from an earlier frame cannot leak into it */
    for (i = nb_components; i < 3; i++) {
        s->h_count[i] = 0;
        s->v_count[i] = 0;
    }

    if(s->ls && (s->h_max > 1 || s->v_max > 1)) {
        av_log(s->avctx, AV_LOG_ERROR, "Subsampling in JPEG-LS is not supported.\n");
        return -1;
    }

    if(s->v_max==1 && s->h_max==1 && s->lossless==1) s->rgb=1;

    /* if different size, realloc/alloc picture */
    /* XXX: also check h_count and v_count */
    if (width != s->width || height != s->height) {
        const int table_size = (width + 15) / 16;
        /* allocate before touching the context so a failure leaves the
           previous, still consistent, state in place */
        int8_t *table = (int8_t *)calloc(table_size > 0 ? table_size : 1, 1);

        if (!table) {
            av_log(s->avctx, AV_LOG_ERROR, "qscale table allocation failed\n");
            return -1;
        }
        free(s->qscale_table);
        s->qscale_table = table;

        s->width = width;
        s->height = height;
        s->interlaced = 0;

        /* test interlaced mode */
        if ( s->org_height != 0 &&
            s->height < ((s->org_height * 3) / 4)) {
            s->interlaced = 1;
            s->bottom_field = s->interlace_polarity;
            height *= 2;
        }

        avcodec_set_dimensions(s->avctx, width, height);
    }

    /* second field of an interlaced frame: keep the picture of the first */
    if(s->interlaced && (s->bottom_field == !s->interlace_polarity))
        return 0;

    /* XXX: not complete test ! */
    pix_fmt_id = (s->h_count[0] << 20) | (s->v_count[0] << 16) |
                 (s->h_count[1] << 12) | (s->v_count[1] <<  8) |
                 (s->h_count[2] <<  4) |  s->v_count[2];
    av_log(s->avctx, AV_LOG_DEBUG, "pix fmt id %x\n", pix_fmt_id);
    switch(pix_fmt_id){
    case 0x222222:
    case 0x111111:
        if(s->rgb){
            s->avctx->pix_fmt = PIX_FMT_RGB32;
        }else if(s->nb_components==3)
            s->avctx->pix_fmt = s->cs_itu601 ? PIX_FMT_YUV444P : PIX_FMT_YUVJ444P;
        else
            s->avctx->pix_fmt = PIX_FMT_GRAY8;
        break;
    case 0x110000:
        s->avctx->pix_fmt = PIX_FMT_GRAY8;
        break;
    case 0x121111:
        s->avctx->pix_fmt = s->cs_itu601 ? PIX_FMT_YUV440P : PIX_FMT_YUVJ440P;
        break;
    case 0x211111:
    case 0x221212:
        s->avctx->pix_fmt = s->cs_itu601 ? PIX_FMT_YUV422P : PIX_FMT_YUVJ422P;
        break;
    case 0x221111:
        s->avctx->pix_fmt = s->cs_itu601 ? PIX_FMT_YUV420P : PIX_FMT_YUVJ420P;
        break;
    default:
        av_log(s->avctx, AV_LOG_ERROR, "Unhandled pixel format 0x%x\n", pix_fmt_id);
        return -1;
    }
    if(s->ls){
        if(s->nb_components > 1)
            s->avctx->pix_fmt = PIX_FMT_RGB24;
        else if(s->bits <= 8)
            s->avctx->pix_fmt = PIX_FMT_GRAY8;
        else
            s->avctx->pix_fmt = PIX_FMT_GRAY16;
    }

    av_log(s->avctx, AV_LOG_DEBUG, "pixel format 0x%x\n", s->avctx->pix_fmt);

    /* With interlacing the strides below are doubled and both fields share
       one picture, so the buffer has to hold two fields worth of rows.
       NOTE(review): the planes replace whatever s->picture pointed to
       without freeing it; confirm the consumer of the frame releases them. */
    if (picture_alloc((AVPicture*)&s->picture, s->avctx->pix_fmt, s->width,
                      s->height << s->interlaced) < 0 ||
        !s->picture.data[0] || !s->picture.data[1] || !s->picture.data[2]) {
        av_log(s->avctx, AV_LOG_ERROR, "picture allocation failed\n");
        return -1;
    }

    for(i=0; i<3; i++){
        s->linesize[i]= s->picture.linesize[i] << s->interlaced;
    }

    if (len != (8+(3*nb_components)))
    {
        av_log(s->avctx, AV_LOG_DEBUG, "decode_sof0: error, len(%d) mismatch\n", len);
    }

    /* totally blank picture as progressive JPEG will only add details to it */
    if(s->progressive){
        memset(s->picture.data[0], 0, s->picture.linesize[0] * s->height);
        memset(s->picture.data[1], 0, s->picture.linesize[1] * s->height >> (s->v_max - s->v_count[1]));
        memset(s->picture.data[2], 0, s->picture.linesize[2] * s->height >> (s->v_max - s->v_count[2]));
    }
    return 0;
}

/**
 * Decode one DC difference from the bitstream of s->gb.
 *
 * A VLC from s->vlcs[0][dc_index] gives the number of extra bits
 * ("code"); when it is non-zero, that many bits are then taken from the
 * stream and turned into a signed value.
 *
 * @param dc_index  index of the DC table in s->vlcs[0]
 * @return the decoded difference, 0 when the VLC is 0, or 0xffff when the
 *         VLC could not be decoded (code < 0)
 */
static inline int mjpeg_decode_dc(MJpegDecodeContext *s, int dc_index)
{
    int code;

    /* NOTE(review): re_index / re_cache0 look like the reader state that
       GET_VLC operates on by name; the macro is defined outside this part
       of the file - confirm before renaming or reordering these lines. */
    int re_index = (&s->gb)->index;
    int re_cache0 = 0;
    GET_VLC(code, &s->gb, s->vlcs[0][dc_index].table)
    /* commit the bit position after the VLC back to the reader */
    (&s->gb)->index = re_index;

    if (code < 0)
    {
        av_log(s->avctx, AV_LOG_WARNING, "mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index,
               &s->vlcs[0][dc_index]);
        return 0xffff;
    }

    if (code) {
        assert(code < 14);
        //printf("code: %i\n", code);

        int sign;
        /* load 32 bits big-endian from the byte containing the current bit */
        int32_t cache = AV_RB32( ((const uint8_t *)(&s->gb)->buffer)+(s->gb.index>>3) );
        //uint32_t cache = (s->gb.buffer[s->gb.index>>3] << 24) | (s->buffer[(s->gb.index>>3)+1] << 16);

        //assert( (s->gb.index & 0x7) == 0 );
        /* drop the bits of that byte that were already consumed */
        cache <<= (s->gb.index&0x07);
        /* sign is derived from the inverted top bit of the cache */
        sign = (~cache) >> 31;
        s->gb.index += code;
        /* NOTE(review): NEG_USR32 is defined elsewhere; presumably it
           extracts the top `code` bits of its first argument - confirm. */
        return (NEG_USR32(sign ^ cache, code) ^ sign) - sign;
    } else
        return 0;
}

/* decode block and dequantize */
/**
 * Decode and dequantize one 8x8 block of a sequential scan.
 *
 * The DC coefficient is decoded as a difference against
 * s->last_dc[component]; AC coefficients are read as run/size VLCs from
 * s->vlcs[1][ac_index] until the end-of-block code (0x10) is seen or
 * coefficient 63 has been reached. The VLC values carry an offset of 16,
 * so the high bits of a code are "run + 1" and 0x100 is the ZRL code
 * (advance 16 positions without a coefficient).
 *
 * @param block         output coefficients, expected to be cleared by the caller
 * @param component     index into s->last_dc
 * @param dc_index      DC table index
 * @param ac_index      AC table index
 * @param quant_matrix  dequantization matrix
 * @return 0 on success, -1 on a DC decoding error or a coefficient
 *         position beyond 63
 */
static int decode_block(MJpegDecodeContext *s, DCTELEM *block,
                        int component, int dc_index, int ac_index, int16_t *quant_matrix)
{
    int code, i, j, level, val;

    /* DC coef */
    val = mjpeg_decode_dc(s, dc_index);
    if (val == 0xffff) {
        av_log(s->avctx, AV_LOG_ERROR, "error dc\n");
        return -1;
    }
    val = val * quant_matrix[0] + s->last_dc[component];
    s->last_dc[component] = val;
    block[0] = val;
    /* AC coefs */
    {
    /* reader state used by GET_VLC; re_cache0 is read right after the
       macro, so these names and this order must stay as they are */
    int re_index = (&s->gb)->index;
    int re_cache0 = 0;
    /* "i < 63" rather than "i != 63": a ZRL code can move i past 63
       without going through the range check below, and the loop then has
       to stop instead of reading on through the bitstream */
    for(i = 0; i < 63; ) {
        GET_VLC(code, &s->gb, s->vlcs[1][ac_index].table)
        /* EOB */
        if (code == 0x10)
            break;               // Normal exit path from loop (not the i < 63 case)
        i += ((unsigned)code) >> 4;
        if (code != 0x100) {
            int sign;
            code &= 0xf;
            sign = (~re_cache0) >> 31;
            level = (NEG_USR32(sign ^ re_cache0, code) ^ sign) - sign;
            re_index += code;
            if (i > 63) {
                av_log(s->avctx, AV_LOG_ERROR, "error count: %d\n", i);
                return -1;
            }
            j = s->scantable.permutated[i];
            block[j] = level * quant_matrix[j];
        }
    }
    (&s->gb)->index = re_index;
    }

    return 0;
}

/* decode block and dequantize - progressive JPEG version */
/**
 * Decode and dequantize one block of a progressive scan (first pass only).
 *
 * When ss is 0 the DC difference is decoded and scaled by Al; otherwise the
 * DC term is set to 0. When se is non-zero the coefficients ss..se
 * (inclusive) are then decoded, unless an end-of-band run from an earlier
 * block is still pending in *EOBRUN.
 *
 * @param block         output coefficients, expected to be cleared by the caller
 * @param component     index into s->last_dc
 * @param dc_index      DC table index
 * @param ac_index      AC table index
 * @param quant_matrix  dequantization matrix
 * @param ss            first coefficient of the spectral band
 * @param se            last coefficient of the spectral band (inclusive)
 * @param Ah            unused here (refinement scans are not decoded)
 * @param Al            left shift applied to the decoded values
 * @param EOBRUN        in/out: number of following blocks whose band is empty
 * @return 0 on success, -1 on a DC decoding error or a coefficient
 *         position beyond se
 */
static int decode_block_progressive(MJpegDecodeContext *s, DCTELEM *block,
                        int component, int dc_index, int ac_index, int16_t *quant_matrix,
                        int ss, int se, int Ah, int Al, int *EOBRUN)
{
    int code, i, j, level, val, run;

    av_log(s->avctx, AV_LOG_DEBUG, "decode block progressive begin\n");

    /* DC coef */
    if (!ss) {
        val = mjpeg_decode_dc(s, dc_index);
        /* a bad DC code is a stream error, reported to the caller rather
           than asserted on */
        if (val == 0xffff) {
            av_log(s->avctx, AV_LOG_ERROR, "error dc\n");
            return -1;
        }
        val = (val * quant_matrix[0] << Al) + s->last_dc[component];
    } else
        val = 0;
    s->last_dc[component] = val;
    block[0] = val;
    if (!se)
        return 0;
    /* AC coefs */
    if (*EOBRUN) {
        (*EOBRUN)--;
        return 0;
    }
    {
    /* reader state used by GET_VLC; re_cache0 is read right after the
       macro, so these names and this order must stay as they are */
    int re_index = (&s->gb)->index;
    int re_cache0 = 0;
    /* se is the last coefficient of the band, so the loop runs while
       i <= se: it stops right after a coefficient stored at se, still reads
       the symbol that follows a coefficient at se - 1, handles ss == se,
       and terminates when a ZRL code moves i past the band */
    for (i = ss; i <= se; i++) {
        GET_VLC(code, &s->gb, s->vlcs[1][ac_index].table)
        /* Progressive JPEG use AC coeffs from zero and this decoder sets offset 16 by default */
        code -= 16;
        if (code & 0xF) {
            int sign;
            i += ((unsigned) code) >> 4;
            code &= 0xf;
            sign = (~re_cache0)>>31;
            level = (NEG_USR32(sign ^ re_cache0, code) ^ sign) - sign;
            re_index += code;

            if (i > se) {
                av_log(s->avctx, AV_LOG_ERROR, "error count: %d\n", i);
                return -1;
            }
            j = s->scantable.permutated[i];
            block[j] = level * quant_matrix[j] << Al;
        } else {
            run = ((unsigned) code) >> 4;
            if (run == 0xF) {// ZRL - skip 15 coefficients
                i += 15;
            } else {
                val = run;
                run = (1 << run);
                /* with val == 0 there are no extra bits to read, and a
                   shift by 32 would be undefined */
                if (val)
                    run += (((uint32_t)re_cache0) >> (32 - val)) & (run - 1);
                re_index += val;
                *EOBRUN = run - 1;
                break;
            }
        }
    }
    (&s->gb)->index = re_index;
    }

    av_log(s->avctx, AV_LOG_DEBUG, "decode block progressive end\n");

    return 0;
}

/**
 * Decode a lossless scan with three components into plane 0 of s->picture,
 * using 4 bytes per pixel (bytes 0..2 of each pixel are written).
 *
 * One row of decoded samples is kept in a local buffer so that the row
 * above is available for prediction; after each row the samples are
 * written out, optionally through one of two colour transforms selected by
 * s->rct / s->pegasus_rct.
 *
 * @param predictor        predictor selector passed to PREDICT for every
 *                         row except the first, which always uses 1
 * @param point_transform  left shift applied to each decoded difference
 * @return 0 on success, -1 if s->mb_width exceeds the row buffer
 */
static int ljpeg_decode_rgb_scan(MJpegDecodeContext *s, int predictor, int point_transform){
    int i, mb_x, mb_y;
    /* one row of samples, 4 entries per pixel of which 0..2 are used;
       note that this is a 256 KiB object with automatic storage */
    uint16_t buffer[32768][4];
    int left[3], top[3], topleft[3];
    const int linesize= s->linesize[0];
    const int mask= (1<<s->bits)-1;

    if((unsigned)s->mb_width > 32768) //dynamic alloc
        return -1;

    /* initial prediction value for the first pixel */
    for(i=0; i<3; i++){
        buffer[0][i]= 1 << (s->bits + point_transform - 1);
    }
    for(mb_y = 0; mb_y < s->mb_height; mb_y++) {
        const int modified_predictor= mb_y ? predictor : 1;
        uint8_t *ptr = s->picture.data[0] + (linesize * mb_y);

        if (s->interlaced && s->bottom_field)
            ptr += linesize >> 1;

        /* each row starts predicting from the first sample of the row above */
        for(i=0; i<3; i++){
            top[i]= left[i]= topleft[i]= buffer[0][i];
        }
        for(mb_x = 0; mb_x < s->mb_width; mb_x++) {
            if (s->restart_interval && !s->restart_count)
                s->restart_count = s->restart_interval;

            for(i=0;i<3;i++) {
                int pred;

                /* buffer[mb_x] still holds the sample of the previous row here */
                topleft[i]= top[i];
                top[i]= buffer[mb_x][i];

                PREDICT(pred, topleft[i], top[i], left[i], modified_predictor);

                left[i]=
                buffer[mb_x][i]= mask & (pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform));
            }

            if (s->restart_interval && !--s->restart_count) {
                align_get_bits(&s->gb);
                skip_bits(&s->gb, 16); /* skip RSTn */
            }
        }

        /* write the finished row to the picture */
        if(s->rct){
            for(mb_x = 0; mb_x < s->mb_width; mb_x++) {
                ptr[4*mb_x+1] = buffer[mb_x][0] - ((buffer[mb_x][1] + buffer[mb_x][2] - 0x200)>>2);
                ptr[4*mb_x+0] = buffer[mb_x][1] + ptr[4*mb_x+1];
                ptr[4*mb_x+2] = buffer[mb_x][2] + ptr[4*mb_x+1];
            }
        }else if(s->pegasus_rct){
            for(mb_x = 0; mb_x < s->mb_width; mb_x++) {
                ptr[4*mb_x+1] = buffer[mb_x][0] - ((buffer[mb_x][1] + buffer[mb_x][2])>>2);
                ptr[4*mb_x+0] = buffer[mb_x][1] + ptr[4*mb_x+1];
                ptr[4*mb_x+2] = buffer[mb_x][2] + ptr[4*mb_x+1];
            }
        }else{
            for(mb_x = 0; mb_x < s->mb_width; mb_x++) {
                ptr[4*mb_x+0] = buffer[mb_x][0];
                ptr[4*mb_x+1] = buffer[mb_x][1];
                ptr[4*mb_x+2] = buffer[mb_x][2];
            }
        }
    }
    return 0;
}

/**
 * Decode a lossless scan into the planes of s->picture.
 *
 * For every macroblock and each of three scan components, s->nb_blocks[i]
 * samples are decoded; each sample is a prediction from already decoded
 * neighbours in the same plane plus a decoded difference shifted by
 * point_transform. The component count is fixed at 3 here, independent of
 * the scan header.
 *
 * @param predictor        predictor selector passed to PREDICT
 * @param point_transform  left shift applied to each decoded difference
 * @return always 0
 */
static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor, int point_transform){
    int i, mb_x, mb_y;
    const int nb_components=3;

    for(mb_y = 0; mb_y < s->mb_height; mb_y++) {
        for(mb_x = 0; mb_x < s->mb_width; mb_x++) {
            if (s->restart_interval && !s->restart_count)
                s->restart_count = s->restart_interval;

            /* first row, first column or interlaced: some neighbours may
               be missing, so the predictor is chosen per sample */
            if(mb_x==0 || mb_y==0 || s->interlaced){
                for(i=0;i<nb_components;i++) {
                    uint8_t *ptr;
                    int n, h, v, x, y, c, j, linesize;
                    n = s->nb_blocks[i];
                    c = s->comp_index[i];
                    h = s->h_scount[i];
                    v = s->v_scount[i];
                    x = 0;
                    y = 0;
                    linesize= s->linesize[c];

                    for(j=0; j<n; j++) {
                        int pred;

                        ptr = s->picture.data[c] + (linesize * (v * mb_y + y)) + (h * mb_x + x); //FIXME optimize this crap
                        if(y==0 && mb_y==0){
                            if(x==0 && mb_x==0){
                                /* very first sample: fixed start value */
                                pred= 128 << point_transform;
                            }else{
                                /* top row: only the left neighbour exists */
                                pred= ptr[-1];
                            }
                        }else{
                            if(x==0 && mb_x==0){
                                /* left column: only the neighbour above exists */
                                pred= ptr[-linesize];
                            }else{
                                PREDICT(pred, ptr[-linesize-1], ptr[-linesize], ptr[-1], predictor);
                            }
                        }

                        /* the bottom-field offset is applied only after the
                           prediction has been read */
                        if (s->interlaced && s->bottom_field)
                            ptr += linesize >> 1;
                        *ptr= pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform);

                        if (++x == h) {
                            x = 0;
                            y++;
                        }
                    }
                }
            }else{
                /* interior macroblock: all three neighbours are available */
                for(i=0;i<nb_components;i++) {
                    uint8_t *ptr;
                    int n, h, v, x, y, c, j, linesize;
                    n = s->nb_blocks[i];
                    c = s->comp_index[i];
                    h = s->h_scount[i];
                    v = s->v_scount[i];
                    x = 0;
                    y = 0;
                    linesize= s->linesize[c];

                    for(j=0; j<n; j++) {
                        int pred;

                        ptr = s->picture.data[c] + (linesize * (v * mb_y + y)) + (h * mb_x + x); //FIXME optimize this crap
                        PREDICT(pred, ptr[-linesize-1], ptr[-linesize], ptr[-1], predictor);
                        *ptr= pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform);
                        if (++x == h) {
                            x = 0;
                            y++;
                        }
                    }
                }
            }
            if (s->restart_interval && !--s->restart_count) {
                align_get_bits(&s->gb);
                skip_bits(&s->gb, 16); /* skip RSTn */
            }
        }
    }
    return 0;
}

/**
 * Decode all macroblocks of a DCT-based scan into s->picture.
 *
 * For every macroblock and scan component, s->nb_blocks[] blocks are
 * decoded (sequential or progressive, depending on s->progressive) and
 * passed through the inverse DCT: written with "put" for sequential data
 * and accumulated with "add" for progressive data. Scans with Ah != 0 are
 * skipped.
 *
 * @return 0 on success or when the scan is skipped, -1 on a block error
 */
static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int ss, int se, int Ah, int Al){
    uint8_t *plane[MAX_COMPONENTS];
    int stride[MAX_COMPONENTS];
    int EOBRUN = 0;
    int comp, mb_x, mb_y;

    if (Ah)
        return 0; /* TODO decode refinement planes too */

    /* cache plane pointer and stride of every component in the scan */
    for (comp = 0; comp < nb_components; comp++) {
        const int c = s->comp_index[comp];

        plane[c]  = s->picture.data[c];
        stride[c] = s->linesize[c];
    }

    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
            if (s->restart_interval && !s->restart_count)
                s->restart_count = s->restart_interval;

            for (comp = 0; comp < nb_components; comp++) {
                const int nblocks = s->nb_blocks[comp];
                const int c = s->comp_index[comp];
                const int h = s->h_scount[comp];
                const int v = s->v_scount[comp];
                int x = 0;
                int y = 0;
                int blk;

                for (blk = 0; blk < nblocks; blk++) {
                    uint8_t *dst;
                    int rc;

                    memset(s->block, 0, sizeof(s->block));
                    if (s->progressive)
                        rc = decode_block_progressive(s, s->block, comp,
                                     s->dc_index[comp], s->ac_index[comp],
                                     s->quant_matrixes[ s->quant_index[c] ], ss, se, Ah, Al, &EOBRUN);
                    else
                        rc = decode_block(s, s->block, comp,
                                     s->dc_index[comp], s->ac_index[comp],
                                     s->quant_matrixes[ s->quant_index[c] ]);
                    if (rc < 0) {
                        av_log(s->avctx, AV_LOG_ERROR, "error y=%d x=%d\n", mb_y, mb_x);
                        return -1;
                    }

                    /* top-left sample of this block inside its plane */
                    dst = plane[c] +
                        (((stride[c] * (v * mb_y + y) * 8) +
                        (h * mb_x + x) * 8) >> s->avctx->lowres);
                    if (s->interlaced && s->bottom_field)
                        dst += stride[c] >> 1;
                    av_log(NULL, AV_LOG_DEBUG, "%d %d %d %d %d %d %d %d \n", mb_x, mb_y, x, y, c, s->bottom_field, (v * mb_y + y) * 8, (h * mb_x + x) * 8);

                    if (s->progressive)
                        ff_jref_idct_add(dst, stride[c], s->block);
                    else
                        ff_jref_idct_put(dst, stride[c], s->block);

                    /* advance to the next block within the macroblock */
                    x++;
                    if (x == h) {
                        x = 0;
                        y++;
                    }
                }
            }

            /* (< 1350) buggy workaround for Spectralfan.mov, should be fixed */
            if (s->restart_interval && s->restart_interval < 1350) {
                if (--s->restart_count == 0) {
                    align_get_bits(&s->gb);
                    skip_bits(&s->gb, 16); /* skip RSTn */
                    for (comp = 0; comp < nb_components; comp++) /* reset dc */
                        s->last_dc[comp] = 1024;
                }
            }
        }
    }

    return 0;
}

/**
 * Parse a SOS (start of scan) header and decode the scan that follows.
 *
 * Maps every scan component to a frame component, records its sampling
 * factors and Huffman table indices, reads the three trailing parameter
 * bytes, computes the macroblock grid and dispatches to the lossless or
 * DCT scan decoder.
 *
 * @return 0 on success, -1 on an invalid header (length, component count,
 *         unknown component id, table index, sampling factor) or when the
 *         scan decoder fails
 */
static int ff_mjpeg_decode_sos(MJpegDecodeContext *s)
{
    int len, nb_components, i, h, v, predictor, point_transform;
    int index, id;
    const int block_size= s->lossless ? 1 : 8;
    int ilv, prev_shift;
    uint8_t bufByte = 0;

    /* XXX: verify len field validity */
    len = get_word(&s->gb);
    nb_components = get_byte(&s->gb);
    if (len != 6+2*nb_components)
    {
        av_log(s->avctx, AV_LOG_ERROR, "decode_sos: invalid len (%d)\n", len);
        return -1;
    }
    /* the per-scan arrays filled below are indexed by the scan component
       number, so the count has to be bounded before the loop */
    if (nb_components <= 0 || nb_components > MAX_COMPONENTS)
    {
        av_log(s->avctx, AV_LOG_ERROR, "decode_sos: invalid number of components (%d)\n", nb_components);
        return -1;
    }
    for(i=0;i<nb_components;i++) {
        id = get_byte(&s->gb) - 1;
        av_log(s->avctx, AV_LOG_DEBUG, "component: %d\n", id);
        /* find component index */
        for(index=0;index<s->nb_components;index++)
            if (id == s->component_id[index])
                break;
        if (index == s->nb_components)
        {
            av_log(s->avctx, AV_LOG_ERROR, "decode_sos: index(%d) out of components\n", index);
            return -1;
        }

        s->comp_index[i] = index;

        s->nb_blocks[i] = s->h_count[index] * s->v_count[index];
        s->h_scount[i] = s->h_count[index];
        s->v_scount[i] = s->v_count[index];

        bufByte = get_byte(&s->gb);
        s->dc_index[i] = bufByte >> 4;
        s->ac_index[i] = bufByte & 0xF;

        if (s->dc_index[i] <  0 || s->ac_index[i] < 0 ||
            s->dc_index[i] >= 4 || s->ac_index[i] >= 4)
            goto out_of_range;
    }
    av_log(s->avctx, AV_LOG_DEBUG, "done components\n");

    predictor= get_byte(&s->gb); /* JPEG Ss / lossless JPEG predictor /JPEG-LS NEAR */
    ilv= get_byte(&s->gb);    /* JPEG Se / JPEG-LS ILV */
    bufByte = get_byte(&s->gb);
    prev_shift = bufByte >> 4;
    point_transform= bufByte & 0xF;

    for(i=0;i<nb_components;i++)
        s->last_dc[i] = 1024;

    if (nb_components > 1) {
        /* interleaved stream */
        s->mb_width  = (s->width  + s->h_max * block_size - 1) / (s->h_max * block_size);
        s->mb_height = (s->height + s->v_max * block_size - 1) / (s->v_max * block_size);
    } else if(!s->ls) { /* skip this for JPEG-LS */
        /* the sampling factors are used as divisors right below */
        if (!s->h_scount[0] || !s->v_scount[0]) {
            av_log(s->avctx, AV_LOG_ERROR, "decode_sos: invalid sampling factor\n");
            return -1;
        }
        h = s->h_max / s->h_scount[0];
        v = s->v_max / s->v_scount[0];
        s->mb_width  = (s->width  + h * block_size - 1) / (h * block_size);
        s->mb_height = (s->height + v * block_size - 1) / (v * block_size);
        s->nb_blocks[0] = 1;
        s->h_scount[0] = 1;
        s->v_scount[0] = 1;
    }

    av_log(s->avctx, AV_LOG_DEBUG, "%s %s p:%d >>:%d ilv:%d bits:%d %s\n", s->lossless ? "lossless" : "sequencial DCT", s->rgb ? "RGB" : "",
            predictor, point_transform, ilv, s->bits,
            s->pegasus_rct ? "PRCT" : (s->rct ? "RCT" : ""));

    /* mjpeg-b can have padding bytes between sos and image data, skip them */
    for (i = s->mjpb_skiptosod; i > 0; i--)
        skip_bits(&s->gb, 8);

    if (s->lossless) {
            av_log(s->avctx, AV_LOG_DEBUG, "lossless\n");
            if (s->rgb) {
                if (ljpeg_decode_rgb_scan(s, predictor, point_transform) < 0)
                    return -1;
            } else {
                if (ljpeg_decode_yuv_scan(s, predictor, point_transform) < 0)
                    return -1;
            }
    } else {
        av_log(s->avctx, AV_LOG_DEBUG, "lossy\n");
        /* for DCT scans the three parameter bytes are Ss, Se and Ah/Al */
        if (mjpeg_decode_scan(s, nb_components, predictor, ilv, prev_shift, point_transform) < 0)
            return -1;
    }
    return 0;
 out_of_range:
    av_log(s->avctx, AV_LOG_ERROR, "decode_sos: ac/dc index out of range\n");
    return -1;
}

/**
 * Parse a DRI (define restart interval) segment.
 *
 * The segment length must be exactly 4; the following word becomes
 * s->restart_interval and s->restart_count is reset to 0.
 *
 * @return 0 on success, -1 if the length field is not 4
 */
static int mjpeg_decode_dri(MJpegDecodeContext *s)
{
    const int segment_len = get_word(&s->gb);

    if (segment_len == 4) {
        s->restart_interval = get_word(&s->gb);
        s->restart_count = 0;
        av_log(s->avctx, AV_LOG_DEBUG, "restart interval: %d\n", s->restart_interval);
        return 0;
    }

    return -1;
}

/**
 * Parse an APPn segment.
 *
 * The first four payload bytes are compared against the known tags
 * "JFIF", "Adob" (+ 'e'), "LJIF" and, for APP1 segments that are long
 * enough, a second tag "mjpg". Recognised headers are logged; "LJIF"
 * additionally selects s->rgb / s->pegasus_rct. Payload bytes that were
 * not interpreted are skipped at the end.
 *
 * @return 0 on success, -1 if the length field is below 5 or reaches past
 *         the end of the bit reader's buffer
 */
static int mjpeg_decode_app(MJpegDecodeContext *s)
{
    int len, id;

    len = get_word(&s->gb);
    if (len < 5)
        return -1;
    if(8*len + get_bits_count(&s->gb) > s->gb.size_in_bits)
        return -1;

    {
        /* The two halves are read in separate statements: inside a single
           expression the order of the two reads would be unspecified. */
        const unsigned int hi = get_word(&s->gb);
        const unsigned int lo = get_word(&s->gb);
        id = (hi << 16) | lo;
    }
    id = be2me_32(id);
    len -= 6;

    av_log(s->avctx, AV_LOG_DEBUG, "APPx %8X\n", id);

    if (id == ff_get_fourcc("JFIF"))
    {
        int t_w, t_h, v1, v2;
        skip_bits(&s->gb, 8); /* the trailing zero-byte */
        v1= get_byte(&s->gb);
        v2= get_byte(&s->gb);
        skip_bits(&s->gb, 8);

        // ignore aspect ratio - XXX
        int numerator = get_word(&s->gb);
        int denominator = get_word(&s->gb);

        av_log(s->avctx, AV_LOG_INFO, "mjpeg: JFIF header found (version: %x.%x) SAR=%d/%d\n",
            v1, v2, numerator, denominator);

        t_w = get_byte(&s->gb);
        t_h = get_byte(&s->gb);
        if (t_w && t_h)
        {
            /* skip thumbnail */
            if (len-10-(t_w*t_h*3) > 0)
                len -= t_w*t_h*3;
        }
        len -= 10;
        goto out;
    }

    if (id == ff_get_fourcc("Adob") && (get_byte(&s->gb) == 'e'))
    {
        av_log(s->avctx, AV_LOG_INFO, "mjpeg: Adobe header found\n");
        skip_bits(&s->gb, 16); /* version */
        skip_bits(&s->gb, 16); /* flags0 */
        skip_bits(&s->gb, 16); /* flags1 */
        skip_bits(&s->gb, 8);  /* transform */
        len -= 7;
        goto out;
    }

    if (id == ff_get_fourcc("LJIF")){
        av_log(s->avctx, AV_LOG_INFO, "Pegasus lossless jpeg header found\n");
        skip_bits(&s->gb, 16); /* version ? */
        skip_bits(&s->gb, 16); /* unknown, always 0? */
        skip_bits(&s->gb, 16); /* unknown, always 0? */
        skip_bits(&s->gb, 16); /* unknown, always 0? */
        switch( get_byte(&s->gb)){
        case 1:
            s->rgb= 1;
            s->pegasus_rct=0;
            break;
        case 2:
            s->rgb= 1;
            s->pegasus_rct=1;
            break;
        default:
            av_log(s->avctx, AV_LOG_ERROR, "unknown colorspace\n");
        }
        len -= 9;
        goto out;
    }

    /* Apple MJPEG-A */
    if ((s->start_code == APP1) && (len > (0x28 - 8)))
    {
        {
            /* same sequencing requirement as for the first tag */
            const unsigned int hi = get_word(&s->gb);
            const unsigned int lo = get_word(&s->gb);
            id = (hi << 16) | lo;
        }
        id = be2me_32(id);
        len -= 4;
        if (id == ff_get_fourcc("mjpg")) /* Apple MJPEG-A */
        {
#if 0
            skip_bits(&s->gb, 32); /* field size */
            skip_bits(&s->gb, 32); /* pad field size */
            skip_bits(&s->gb, 32); /* next off */
            skip_bits(&s->gb, 32); /* quant off */
            skip_bits(&s->gb, 32); /* huff off */
            skip_bits(&s->gb, 32); /* image off */
            skip_bits(&s->gb, 32); /* scan off */
            skip_bits(&s->gb, 32); /* data off */
#endif
            av_log(s->avctx, AV_LOG_INFO, "mjpeg: Apple MJPEG-A header found\n");
        }
    }

out:
    /* slow but needed for extreme adobe jpegs */
    if (len < 0)
        av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error, decode_app parser read over the end\n");
    while(--len > 0)
        skip_bits(&s->gb, 8);

    return 0;
}

/**
 * Parse a COM (comment) segment.
 *
 * The comment text is copied into a temporary NUL-terminated string (a
 * trailing newline is dropped), logged, and checked for two special
 * values: "AVID" sets s->buggy_avid and "CS=ITU601" sets s->cs_itu601.
 * A segment whose length is below 2 or reaches past the end of the bit
 * reader's buffer is ignored, as is one for which the temporary string
 * cannot be allocated.
 *
 * @return always 0
 */
static int mjpeg_decode_com(MJpegDecodeContext *s)
{
    int len = get_word(&s->gb);
    if (len >= 2 && 8*len - 16 + get_bits_count(&s->gb) <= s->gb.size_in_bits) {
        /* len counts the two length bytes: len - 2 text bytes plus a NUL */
        char *cbuf = (char *)malloc(len - 1);
        if (cbuf) {
            int i;
            for (i = 0; i < len - 2; i++)
                cbuf[i] = get_byte(&s->gb);
            if (i > 0 && cbuf[i-1] == '\n')
                cbuf[i-1] = 0;
            else
                cbuf[i] = 0;

            av_log(s->avctx, AV_LOG_INFO, "mjpeg comment: '%s'\n", cbuf);

            /* buggy avid, it puts EOI only at every 10th frame */
            if (!strcmp(cbuf, "AVID"))
            {
                s->buggy_avid = 1;
            }
            else if(!strcmp(cbuf, "CS=ITU601")){
                s->cs_itu601= 1;
            }

            free(cbuf);
        }
    }

    return 0;
}

/* return the 8 bit start code value and update the search
   state. Return -1 if no start code found */
/**
 * Search [*pbuf_ptr, buf_end) for the next marker: a 0xff byte followed by
 * a byte in the range 0xc0..0xfe.
 *
 * @param pbuf_ptr  in: where to start searching; out: the position right
 *                  after the marker byte, or buf_end if none was found
 * @param buf_end   one past the last readable byte
 * @return the marker byte (0xc0..0xfe), or -1 if no marker was found
 */
static int find_marker(uint8_t **pbuf_ptr, uint8_t *buf_end)
{
    uint8_t *buf_ptr = *pbuf_ptr;
    int val = -1;
#ifdef DEBUG
    int skipped=0;
#endif

    while (buf_ptr < buf_end) {
        const unsigned int v = *buf_ptr++;

        /* look at the following byte only if it is still inside the
           buffer; a trailing 0xff must not cause a read at buf_end */
        if (v == 0xff && buf_ptr < buf_end) {
            const unsigned int v2 = *buf_ptr;

            if (v2 >= 0xc0 && v2 <= 0xfe) {
                val = *buf_ptr++;
                break;
            }
        }
#ifdef DEBUG
        skipped++;
#endif
    }

#ifdef DEBUG
    av_log(NULL, AV_LOG_DEBUG, "find_marker skipped %d bytes\n", skipped);
#endif
    *pbuf_ptr = buf_ptr;
    return val;
}

/**
 * Walk the markers in one input buffer and dispatch each segment to its parser.
 *
 * For every marker found by find_marker() the bit reader s->gb is set up
 * over the data that follows it:
 *   - SOS, non JPEG-LS: the scan data is copied into s->buffer with the
 *     0xff escaping removed (restart markers 0xd0..0xd7 are kept);
 *   - SOS, JPEG-LS: the scan data is re-packed into s->buffer, dropping the
 *     stuffed bit that follows each 0xff;
 *   - anything else: the reader is placed directly over the input.
 * s->buffer is grown on demand so it can hold the rest of the input.
 *
 * The frame output code is currently commented out, so neither @p data nor
 * @p data_size is written by this function.
 *
 * @param avctx     codec context; avctx->priv_data is the MJpegDecodeContext
 * @param data      destination frame (unused while output is disabled)
 * @param data_size destination size (unused while output is disabled)
 * @param buf       input bytes
 * @param buf_size  number of bytes in @p buf
 * @return number of bytes of @p buf consumed, or -1 on a DHT/SOF parse error,
 *         an LSE marker, or a failed buffer allocation
 */
static int ff_mjpeg_decode_frame(AVCodecContext *avctx,
                              void *data, int *data_size,
                              uint8_t *buf, int buf_size)
{
    MJpegDecodeContext *s = (MJpegDecodeContext *)avctx->priv_data;
    uint8_t *buf_end, *buf_ptr;
    int start_code;
    /* only referenced by the disabled output code in the EOI handler */
    AVFrame *picture = (AVFrame *)data;

    buf_ptr = buf;
    buf_end = buf + buf_size;
    av_log(avctx, AV_LOG_DEBUG, "mjpeg: decode start\n");
    while (buf_ptr < buf_end) {
        /* find start next marker */
        start_code = find_marker(&buf_ptr, buf_end);
        {
            /* EOF */
            if (start_code < 0) {
                goto the_end;
            } else {
                av_log(avctx, AV_LOG_DEBUG, "marker=%x avail_size_in_buf=%td\n", start_code, buf_end - buf_ptr);

                if ((buf_end - buf_ptr) > s->buffer_size)
                {
                    /* Grow the scratch buffer.  buffer_size is cleared first so the
                       context never advertises capacity it does not have if the
                       allocation fails. */
                    free(s->buffer);
                    s->buffer_size = 0;
                    s->buffer = (uint8_t *)malloc((buf_end - buf_ptr) + FF_INPUT_BUFFER_PADDING_SIZE);
                    if (!s->buffer) {
                        av_log(avctx, AV_LOG_ERROR, "mjpeg: could not allocate %td byte buffer\n",
                               buf_end - buf_ptr);
                        return -1;
                    }
                    s->buffer_size = buf_end-buf_ptr;
                    av_log(avctx, AV_LOG_DEBUG, "buffer too small, expanding to %d bytes\n",
                        s->buffer_size);
                }

                /* unescape buffer of SOS, use special treatment for JPEG-LS */
                if (start_code == SOS && !s->ls)
                {
                    uint8_t *src = buf_ptr;
                    uint8_t *dst = s->buffer;

                    while (src<buf_end)
                    {
                        uint8_t x = *(src++);

                        *(dst++) = x;
                        if (x == 0xff) {
                            /* skip any run of 0xff fill bytes */
                            while (src < buf_end && x == 0xff)
                                x = *(src++);

                            /* keep restart markers, drop a stuffed 0x00,
                               stop at any other marker */
                            if (x >= 0xd0 && x <= 0xd7)
                                *(dst++) = x;
                            else if (x)
                                break;
                        }
                    }
                    init_get_bits(&s->gb, s->buffer, (dst - s->buffer)*8);

                    av_log(avctx, AV_LOG_DEBUG, "escaping removed %td bytes\n",
                           (buf_end - buf_ptr) - (dst - s->buffer));
                }
                else if (start_code == SOS && s->ls) {
                    uint8_t *src = buf_ptr;
                    uint8_t *dst = s->buffer;
                    int bit_count = 0;
                    int t = 0, b = 0;
                    PutBitContext pb;

                    s->cur_scan++;

                    /* find marker */
                    while (src + t < buf_end) {
                        uint8_t x = src[t++];
                        if (x == 0xff){
                            while((src + t < buf_end) && x == 0xff)
                                x = src[t++];
                            if (x & 0x80) {
                                t -= 2;
                                break;
                            }
                        }
                    }
                    bit_count = t * 8;

                    init_put_bits(&pb, dst, t);

                    /* unescape bitstream */
                    /* NOTE(review): when src[t-1] is 0xff the read below touches
                       src[t]; confirm callers pad the input buffer. */
                    while (b < t){
                        uint8_t x = src[b++];
                        put_bits(&pb, 8, x);
                        if(x == 0xFF){
                            x = src[b++];
                            put_bits(&pb, 7, x);
                            bit_count--;
                        }
                    }
                    flush_put_bits(&pb);

                    init_get_bits(&s->gb, dst, bit_count);

                    av_log(avctx, AV_LOG_DEBUG, "ls escaping removed\n");
                }
                else
                    init_get_bits(&s->gb, buf_ptr, (buf_end - buf_ptr)*8);

                s->start_code = start_code;
                av_log(avctx, AV_LOG_DEBUG, "startcode: %X\n", start_code);

                /* process markers */
                if (start_code >= 0xd0 && start_code <= 0xd7) {
                    av_log(avctx, AV_LOG_DEBUG, "restart marker: %d\n", start_code&0x0f);
                    /* APP fields */
                } else if (start_code >= APP0 && start_code <= APP15) {
                    mjpeg_decode_app(s);
                    /* Comment */
                } else if (start_code == COM) {
                    mjpeg_decode_com(s);
                }

                switch (start_code) {
                case SOI:
                    s->restart_interval = 0;

                    s->restart_count = 0;
                    /* nothing to do on SOI */
                    break;
                case DQT:
                    ff_mjpeg_decode_dqt(s);
                    break;
                case DHT:
                    if (ff_mjpeg_decode_dht(s) < 0){
                        av_log(avctx, AV_LOG_ERROR, "huffman table decode error\n");
                        return -1;
                    }
                    break;
                case SOF0:
                    s->lossless=0;
                    s->ls=0;
                    s->progressive=0;
                    if (ff_mjpeg_decode_sof(s) < 0)
                        return -1;
                    break;
                case SOF2:
                    s->lossless=0;
                    s->ls=0;
                    s->progressive=1;
                    if (ff_mjpeg_decode_sof(s) < 0)
                        return -1;
                    break;
                case SOF3:
                    s->lossless=1;
                    s->ls=0;
                    s->progressive=0;
                    if (ff_mjpeg_decode_sof(s) < 0)
                        return -1;
                    break;
                case SOF48:
                    s->lossless=1;
                    s->ls=1;
                    s->progressive=0;
                    if (ff_mjpeg_decode_sof(s) < 0)
                        return -1;
                    break;
                case LSE:
                    return -1;
                    break;
                case EOI:
                    av_log(avctx, AV_LOG_DEBUG, "EOI\n");
                    s->cur_scan = 0;
                    /* in these modes the picture was already finished from the
                       SOS handler, so the EOI itself is ignored */
                    if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
                        if (!s->progressive)
                            break;
eoi_parser:
                    {
                        if (s->interlaced) {
                            s->bottom_field ^= 1;
                            /* if not bottom field, do not output image yet */
                            if (s->bottom_field == !s->interlace_polarity)
                                goto not_the_end;
                        }
/*
                        *picture = s->picture;
                        *data_size = sizeof(AVFrame);

                        if (!s->lossless) {
                            picture->quality= FFMAX(FFMAX(s->qscale[0], s->qscale[1]), s->qscale[2]);
                            picture->qstride= 0;
                            picture->qscale_table= s->qscale_table;
                            memset(picture->qscale_table, picture->quality, (s->width+15)/16);
                            av_log(avctx, AV_LOG_DEBUG, "QP: %d\n", picture->quality);
                            picture->quality*= FF_QP2LAMBDA;
                        }
*/
                        goto the_end;
                    }
                    break;
                case SOS:
                    ff_mjpeg_decode_sos(s);
                    /* buggy avid puts EOI every 10-20th frame */
                    /* if restart period is over process EOI */
                    if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
                        if (!s->progressive)
                            goto eoi_parser;
                    break;
                case DRI:
                    mjpeg_decode_dri(s);
                    break;
                case SOF1:
                case SOF5:
                case SOF6:
                case SOF7:
                case SOF9:
                case SOF10:
                case SOF11:
                case SOF13:
                case SOF14:
                case SOF15:
                case JPG:
                    av_log(avctx, AV_LOG_ERROR, "mjpeg: unsupported coding type (%x)\n", start_code);
                    break;
//                default:
//                    printf("mjpeg: unsupported marker (%x)\n", start_code);
//                    break;
                }

not_the_end:
                /* end of start code processing: skip what the parser consumed */
                buf_ptr += (get_bits_count(&s->gb)+7)/8;
                av_log(avctx, AV_LOG_DEBUG, "marker parser used %d bytes (%d bits)\n",
                       (get_bits_count(&s->gb)+7)/8, get_bits_count(&s->gb));
            }
        }
    }
the_end:
    av_log(avctx, AV_LOG_DEBUG, "mjpeg decode frame unused %td bytes\n", buf_end - buf_ptr);
//    return buf_end - buf_ptr;
    return buf_ptr - buf;
}

/**
 * Release the resources held by the decoder context.
 *
 * Frees the unescape buffer, the qscale table and all 2x4 VLC tables.
 * The freed pointers are cleared and buffer_size is reset, so a repeated
 * call does not double-free and a later decode re-allocates the buffer
 * instead of writing through a stale pointer.
 *
 * @param avctx codec context; avctx->priv_data is the MJpegDecodeContext
 * @return always 0
 */
static int ff_mjpeg_decode_end(AVCodecContext *avctx)
{
    MJpegDecodeContext *s = (MJpegDecodeContext *)avctx->priv_data;
    int i, j;

    free(s->buffer);
    s->buffer = NULL;
    s->buffer_size = 0;

    free(s->qscale_table);
    s->qscale_table = NULL;

    for(i=0;i<2;i++) {
        for(j=0;j<4;j++)
            free_vlc(&s->vlcs[i][j]);
    }
    return 0;
}



/*
void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;

    switch(c->idct_permutation_type){
    case FF_NO_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}
*/



/* rgb32 handling */

/*
 * Fixed-point arithmetic used by the YCbCr -> RGB macros below.
 *   SCALEBITS  number of fractional bits
 *   ONE_HALF   0.5 in that fixed-point format, added for rounding
 *   FIX(x)     the constant x converted to fixed point, rounded to nearest
 */
#define SCALEBITS 10
#define ONE_HALF  (1 << (SCALEBITS - 1))
#define FIX(x)    ((int) ((x) * (1<<SCALEBITS) + 0.5))

/*
 * YUV_TO_RGB1 computes the per-pixel chroma contributions r_add, g_add and
 * b_add from cb_ptr[0] and cr_ptr[0].  It expects those names, plus cb and
 * cr in the non-table variant, to be declared by the enclosing function.
 * It expands to a braced block, so it is used without a trailing semicolon.
 */
#ifdef USE_YUV_TABLES

/* chroma contributions precomputed for every possible 8 bit Cb/Cr value */
static int32_t ccr1Tbl[256];
static int32_t ccr2Tbl[256];
static int32_t ccb1Tbl[256];
static int32_t ccb2Tbl[256];

/* Fill the four chroma tables; only the first call does any work. */
void initColorTbls(void)
{
    static int initTbl = 0;
    int i;
    if (!initTbl) {
        initTbl = 1;
        for (i = 0; i < 256; i++) {
            /* the rounding term is folded into the tables; the green channel
               sums ccr2Tbl and ccb1Tbl, so only one of the two carries it */
            ccr1Tbl[i] =  (i - 128) * FIX(1.40200) /* 1436 */ + ONE_HALF;
            ccr2Tbl[i] = -(i - 128) * FIX(0.71414) /*  731 */ + ONE_HALF;
            ccb1Tbl[i] = -(i - 128) * FIX(0.34414) /*  352 */;
            ccb2Tbl[i] =  (i - 128) * FIX(1.77200) /* 1815 */ + ONE_HALF;
        }
    }
}

#define YUV_TO_RGB1 \
{\
    r_add = ccr1Tbl[cr_ptr[0]];\
    g_add = ccr2Tbl[cr_ptr[0]] + ccb1Tbl[cb_ptr[0]];\
    b_add = ccb2Tbl[cb_ptr[0]];\
}


#else


#define YUV_TO_RGB1 \
{\
    cb = (cb_ptr[0]) - 128;\
    cr = (cr_ptr[0]) - 128;\
    r_add = FIX(1.40200) * cr + ONE_HALF;\
    g_add = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;\
    b_add = FIX(1.77200) * cb + ONE_HALF;\
}

#endif

/* disabled variant of YUV_TO_RGB1 built from shifts and adds */
/*
#define YUV_TO_RGB1 \
{\
    cb = cb_ptr[0] - 128;\
    cr = cr_ptr[0] - 128;\
    ccr = cr + (cr << 2);\
    ccr += (cr << 5) + (ccr << 6);\
    ccr1 = (ccr << 2) + (cr << 3);\
    ccr2 = (ccr << 1) + (cr << 4) + cr;\
    ccb = (cb << 1) + (cb << 3);\
    ccb2 = ccb + cb;\
    ccb1 = ccb2 << 5;\
    ccb2 = (ccb2 << 1) + (ccb << 7) + (cb << 9) + cb;\
    r_add = ONE_HALF + ccr1;\
    g_add = ONE_HALF - ccb1 - ccr2;\
    b_add = ONE_HALF + ccb2;\
}
*/

/*
 * YUV_TO_RGB2(d, y1) combines the luma sample y1 with the r_add/g_add/b_add
 * values left by YUV_TO_RGB1 and stores one pixel at d as a 32 bit word:
 * 0xFF in bits 24..31, blue in bits 16..23, green in bits 8..15, red in
 * bits 0..7.  Each channel is looked up through cm[], which the enclosing
 * function must provide together with y, r, g and b (r, g, b unsigned).
 * The 0xFF constant is unsigned so the shift into bit 31 is well defined.
 */
#define YUV_TO_RGB2(d, y1)\
{\
    y = (y1) << SCALEBITS;\
    r = cm[(y + r_add) >> SCALEBITS];\
    g = cm[(y + g_add) >> SCALEBITS];\
    b = cm[(y + b_add) >> SCALEBITS];\
    ((uint32_t *)(d))[0] = (0xFFu << 24) | (r << 0) | (g << 8) | (b << 16);\
}

/**
 * Convert a planar picture with one Cb and one Cr sample per luma sample
 * into 32 bit pixels.
 *
 * Planes 0, 1 and 2 of @p src are read as Y, Cb and Cr; each output pixel
 * occupies four bytes in plane 0 of @p dst and is produced by the
 * YUV_TO_RGB1 / YUV_TO_RGB2 macros.  Rows are stepped by the respective
 * linesize values.
 *
 * @param dst    destination picture (plane 0 is written)
 * @param src    source picture (planes 0..2 are read)
 * @param width  number of pixels per row
 * @param height number of rows
 */
static void yuvj444p_to_rgb32(AVPicture *dst, const AVPicture *src, int width, int height)
{
    /* names below that the conversion macros reference must not be renamed:
       cm, cb_ptr, cr_ptr, y, cb, cr, r_add, g_add, b_add, r, g, b */
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    const uint8_t *luma_row, *cb_row, *cr_row;
    const uint8_t *luma, *cb_ptr, *cr_ptr;
    uint8_t *out_row, *out;
    int y, cb, cr, r_add, g_add, b_add;
    unsigned int r, g, b;
    int cols_left;

#ifdef USE_YUV_TABLES
    initColorTbls();
#endif

    out_row  = dst->data[0];
    luma_row = src->data[0];
    cb_row   = src->data[1];
    cr_row   = src->data[2];

    while (height != 0) {
        /* restart every cursor at the beginning of the current row */
        out    = out_row;
        luma   = luma_row;
        cb_ptr = cb_row;
        cr_ptr = cr_row;

        cols_left = width;
        while (cols_left != 0) {
            YUV_TO_RGB1
            YUV_TO_RGB2(out, luma[0]);
            out += 4;
            luma++;
            cb_ptr++;
            cr_ptr++;
            cols_left--;
        }

        out_row  += dst->linesize[0];
        luma_row += src->linesize[0];
        cb_row   += src->linesize[1];
        cr_row   += src->linesize[2];
        height--;
    }
}

/**
 * Convert a planar picture whose chroma planes are subsampled by two in
 * both directions into 32 bit pixels.
 *
 * Rows are processed in pairs so that each Cb/Cr sample is converted once
 * with YUV_TO_RGB1 and then applied to a 2x2 group of luma samples with
 * YUV_TO_RGB2.  A trailing odd column and a trailing odd row are handled
 * separately with the same chroma sample.
 *
 * @param dst    destination picture (plane 0 is written, four bytes per pixel)
 * @param src    source picture (planes 0..2 are read as Y, Cb, Cr)
 * @param width  number of pixels per row
 * @param height number of rows
 */
static void yuvj420p_to_rgb32(AVPicture *dst, const AVPicture *src, int width, int height)
{
    /* names below that the conversion macros reference must not be renamed:
       cm, cb_ptr, cr_ptr, y, cb, cr, r_add, g_add, b_add, r, g, b */
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    const uint8_t *luma_top, *luma_bot, *cb_ptr, *cr_ptr;
    uint8_t *out_base, *out_top, *out_bot;
    int y, cb, cr, r_add, g_add, b_add;
    unsigned int r, g, b;
    int cols_left, chroma_width;

#ifdef USE_YUV_TABLES
    initColorTbls();
#endif

    out_base = dst->data[0];
    luma_top = src->data[0];
    cb_ptr   = src->data[1];
    cr_ptr   = src->data[2];
    chroma_width = (width + 1) >> 1;

    while (height >= 2) {
        out_top  = out_base;
        out_bot  = out_base + dst->linesize[0];
        luma_bot = luma_top + src->linesize[0];

        cols_left = width;
        while (cols_left >= 2) {
            YUV_TO_RGB1
            /* one chroma sample covers a 2x2 group: output 4 pixels */
            YUV_TO_RGB2(out_top,     luma_top[0]);
            YUV_TO_RGB2(out_top + 4, luma_top[1]);
            YUV_TO_RGB2(out_bot,     luma_bot[0]);
            YUV_TO_RGB2(out_bot + 4, luma_bot[1]);
            out_top  += 8;
            out_bot  += 8;
            luma_top += 2;
            luma_bot += 2;
            cb_ptr++;
            cr_ptr++;
            cols_left -= 2;
        }

        /* odd width: last column of both rows */
        if (cols_left) {
            YUV_TO_RGB1
            YUV_TO_RGB2(out_top, luma_top[0]);
            YUV_TO_RGB2(out_bot, luma_bot[0]);
            luma_top++;
            cb_ptr++;
            cr_ptr++;
        }

        /* the cursors have advanced by one row's worth of samples; move them
           to the start of the next row pair / next chroma row */
        out_base += 2 * dst->linesize[0];
        luma_top += 2 * src->linesize[0] - width;
        cb_ptr   += src->linesize[1] - chroma_width;
        cr_ptr   += src->linesize[2] - chroma_width;
        height   -= 2;
    }

    /* odd height: one remaining row, chroma shared between pixel pairs */
    if (height) {
        out_top = out_base;

        cols_left = width;
        while (cols_left >= 2) {
            YUV_TO_RGB1
            /* output 2 pixels */
            YUV_TO_RGB2(out_top,     luma_top[0]);
            YUV_TO_RGB2(out_top + 4, luma_top[1]);
            out_top  += 8;
            luma_top += 2;
            cb_ptr++;
            cr_ptr++;
            cols_left -= 2;
        }

        /* odd width: final pixel of the last row */
        if (cols_left) {
            YUV_TO_RGB1
            YUV_TO_RGB2(out_top, luma_top[0]);
        }
    }
}