/*
 * Copyright (c) 1988-1996 Sam Leffler
 * Copyright (c) 1991-1996 Silicon Graphics, Inc.
 *
 * Permission to use, copy, modify, distribute, and sell this software and
 * its documentation for any purpose is hereby granted without fee, provided
 * that (i) the above copyright notices and this permission notice appear in
 * all copies of the software and related documentation, and (ii) the names of
 * Sam Leffler and Silicon Graphics may not be used in any advertising or
 * publicity relating to the software without the specific, prior written
 * permission of Sam Leffler and Silicon Graphics.
 *
 * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 *
 * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
 * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF
 * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THIS SOFTWARE.
 *
 */

/*
 * TIFF Library Win32-DIB and RGBA output routines.
 *
 * Written by Hans Eckardt (hans.eckardt@t-online.de), Jena City, Thuringia, Germany
 *
 * Using the image iterator written by Conrad J. Poelman, PL/WSAT, Kirtland AFB,
 * NM on 26 Mar 96. (tif_imageiter.c v 1.1 1997/08/29 )
 * and code from Sam Lefflers tif_getimage.c v 1.43 1997/08/29
 *
 * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 *
 * IN NO EVENT SHALL HANS ECKARDT BE LIABLE FOR
 * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF
 * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THIS SOFTWARE.
 */


/*
 * TIFF Library
 *
 * Output of pixel data as Windows DIB image formats and as RGBA.
 *
 * Conversions the routines can do for DIB output:
 *
 *   TIF image              -> DIB format:
 *
 *   2 Bpp                  ->  all Formats
 *   4 Bpp                  ->  PAL_16 and all RGB
 *   8 Bpp PAL + Gray       ->  PAL_256 and all RGB
 *	 > 8 Bpp                ->  all RGB
 *
 *
 * Note: Functions using DIB with palette assume the palette is present right after BITMAPINFOHEADER

 *  changes      :
 		1997/11/02 - recoded DIB output using the nice image iterator from Conrad J. Poelman.
		             Formerly I had done this in tif_getimage style; that code may have
					 been faster but seemed more error prone and harder to understand

 		1997/11/08 - RGBA output merged in (can re-use some helper code)

    todo:
 		1997/11/02 - test all the callbacks, especially with tiled images,
					 (have only the two tiled from Sam Lefflers distribution)

                   - if we can do dithering more format conversions should be accepted like 24 Bpp ->  PAL_256
 */

#include "tiffiop.h"
#include "tif_imageiter.h"

#include <assert.h>


/**************************************************************************
 *      general helpers used for both DIB and RGBA output
 **************************************************************************/

/*
 * 8-bit sample type used by the mapping and clamping tables below.
 * It is only declared here when TIF_RGBA_SUPPORT is not defined; per the
 * original note the type is already known otherwise.
 */
#ifndef TIF_RGBA_SUPPORT /*else already known */
typedef	unsigned char TIFFRGBValue;		/* 8-bit samples */
#endif

/*
 * Macros for converting a count of packed samples to whole bytes,
 * rounding up:
 *   ASBITS(n)    - bytes needed to hold n bits (8 per byte)
 *   ASNIBBLES(n) - bytes needed to hold n 4-bit nibbles (2 per byte)
 */

#define ASBITS(n) (((n) + 7) / 8)
#define ASNIBBLES(n) (((n) + 1) / 2)


/*
 * Helpers for colormaps and gray, b&w maps
 */

/*
 * Decide whether the image colormap holds 16-bit or 8-bit values.
 * The 2^bitspersample entries of the red, green and blue maps are scanned;
 * 16 is returned as soon as any entry is >= 256, otherwise 8.
 */
static int
 checkcmap(TIFFImageIter* img)
{
	const uint16 *red = img->redcmap;
	const uint16 *green = img->greencmap;
	const uint16 *blue = img->bluecmap;
	const long entries = 1L << img->bitspersample;
	long idx;

	for (idx = 0; idx < entries; idx++)
	{
		if (red[idx] >= 256 || green[idx] >= 256 || blue[idx] >= 256)
			return 16;
	}
	return 8;
}

/*
 * Reduce a 16-bit colormap to 8 bits in place: every one of the
 * 2^bitspersample red, green and blue entries is replaced by its
 * upper byte (value >> 8).
 */
static void
 cvtcmap(TIFFImageIter* img)
{
	uint16 *red = img->redcmap;
	uint16 *green = img->greencmap;
	uint16 *blue = img->bluecmap;
	const long entries = 1L << img->bitspersample;
	long idx;

	for (idx = 0; idx < entries; idx++)
	{
		red[idx] = (uint16)(red[idx] >> 8);
		green[idx] = (uint16)(green[idx] >> 8);
		blue[idx] = (uint16)(blue[idx] >> 8);
	}
}

/*
 * Combine three components into one uint32 value: r occupies the low
 * byte, g is shifted left by 8 and b by 16.  Callers in this file pass
 * values already limited to 8 bits.
 */
#define	PACK(r,g,b)	\
	((uint32)(r)|((uint32)(g)<<8)|((uint32)(b)<<16))

/*
 * Build the lookup table used to unpack palette or gray images.
 *
 * The table has one uint32 entry per possible sample value
 * (2^bitspersample entries), packed as RGBx with 8 bits per component
 * and the top byte unused, so a callback gets a pixel's colour with a
 * single index operation.
 *
 * When convert_map is given, entry i is the gray level convert_map[i]
 * replicated into R, G and B; otherwise entry i is taken from the low
 * bytes of the image's red/green/blue colormaps.
 *
 * Returns the table (allocated with _TIFFmalloc) or NULL after
 * reporting the failure through TIFFError.
 */
static uint32*
 makemap(TIFFImageIter* img, TIFFRGBValue* convert_map)
{
	const int entries = 1 << img->bitspersample;
	uint32* table;
	int idx;

	table = (uint32*) _TIFFmalloc(entries * sizeof(uint32));
	if (table == NULL)
	{
		TIFFError(TIFFFileName(img->tif), "No space for Palette mapping table");
		return NULL;
	}

	if (convert_map != NULL)
	{
		/* gray / b&w image: same level in all three components */
		for (idx = 0; idx < entries; idx++)
		{
			TIFFRGBValue level = convert_map[idx];
			table[idx] = PACK(level, level, level);
		}
	}
	else
	{
		/* palette image: use the colormap entries directly */
		const uint16 *red = img->redcmap;
		const uint16 *green = img->greencmap;
		const uint16 *blue = img->bluecmap;

		for (idx = 0; idx < entries; idx++)
			table[idx] = PACK(red[idx]&0xff, green[idx]&0xff, blue[idx]&0xff);
	}
	return table;
}

/*
 * Construct a mapping table to convert from the range
 * of the data samples to [0,255] --for display.  This
 * process also handles inverting B&W images when needed.
 *
 * The table has 2^bitspersample entries.  For PHOTOMETRIC_MINISWHITE
 * images the ramp is inverted (sample 0 maps to 255); for every other
 * photometric it is ascending (sample 0 maps to 0).
 *
 * Returns the table, allocated with _TIFFmalloc (buildMap_DIB releases
 * it with _TIFFfree), or NULL after reporting the failure through
 * TIFFError.
 */
static TIFFRGBValue* 
 setupMap(TIFFImageIter* img)
{
	int32 x;
	/* largest sample value representable with bitspersample bits */
	int32 range = (int32) ((1L << img->bitspersample) - 1);

	TIFFRGBValue* convert_map = (TIFFRGBValue *) _TIFFmalloc((range + 1) * sizeof(TIFFRGBValue));
	if (!convert_map)
	{
		TIFFError(TIFFFileName(img->tif),
				  "No space for photometric conversion table");
		return NULL;
	}
	if (img->photometric == PHOTOMETRIC_MINISWHITE)
	{
		for (x = 0; x <= range; x++)
			convert_map[x] = (TIFFRGBValue)(((range - x) * 255) / range);
	}
	else
	{
		for (x = 0; x <= range; x++)
			convert_map[x] = (TIFFRGBValue)((x * 255) / range);
	}
	return convert_map;
}


/*
 * Helper for YCbCr conversion to RGB
 *
 * State for the table-driven conversion.  The table pointers are set by
 * TIFFYCbCrToRGBInit to memory located directly behind this structure,
 * so the structure must be allocated with room for the tables (see
 * initYCbCrConversion).
 */

typedef struct {				/* YCbCr->RGB support */
	TIFFRGBValue* clamptab;		/* range clamping table */
	int*	Cr_r_tab;			/* indexed by raw Cr: offset added to Y for red */
	int*	Cb_b_tab;			/* indexed by raw Cb: offset added to Y for blue */
	int32*	Cr_g_tab;			/* indexed by raw Cr: green term, 16.16 fixed point */
	int32*	Cb_g_tab;			/* indexed by raw Cb: green term, 16.16 fixed point, rounding included */
	float	coeffs[3];			/* cached for repeated use */
	int 	subsamplingType;   /* Subsampling type: (horizontal << 4) | vertical */
} TIFFYCbCrToRGB;


/*
 * Shorthands used while building the YCbCr tables (undefined again after
 * TIFFYCbCrToRGBInit):
 *   LumaRed/LumaGreen/LumaBlue - elements of whatever `coeffs` array is in
 *                                scope at the point of use
 *   SHIFT    - number of fractional bits of the fixed-point values
 *   FIX(x)   - x converted to fixed point, rounded to nearest
 *   ONE_HALF - 0.5 in fixed point, added before shifting to round
 */
#define	LumaRed			coeffs[0]
#define	LumaGreen		coeffs[1]
#define	LumaBlue		coeffs[2]
#define	SHIFT			16
#define	FIX(x)			((int32)((x) * (1L<<SHIFT) + 0.5))
#define	ONE_HALF		((int32)(1<<(SHIFT-1)))


/*
 * YCbCr -> RGB conversion and packing routines.  The colorspace
 * conversion algorithm comes from the IJG v5a code; see below
 * for more information on how it works.
 *
 * YCbCrtoRGB_888(cp, yc) converts the luma sample yc together with the
 * chroma values of the enclosing scope and stores three bytes at cp in
 * DIB order (blue, green, red), advancing cp by three.
 *
 * The macro expects these names in scope at the point of use:
 *   Cb, Cr           - raw chroma samples used as table indices
 *   clamptab         - range clamping table
 *   Cbbtab, Crrtab   - per-Cb blue and per-Cr red offsets
 *   Cbgtab, Crgtab   - per-Cb and per-Cr green terms (16.16 fixed point)
 *
 * Red depends on Cr and blue on Cb (R = Y + Cr*(2 - 2*LumaRed),
 * B = Y + Cb*(2 - 2*LumaBlue)), so the red lookup must be indexed by Cr.
 */

#define	YCbCrtoRGB_888(cp, yc) {             \
    int Y = (yc);                            \
    *(cp) = clamptab[Y+Cbbtab[Cb]]; (cp)++;  \
    *(cp) = clamptab[Y + (int)((Cbgtab[Cb]+Crgtab[Cr])>>16)]; (cp)++;  \
    *(cp) = clamptab[Y+Crrtab[Cr]]; (cp)++;  \
    }


/*
 * Initialize the YCbCr->RGB conversion tables.  The conversion
 * is done according to the 6.0 spec:
 *
 *    R = Y + Cr*(2 - 2*LumaRed)
 *    B = Y + Cb*(2 - 2*LumaBlue)
 *    G =   Y
 *        - LumaBlue*Cb*(2-2*LumaBlue)/LumaGreen
 *        - LumaRed*Cr*(2-2*LumaRed)/LumaGreen
 *
 * The fractional constants are kept as 16.16 fixed-point values so no
 * floating point is needed per pixel, and every product is precomputed
 * into tables indexed by the raw 8-bit Cb / Cr sample.
 *
 * Memory layout (all of it directly behind *ycbcr, starting at the next
 * sizeof(long) boundary):
 *    256 bytes of 0      - clamp results for values below 0
 *    256 bytes 0..255    - identity part; ycbcr->clamptab points here
 *    512 bytes of 255    - clamp results for values above 255
 *    256 int             - Cr_r_tab
 *    256 int             - Cb_b_tab
 *    256 int32           - Cr_g_tab
 *    256 int32           - Cb_g_tab
 *
 * The luma coefficients are read from the YCbCrCoefficients tag of tif
 * and cached in ycbcr->coeffs.
 */
static void
 TIFFYCbCrToRGBInit(TIFFYCbCrToRGB * ycbcr, TIFF * tif)
{
	TIFFRGBValue *clamp_low;
	TIFFRGBValue *clamp_mid;
	float *coeffs;			/* name is required by the LumaRed/Green/Blue macros */
	float red_gain;
	float red_in_green;
	float blue_gain;
	float blue_in_green;
	int32 red_gain_fix;
	int32 red_in_green_fix;
	int32 blue_gain_fix;
	int32 blue_in_green_fix;
	int sample;

	/* clamping table */
	clamp_low = (TIFFRGBValue *) (
								  (tidata_t) ycbcr + TIFFroundup(sizeof(TIFFYCbCrToRGB), sizeof(long)));
	clamp_mid = clamp_low + 256;
	_TIFFmemset(clamp_low, 0, 256);				/* v < 0 => 0 */
	ycbcr->clamptab = clamp_mid;
	for (sample = 0; sample < 256; sample++)
		clamp_mid[sample] = sample;
	_TIFFmemset(clamp_mid + 256, 255, 2 * 256);	/* v > 255 => 255 */

	/* luma coefficients */
	TIFFGetFieldDefaulted(tif, TIFFTAG_YCBCRCOEFFICIENTS, &coeffs);
	_TIFFmemcpy(ycbcr->coeffs, coeffs, 3 * sizeof(float));

	/* fixed-point multipliers derived from the coefficients */
	red_gain = 2 - 2 * LumaRed;
	red_gain_fix = FIX(red_gain);
	red_in_green = LumaRed * red_gain / LumaGreen;
	red_in_green_fix = -FIX(red_in_green);
	blue_gain = 2 - 2 * LumaBlue;
	blue_gain_fix = FIX(blue_gain);
	blue_in_green = LumaBlue * blue_gain / LumaGreen;
	blue_in_green_fix = -FIX(blue_in_green);

	/* the four chroma tables follow the clamping table */
	ycbcr->Cr_r_tab = (int *) (clamp_mid + 3 * 256);
	ycbcr->Cb_b_tab = ycbcr->Cr_r_tab + 256;
	ycbcr->Cr_g_tab = (int32 *) (ycbcr->Cb_b_tab + 256);
	ycbcr->Cb_g_tab = ycbcr->Cr_g_tab + 256;

	/* The table index is the raw pixel value 0..255, while Cb and Cr are
	 * treated as -128..127 (actually their range is defined by the
	 * ReferenceBlackWhite tag), hence the shift by 128 below.
	 *
	 * XXX handle ReferenceBlackWhite correctly to calculate Cb/Cr values
	 * to use in constructing the tables. */
	for (sample = 0; sample < 256; sample++)
	{
		int chroma = sample - 128;

		ycbcr->Cr_r_tab[sample] = (int) ((red_gain_fix * chroma + ONE_HALF) >> SHIFT);
		ycbcr->Cb_b_tab[sample] = (int) ((blue_gain_fix * chroma + ONE_HALF) >> SHIFT);
		ycbcr->Cr_g_tab[sample] = red_in_green_fix * chroma;
		/* rounding for the green sum is folded into this table */
		ycbcr->Cb_g_tab[sample] = blue_in_green_fix * chroma + ONE_HALF;
	}
}

#undef	SHIFT
#undef	ONE_HALF
#undef	FIX
#undef	LumaBlue
#undef	LumaGreen
#undef	LumaRed

/*
 * Obtain a ready-to-use YCbCr->RGB conversion state for img.
 *
 * ycbcr_in == NULL: a new state, including room for all lookup tables,
 *                   is allocated with _TIFFmalloc and initialized.
 * ycbcr_in != NULL: the given state is reused; its tables are rebuilt
 *                   only when the file's YCbCrCoefficients differ from
 *                   the cached ones.
 *
 * In both cases subsamplingType is refreshed from the YCbCrSubsampling
 * tag as (horizontal << 4) | vertical.
 *
 * Returns the state, or NULL (after TIFFError) when allocation fails.
 */
static TIFFYCbCrToRGB*
 initYCbCrConversion(TIFFImageIter* img, TIFFYCbCrToRGB* ycbcr_in)
{
	TIFFYCbCrToRGB* state = ycbcr_in;
	uint16 hsub;
	uint16 vsub;

	if (state == NULL)
	{
		state = (TIFFYCbCrToRGB*) _TIFFmalloc(
					TIFFroundup(sizeof(TIFFYCbCrToRGB), sizeof(long)) +
					4 * 256 * sizeof(TIFFRGBValue) + /* clamptab */
					2 * 256 * sizeof(int) +			 /* Cr_r_tab, Cb_b_tab */
					2 * 256 * sizeof(int32));		 /* Cr_g_tab, Cb_g_tab */
		if (state == NULL)
		{
			TIFFError(TIFFFileName(img->tif),
					  "No space for YCbCr->RGB conversion state");
			return NULL;
		}
		TIFFYCbCrToRGBInit(state, img->tif);
	}
	else
	{
		float *coeffs;

		TIFFGetFieldDefaulted(img->tif, TIFFTAG_YCBCRCOEFFICIENTS, &coeffs);
		/* rebuild the tables only if the coefficients changed */
		if (_TIFFmemcmp(coeffs, state->coeffs, 3 * sizeof(float)) != 0)
			TIFFYCbCrToRGBInit(state, img->tif);
	}

	/* The 6.0 spec says that subsampling must be one of 1, 2, or 4, and
	 * that vertical subsampling must always be <= horizontal subsampling,
	 * so both values fit into one nibble each. */
	TIFFGetFieldDefaulted(img->tif, TIFFTAG_YCBCRSUBSAMPLING, &hsub, &vsub);
	state->subsamplingType = (hsub << 4) | vsub;

	return state;
}


#ifdef TIF_WIN32_DIB_SUPPORT
/**************************************************************************
 *      DIB output section
 **************************************************************************/


/*
 * A function called to convert packed RGB888 data to other formats.
 * The put callbacks invoke it as post(img, ppara, x, y, w, h) after they
 * have filled ppara->pTmp; the four uint32 arguments are the x, y, w and
 * h of the area just decoded (see DECLAREPostCallbackFunc).
 * NOTE(review): TIFMETHOD is defined outside this file; presumably it
 * expands to a function-pointer typedef - confirm.
 */
typedef	TIFMETHOD(void , TIFFImageIterPostRGBRoutine, (TIFFImageIter*, 
			struct tagDIBCallbackParams*, uint32, uint32, uint32, uint32));

/*
 * Emits the header of a static __stdcall post-conversion function called
 * `name`.  The function body that follows can use the parameters
 * img, ppara, x, y, w and h.
 */
#define	DECLAREPostCallbackFunc(name) \
static void __stdcall name(\
    TIFFImageIter* img, \
    struct tagDIBCallbackParams* ppara, \
    uint32 x, uint32 y, \
    uint32 w, uint32 h \
)

/*
 * Structure for dib image params in callback.
 * An instance is handed to the put callbacks as their user_data.
 */
typedef struct tagDIBCallbackParams
{
	BYTE* pbits;             /* ptr to bitmap bits adjusted so topleft is (0,0) */
	uint32 bitCount;         /* bits per pixel of the DIB; values > 8 are treated as RGB output */
	uint32 w;				 /* DIB width */
	uint32 scanlinePadBytes; /* dib scanline padding bytes */
	uint32 scanlineBytes;    /* bytes of a dib scanline with padding added */
	/* for post callback function */
	TIFFImageIterPostRGBRoutine post; /* optional; when set, callbacks decode into pTmp and then call it */
	BYTE* pTmp;              /* temporary packed 24-bit buffer, managed by TmpBufAlloc */
	uint32 tmpSize;          /* size of pTmp in bytes */
	/* for palette conversions */
	uint32* palette_map;     /* table built by makemap, or NULL when samples are direct colour */
	/* for YCBCR conversions */
	TIFFYCbCrToRGB* ycbcr;
} DIBCallbackParams;


/**************************************************************************
 *      convert packed RGB888 data to other RGB formats
 **************************************************************************/

/*
 * 8 bit DIB -> 565 (DIB mask)
 *
 * Packs 8-bit r, g, b into one 16-bit value (red in bits 11-15, green in
 * bits 5-10, blue in bits 0-4) and stores it low byte first at dest_p,
 * advancing dest_p by two.
 * NOTE(review): "DIB mask" presumably means the DIB must carry explicit
 * colour masks for this layout - confirm against the DIB setup code.
 */
#define RGB888_2_RGB_565(dest_p, r, g, b) { \
			WORD wOut = (WORD)((r) >> (8 - 5)) << 11; \
			wOut |=     (WORD)((g) >> (8 - 6)) << 5;  \
			wOut |=     (WORD)((b) >> (8 - 5)) << 0;  \
			*(dest_p) = LOBYTE(wOut);  \
			++(dest_p);				   \
			*(dest_p) = HIBYTE(wOut);  \
			++(dest_p);				   \
}
/*
 * 8 bit DIB -> 555 (no DIB mask)
 *
 * Packs 8-bit r, g, b into one 16-bit value (red in bits 10-14, green in
 * bits 5-9, blue in bits 0-4, top bit zero) and stores it low byte first
 * at dest_p, advancing dest_p by two.
 */
#define RGB888_2_RGB_555(dest_p, r, g, b) { \
			WORD wOut = (WORD)((r) >> (8 - 5)) << 10; \
			wOut |=     (WORD)((g) >> (8 - 5)) << 5;  \
			wOut |=     (WORD)((b) >> (8 - 5)) << 0;  \
			*(dest_p) = LOBYTE(wOut);  \
			++(dest_p);				   \
			*(dest_p) = HIBYTE(wOut);  \
			++(dest_p);				   \
}

/**************************************************************************
 *      convert TIFF data to RGB
 **************************************************************************/

/*
 * 8 bit R/G/B -> DIB as 888 (no DIB mask)
 *
 * Stores the three components at dest_p in the order blue, green, red
 * and advances dest_p by three.
 * The expansion is a plain sequence of statements (not wrapped in a
 * block) and r, g, b are not parenthesized, so use it only inside braces
 * and pass simple expressions.
 */
#define TIFBYTE_2_RGB_888(dest_p, r, g, b) \
			*(dest_p) = b;  \
			++(dest_p);	    \
			*(dest_p) = g;  \
			++(dest_p);	    \
			*(dest_p) = r;  \
			++(dest_p);


/*
 * Extract one 8-bit component from a palette_map entry as produced by
 * PACK: red is the low byte, green the second byte, blue the third.
 */
#define TIFPAL_2_B(pal) ((BYTE) ((pal) >> 16))
#define TIFPAL_2_G(pal) ((BYTE) ((pal) >> 8))
#define TIFPAL_2_R(pal) ((BYTE) (pal))

/*
 * 24 bit rgb DIB
 *
 * Writes the palette_map entry pal at cp as three bytes in the order
 * blue, green, red and advances cp by three.  pal is evaluated three
 * times, and the expansion is a plain statement sequence, so use it only
 * inside braces.
 */
#define TIFPAL_2_RGB_888(cp, pal) \
 			*(cp) = TIFPAL_2_B(pal); \
			++(cp);				   \
			*(cp) = TIFPAL_2_G(pal); \
			++(cp);				   \
			*(cp) = TIFPAL_2_R(pal); \
			++(cp);				   

/*
 * Conversion from 8-bit RGB data to RGB 565
 *
 * Post-conversion step: reads the h rows of w packed 24-bit pixels
 * (stored blue, green, red, without padding) from ppara->pTmp and writes
 * them as 16-bit 565 pixels into the DIB, starting at column x of row y.
 * Each following row is written one DIB scanline lower in memory.
 */
DECLAREPostCallbackFunc(convRGB_rgb565)
{
	const BYTE* in = ppara->pTmp;
	BYTE* row_start = ppara->pbits - (y * ppara->scanlineBytes) + (x * 2);
	uint32 row;

	for (row = 0; row < h; row++)
	{
		BYTE* out = row_start;
		uint32 col;

		for (col = 0; col < w; col++)
		{
			/* temp buffer order is B,G,R */
			RGB888_2_RGB_565(out, in[2], in[1], in[0]);
			in += 3;
		}
		row_start -= ppara->scanlineBytes;
	}
}

/*
 * Conversion from 8-bit RGB data to RGB 555
 *
 * Post-conversion step: reads the h rows of w packed 24-bit pixels
 * (stored blue, green, red, without padding) from ppara->pTmp and writes
 * them as 16-bit 555 pixels into the DIB, starting at column x of row y.
 * Each following row is written one DIB scanline lower in memory.
 */
DECLAREPostCallbackFunc(convRGB_rgb555)
{
	const BYTE* in = ppara->pTmp;
	BYTE* row_start = ppara->pbits - (y * ppara->scanlineBytes) + (x * 2);
	uint32 row;

	for (row = 0; row < h; row++)
	{
		BYTE* out = row_start;
		uint32 col;

		for (col = 0; col < w; col++)
		{
			/* temp buffer order is B,G,R */
			RGB888_2_RGB_555(out, in[2], in[1], in[0]);
			in += 3;
		}
		row_start -= ppara->scanlineBytes;
	}
}

/*
 * Replace the temporary conversion buffer of ppara by one of newSize bytes.
 *
 * Any previous buffer is released first.  On success pTmp points to the
 * new buffer, tmpSize holds its size and 1 is returned.  On failure the
 * error is reported through TIFFError, pTmp is NULL, tmpSize is 0 and 0
 * is returned.  Resetting both fields matters: the put callbacks only
 * call this function when tmpSize is too small, so a stale tmpSize
 * would make a later callback write through an invalid pointer.
 */
static int TmpBufAlloc(TIFFImageIter* img, DIBCallbackParams* ppara, int newSize)
{
	/* same conversion the original applied when storing into tmpSize */
	uint32 wanted = (uint32) newSize;

	if (ppara->pTmp)
		_TIFFfree(ppara->pTmp);
	ppara->pTmp = NULL;
	ppara->tmpSize = 0;

	ppara->pTmp= (BYTE*) _TIFFmalloc(wanted);
	if (!ppara->pTmp)
	{
		TIFFError(TIFFFileName(img->tif), "No space for tile buffer");
		return 0;
	}
	ppara->tmpSize = wanted;
	return 1;
}

/*
 * The following callback routines move decoded data returned
 * from the image iterator into DIB BGR pixels 
 */


/**************************************************************************
 *      1-bit packed samples
 **************************************************************************/


/*
 * 1-bit packed samples, with and without Map => RGB
 *
 * Image iterator callback (parameters are those declared by
 * DECLAREContigCallbackFunc; the body uses img, user_data, x, y, w, h,
 * fromskew and src_p).  Converts h rows of w pixels to packed 24-bit
 * B,G,R output.
 *
 * Destination:
 *   - with a post routine: the pixels go to ppara->pTmp without padding
 *     (the buffer is enlarged when needed) and post() is called at the end;
 *   - otherwise they go straight into the DIB at column x of row y, each
 *     following row one scanline lower in memory.
 *
 * Source:
 *   - with a palette_map every bit selects one of two table entries;
 *   - otherwise three consecutive bits form R, G, B (0 -> 0, 1 -> 255).
 * Each source row starts on a byte boundary.
 *
 * Nothing is written when the temporary buffer cannot be allocated.
 */
/*PAL WORKS, RGB untested*/
DECLAREContigCallbackFunc(putRGBcontig1bittile_rgb)
{
#define SAMPERPIXDST 3
	DIBCallbackParams* ppara = (DIBCallbackParams*) user_data;
	BYTE* dest_p;
	uint32 toskew;
	uint32 _h;

	if (ppara->post)
	{
		if (ppara->tmpSize < h * w * SAMPERPIXDST)
		{
			if (!TmpBufAlloc(img, ppara, h * w * SAMPERPIXDST))
				return;
		}
		/* temporary array starts at x=0, y= 0 going down without any padding */
		dest_p= ppara->pTmp;
		toskew= 0;
	}
	else
	{
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + (SAMPERPIXDST * x);
		/* step back over the row just written plus one full scanline */
		toskew= ppara->scanlineBytes + w * SAMPERPIXDST;
	}
	if (ppara->palette_map)
	{
		uint32* palmap = ppara->palette_map;
		uint32 fromskew_bytes= ASBITS(fromskew);
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x= w;
			while (_x != 0) 
			{
				int i;
				BYTE b= *src_p;
				++src_p;
				/* most significant bit is the leftmost pixel */
				for (i= 8; (i != 0) && (_x != 0); --i, --_x)
				{
					TIFPAL_2_RGB_888(dest_p, palmap[(b >> 7) & 1]);
					b= b << 1;
				}
			} 
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
	}
	else
	{
		// 1 bit RGB TIFF
		// Note the code is in no case performance optimized because
		// it's unlikely someone uses such format
		uint32 fromskew_bytes= ASBITS(fromskew*1*3); // really???
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x;
			int bitpos= -1;		/* forces a byte fetch for the first sample of the row */
			BYTE c= 0;
			BYTE rgb[3];

			/* counting pixels in the loop header keeps w == 0 from
			   underflowing the counter and running away */
			for (_x= w; _x != 0; --_x)
			{
				int rgbIndx;
				for (rgbIndx= 0; rgbIndx < 3; ++rgbIndx)
				{
					if (bitpos < 0)
					{
						c= *src_p;
						++src_p;
						bitpos= 7;
					}
					rgb[rgbIndx]= ((c >> bitpos) & 1) * 255;
					--bitpos;
				}
				TIFBYTE_2_RGB_888(dest_p, rgb[0], rgb[1], rgb[2]);
			}
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
	}
	if (ppara->post)
		(*(ppara->post)) (img, ppara, x, y, w, h);
#undef SAMPERPIXDST
}

/**************************************************************************
 *      2-bit packed samples
 **************************************************************************/


/*
 * 2-bit packed samples, with and without Map => RGB
 *
 * Image iterator callback (parameters are those declared by
 * DECLAREContigCallbackFunc; the body uses img, user_data, x, y, w, h,
 * fromskew and src_p).  Converts h rows of w pixels to packed 24-bit
 * B,G,R output.
 *
 * Destination:
 *   - with a post routine: the pixels go to ppara->pTmp without padding
 *     (the buffer is enlarged when needed) and post() is called at the end;
 *   - otherwise they go straight into the DIB at column x of row y, each
 *     following row one scanline lower in memory.
 *
 * Source:
 *   - with a palette_map every 2-bit sample selects one of four entries;
 *   - otherwise three consecutive 2-bit samples form R, G, B and are
 *     scaled from 0..3 to 0..255 (factor 255/3 = 85).
 * Each source row starts on a byte boundary.
 *
 * Nothing is written when the temporary buffer cannot be allocated.
 */
/*UNTESTED*/
DECLAREContigCallbackFunc(putRGBcontig2bittile_rgb)
{
#define SAMPERPIXDST 3
	DIBCallbackParams* ppara = (DIBCallbackParams*) user_data;
	BYTE* dest_p;
	uint32 toskew;
	uint32 _h;

	if (ppara->post)
	{
		if (ppara->tmpSize < h * w * SAMPERPIXDST)
		{
			if (!TmpBufAlloc(img, ppara, h * w * SAMPERPIXDST))
				return;
		}
		/* temporary array starts at x=0, y= 0 going down without any padding */
		dest_p= ppara->pTmp;
		toskew= 0;
	}
	else
	{
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + (SAMPERPIXDST * x);
		/* step back over the row just written plus one full scanline */
		toskew= ppara->scanlineBytes + w * SAMPERPIXDST;
	}
	if (ppara->palette_map)
	{
		uint32* palmap = ppara->palette_map;
		uint32 fromskew_bytes= (fromskew + 3) / 4;
		// 2 bit palette TIFF
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x= w;
			while (_x != 0) 
			{
				int i;
				BYTE b= *src_p;
				++src_p;
				/* the two most significant bits are the leftmost pixel */
				for (i= 4; (i != 0) && (_x != 0); --i, --_x)
				{
					TIFPAL_2_RGB_888(dest_p, palmap[(b >> 6) & 3]);
					b= b << 2;
				}
			} 
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
	}
	else
	{
		// 2 bit RGB TIFF
		// Note the code is in no case performance optimized because
		// it's unlikely someone uses such format
		uint32 fromskew_bytes= ASBITS(fromskew*2*3); // really???
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x;
			int bitpos= -2;		/* forces a byte fetch for the first sample of the row */
			BYTE c= 0;
			BYTE rgb[3];

			/* counting pixels in the loop header keeps w == 0 from
			   underflowing the counter and running away */
			for (_x= w; _x != 0; --_x)
			{
				int rgbIndx;
				for (rgbIndx= 0; rgbIndx < 3; ++rgbIndx)
				{
					if (bitpos < 0)
					{
						c= *src_p;
						++src_p;
						bitpos= 6;
					}
					/* a 2-bit sample has the range 0..3, so full
					   scale needs the factor 255/3 (not 255/15) */
					rgb[rgbIndx]= ((c >> bitpos) & 3) * (255/3);
					bitpos-= 2;
				}
				TIFBYTE_2_RGB_888(dest_p, rgb[0], rgb[1], rgb[2]);
			}
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
	}
	if (ppara->post)
		(*(ppara->post)) (img, ppara, x, y, w, h);
#undef SAMPERPIXDST
}

/**************************************************************************
 *      4-bit packed samples
 **************************************************************************/

/* Note on 4 bit packed RGB samples:
   Seems there is confusion about adding pad bytes in the tif image data stream.
   The tiff spec says it must be padded to byte for 4-bit data. 
   But it seems there is more padding at end of a line in some tiff files.
   So a 425 pixel wide image I got is not shown correctly. 

   (like in other imaging programs too..)
   If anyone knows what's wrong please mail me.

  */

/*
 * 4-bit packed samples, with and without Map => RGB
 */
/*WORKS*/
DECLAREContigCallbackFunc(putRGBcontig4bittile_rgb)
{
#define SAMPERPIXDST 3
	DIBCallbackParams* ppara = (DIBCallbackParams*) user_data;
	BYTE* dest_p;
	uint32 toskew;
	uint32 _h;

	assert(!img->alpha); // no alpha channel here

	if (ppara->post)
	{
		if (ppara->tmpSize < h * w * SAMPERPIXDST)
		{
			if (!TmpBufAlloc(img, ppara, h * w * SAMPERPIXDST))
				return;
		}
		/* temporary array starts at x=0, y= 0 going down without any padding */
		dest_p= ppara->pTmp;
		toskew= 0;
	}
	else
	{
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + (SAMPERPIXDST * x);
		toskew= ppara->scanlineBytes + w * SAMPERPIXDST;
	}
	if (ppara->palette_map)
	{
		// 4 bit palette TIFF
		uint32 fromskew_bytes= ASNIBBLES(fromskew);
		uint32* palmap = ppara->palette_map;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x= w;
			while (_x != 0) 
			{
				int i;
				BYTE b= *src_p;
				++src_p;
				for (i= 2; (i != 0) && (_x != 0); --i, --_x)
				{
					TIFPAL_2_RGB_888(dest_p, palmap[(b >> 4) & 0x0F]);
					b= b << 4;
				}
			} 
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
	}
	else
	{
		// 4 bit RGB TIFF
		uint32 fromskew_bytes= ASBITS(fromskew*4*3); // really??? se comment above
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x= w;
			while (_x != 0) 
			{
				static const BYTE cvttab[16]= 
				  {0*17, 1*17, 2*17, 3*17, 4*17, 5*17, 6*17, 7*17,
				   8*17, 9*17,10*17,11*17,12*17,13*17,14*17,15*17};
				BYTE c= *src_p; // RG
				BYTE r= cvttab[(c >> 4) & 15];
				BYTE g= cvttab[c & 15];
				BYTE b;
				++src_p;
				c= *src_p; // BR
				++src_p;
				b= cvttab[(c >> 4) & 15];
				TIFBYTE_2_RGB_888(dest_p, r, g, b);
				--_x;
				if (_x == 0)
					break;
				r= cvttab[c & 15];
				c= *src_p; // GB
				++src_p;
				g= cvttab[(c >> 4) & 15];
				b= cvttab[c & 15];
				TIFBYTE_2_RGB_888(dest_p, r, g, b);
				--_x;
			}
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
	}

	if (ppara->post)
		(*(ppara->post)) (img, ppara, x, y, w, h);
#undef SAMPERPIXDST
}

/**************************************************************************
 *      8-bit packed samples
 **************************************************************************/


/*
 * 8-bit packed samples, with and without Map => RGB
 */
/*WORKS*/
DECLAREContigCallbackFunc(putRGBcontig8bittile_rgb)
{
#define SAMPERPIXDST 3
	DIBCallbackParams* ppara = (DIBCallbackParams*) user_data;
	BYTE* dest_p;
	uint32 toskew;
	uint32 _h;
	uint32 fromskew_bytes;

	if (ppara->post)
	{
		if (ppara->tmpSize < h * w * SAMPERPIXDST)
		{
			if (!TmpBufAlloc(img, ppara, h * w * SAMPERPIXDST))
				return;
		}
		/* temporary array starts at x=0, y= 0 going down without any padding */
		dest_p= ppara->pTmp;
		toskew= 0;
	}
	else
	{
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + (SAMPERPIXDST * x);
		toskew= ppara->scanlineBytes + w * SAMPERPIXDST;
	}
	if (ppara->palette_map)
	{
		uint32* palmap = ppara->palette_map;

		assert(!img->alpha); // know to have alpha == 0

		fromskew_bytes= fromskew * 1;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w; _x != 0; --_x) 
			{ 
				TIFPAL_2_RGB_888(dest_p, palmap[*src_p]);
				++src_p;
			} 
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
	}
	else
	{
		switch (img->alpha)
		{
		case 0:
			fromskew_bytes= fromskew * 3;
			for (_h= h; _h != 0; --_h)
			{
				uint32 _x; 
				for (_x = w; _x != 0; --_x) 
				{ 
					TIFBYTE_2_RGB_888(dest_p, src_p[0], src_p[1], src_p[2]);
					src_p += 3;
				} 
				dest_p -= toskew;
				src_p += fromskew_bytes;
			}
			break;
		case EXTRASAMPLE_ASSOCALPHA:
			fromskew_bytes= fromskew * 4;
			for (_h= h; _h != 0; --_h)
			{
				uint32 _x; 
				for (_x = w; _x != 0; --_x) 
				{ 
					TIFBYTE_2_RGB_888(dest_p, src_p[0], src_p[1], src_p[2]);
					// alpha is src_p[3]; unused
					src_p += 4;
				} 
				dest_p -= toskew;
				src_p += fromskew_bytes;
			}
			break;
		case EXTRASAMPLE_UNASSALPHA:
			fromskew_bytes= fromskew * 3;
			for (_h= h; _h != 0; --_h)
			{
				uint32 _x; 
				for (_x = w; _x != 0; --_x) 
				{ 
					uint32 a = src_p[3];  // alpha
					u_char r = (u_char)((a * src_p[0]) / 255); // multiply by alpha value
					u_char g = (u_char)((a * src_p[1]) / 255);
					u_char b = (u_char)((a * src_p[2]) / 255);
					TIFBYTE_2_RGB_888(dest_p, r, g, b);
					src_p += 4;
				} 
				dest_p -= toskew;
				src_p += fromskew_bytes;
			}
			break;
		default:
			break;
		}
	}
	if (ppara->post)
		(*(ppara->post)) (img, ppara, x, y, w, h);
#undef SAMPERPIXDST
}


/**************************************************************************
 *      16-bit packed samples
 **************************************************************************/

/*
 * 16 bit R/G/B -> DIB as 888 (no DIB mask)
 *
 * Stores the upper byte of each 16-bit component at cp and advances cp
 * by three.  The bytes are written in the order blue, green, red - the
 * same order TIFBYTE_2_RGB_888 and TIFPAL_2_RGB_888 use and the order
 * the RGB565/RGB555 post converters expect in the temporary buffer.
 * The expansion is a plain statement sequence, so use it only inside
 * braces.
 */
#define TIFFWORD_2_RGB_888(cp, r, g, b) \
			*(cp) = (BYTE)((b) >> 8); \
			++(cp);				      \
			*(cp) = (BYTE)((g) >> 8); \
			++(cp);				      \
			*(cp) = (BYTE)((r) >> 8); \
			++(cp);

/*
 * 16-bit packed samples, no Map => RGB
 */
/*UNTESTED*/
DECLAREContigCallbackFunc(putRGBcontig16bittile_rgb)
{
#define SAMPERPIXDST 3
	DIBCallbackParams* ppara = (DIBCallbackParams*) user_data;
	BYTE* dest_p;
	uint32 toskew;
	uint32 _h;
	uint32 fromskew_words;
	uint16* src_wp = (uint16 *) src_p;

	if (ppara->post)
	{
		if (ppara->tmpSize < h * w * SAMPERPIXDST)
		{
			if (!TmpBufAlloc(img, ppara, h * w * SAMPERPIXDST))
				return;
		}
		/* temporary array starts at x=0, y= 0 going down without any padding */
		dest_p= ppara->pTmp;
		toskew= 0;
	}
	else
	{
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + (SAMPERPIXDST * x);
		toskew= ppara->scanlineBytes + w * SAMPERPIXDST;
	}
	switch (img->alpha)
	{
	case 0:
		fromskew_words= fromskew * 3;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w; _x != 0; --_x) 
			{ 
				TIFFWORD_2_RGB_888(dest_p, src_wp[0], src_wp[1], src_wp[2]);
				src_wp += 3;
			} 
			dest_p -= toskew;
			src_wp += fromskew_words;
		}
		break;
	case EXTRASAMPLE_ASSOCALPHA:
		fromskew_words= fromskew * 4;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w; _x != 0; --_x) 
			{ 
				TIFFWORD_2_RGB_888(dest_p, src_wp[0], src_wp[1], src_wp[2]);
				// alpha is src_wp[3]; unused
				src_wp += 4;
			} 
			dest_p -= toskew;
			src_wp += fromskew_words;
		}
		break;
	case EXTRASAMPLE_UNASSALPHA:
		fromskew_words= fromskew * 3;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			/* We shift alpha down four bits just in case unsigned arithmetic doesn't handle the full range. We
			 * still have plenty of accuracy, since the output is 8 bits. So we have (r * 0xffff) * (a * 0xfff))
			 * = r*a * (0xffff*0xfff) Since we want r*a * 0xff for eight bit output, we divide by (0xffff *
			 * 0xfff) / 0xff == 0x10eff. */
			for (_x = w; _x != 0; --_x) 
			{
				uint32 a = src_wp[3] >> 4;  // alpha
				uint32 r = (src_wp[0] * a) / 0x10eff;
				uint32 g = (src_wp[1] * a) / 0x10eff;
				uint32 b = (src_wp[2] * a) / 0x10eff;
				TIFFWORD_2_RGB_888(dest_p, r, g, b);
				src_wp += 4;
			}
			dest_p -= toskew;
			src_wp += fromskew_words;
		}
		break;
	default:
		break;
	}
	if (ppara->post)
		(*(ppara->post)) (img, ppara, x, y, w, h);
#undef SAMPERPIXDST
}


/**************************************************************************
 *      helper functions for colormaps and gray, b&w maps
 **************************************************************************/


/*
 * Fill the colour table of a palettized DIB for an image with
 * <= 8 bits/sample.
 *
 * 2^bitspersample entries of pdib->bmiColors are written: from the gray
 * ramp convert_map when it is given, otherwise from the low bytes of the
 * image colormaps.  The reserved byte of every entry is cleared.
 * The colour table is assumed to be contiguous memory of sufficient size
 * behind the bitmap info header.
 *
 * Returns 1 on success, 0 (after TIFFError) when the image has more bits
 * per sample than the DIB has bits per pixel.  ppara is not used.
 */
static int
 makeDIBpal(TIFFImageIter* img, DIBCallbackParams* ppara, 
           BITMAPINFO* pdib, TIFFRGBValue* convert_map)
{
	const int count = 1 << img->bitspersample;
	int idx;

	if (img->bitspersample > pdib->bmiHeader.biBitCount)
	{
		TIFFError(TIFFFileName(img->tif), "DIB palette smaller than images");
		return 0;
	}

	if (convert_map != NULL)
	{
		/* gray ramp: identical level in all three components */
		for (idx = 0; idx < count; idx++)
		{
			TIFFRGBValue level = convert_map[idx];

			pdib->bmiColors[idx].rgbBlue= level & 0xff;
			pdib->bmiColors[idx].rgbGreen= level & 0xff;
			pdib->bmiColors[idx].rgbRed= level & 0xff;
			pdib->bmiColors[idx].rgbReserved= 0;
		}
	}
	else
	{
		const uint16 *red = img->redcmap;
		const uint16 *green = img->greencmap;
		const uint16 *blue = img->bluecmap;

		for (idx = 0; idx < count; idx++)
		{
			pdib->bmiColors[idx].rgbBlue= blue[idx] & 0xff;
			pdib->bmiColors[idx].rgbGreen= green[idx] & 0xff;
			pdib->bmiColors[idx].rgbRed= red[idx] & 0xff;
			pdib->bmiColors[idx].rgbReserved= 0;
		}
	}
	return 1;
}


/*
 * Construct any mapping table used
 * by the associated callback routine.
 *
 * Gray / b&w images (MINISBLACK, MINISWHITE): a photometric ramp is
 * built; it becomes an unpacking table in ppara->palette_map when the
 * DIB or the image has more than 8 bits, otherwise it is written into
 * the DIB colour table.  The ramp itself is released again.
 *
 * Palette images: the colormap is reduced to 8 bits when it looks like a
 * 16-bit one (a warning is issued otherwise).  For images with
 * <= 8 bits/sample going to a DIB with more than 8 bits an unpacking
 * table is built; in every other case the DIB colour table is filled.
 *
 * Other photometrics need no table.
 *
 * Returns 1 on success and 0 on failure.
 */
static int
 buildMap_DIB(TIFFImageIter* img, DIBCallbackParams* ppara, BITMAPINFO* pdib)
{
	int ok = 1;

	if (img->photometric == PHOTOMETRIC_MINISBLACK ||
		img->photometric == PHOTOMETRIC_MINISWHITE)
	{
		TIFFRGBValue* gray_ramp = setupMap(img);

		if (gray_ramp == NULL)
			return 0;
		if ((ppara->bitCount > 8) || (img->bitspersample > 8))
		{
			/* RGB output (or deep samples): unpack through a table */
			ppara->palette_map= makemap(img, gray_ramp);
			if (ppara->palette_map == NULL)
				ok = 0;
		}
		else
		{
			/* palettized DIB: the ramp becomes the DIB colour table */
			ok = makeDIBpal(img, ppara, pdib, gray_ramp);
		}
		/* the ramp was only needed to derive the table above */
		_TIFFfree(gray_ramp);
	}
	else if (img->photometric == PHOTOMETRIC_PALETTE)
	{
		/* RGB DIB fed from an image with at most 8 bits per sample? */
		const int unpack_to_rgb = (img->bitspersample <= 8) && (ppara->bitCount > 8);

		/* Convert 16-bit colormap to 8-bit (unless it looks like an old-style 8-bit colormap). */
		if (checkcmap(img) == 16)
			cvtcmap(img);
		else
			TIFFWarning(TIFFFileName(img->tif), "Assuming 8-bit colormap");

		if (unpack_to_rgb)
		{
			/* Use colormap to construct palette map. */
			ppara->palette_map= makemap(img, NULL);
			if (ppara->palette_map == NULL)
				ok = 0;
		}
		else if (!makeDIBpal(img, ppara, pdib, NULL))
		{
			/* the DIB colour table could not be filled */
			return 0;
		}
	}
	return ok;
}


/**************************************************************************
 *      YCBCR decoders
 **************************************************************************/


 /*
 * 8-bit YCbCr => RGB
 */
/*WORKS*/
DECLAREContigCallbackFunc(putcontig8bitYCbCrtile_rgb888)
{
#define SAMPERPIXDST 3
	DIBCallbackParams* ppara = (DIBCallbackParams*) user_data;
    TIFFYCbCrToRGB* ycbcr = ppara->ycbcr;
    int* Crrtab = ycbcr->Cr_r_tab;
    int* Cbbtab = ycbcr->Cb_b_tab;
    int32* Crgtab = ycbcr->Cr_g_tab;
    int32* Cbgtab = ycbcr->Cb_g_tab;
    TIFFRGBValue* clamptab = ycbcr->clamptab;

	BYTE* dest_p;
	BYTE* dest1_p;
	uint32 toskew;
	uint32 _h;
	uint32 fromskew_bytes;

	/* for cases we need two output lines */
	BOOL bWriteTwoLines= (ppara->ycbcr->subsamplingType == 0x42) ||
		                (ppara->ycbcr->subsamplingType == 0x22);

	if (ppara->post)
	{
		if (ppara->tmpSize < h * w * SAMPERPIXDST)
		{
			if (!TmpBufAlloc(img, ppara, h * w * SAMPERPIXDST))
				return;
		}
		/* temporary array starts at x=0, y= 0 going down without any padding */
		dest_p= ppara->pTmp;
		if (bWriteTwoLines)
		{
			dest1_p= dest_p + w * SAMPERPIXDST;
			toskew= (-1) * w * SAMPERPIXDST;
		}
		else
			toskew= 0;
	}
	else
	{
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + (SAMPERPIXDST * x);
		if (bWriteTwoLines)
		{
			dest1_p= dest_p - ppara->scanlineBytes;
			toskew= 2 * ppara->scanlineBytes + w * SAMPERPIXDST;
		}
		else
			toskew= ppara->scanlineBytes + w * SAMPERPIXDST;
	}

	fromskew_bytes= fromskew * 3;
			
	switch (ppara->ycbcr->subsamplingType)
	{
	case 0x44: /* UNTESTED */
		{
		/* here we need four output lines, so we don't merge this case in */
		BYTE* dest2_p;
		BYTE* dest3_p;
		if (ppara->post)
		{
			dest1_p= dest_p + w * SAMPERPIXDST;
			dest2_p= dest1_p + w * SAMPERPIXDST;
			dest3_p= dest2_p + w * SAMPERPIXDST;
			toskew= (-4) * w * SAMPERPIXDST;
		}
		else
		{
			dest1_p= dest_p - ppara->scanlineBytes;
			dest2_p= dest1_p - ppara->scanlineBytes;
			dest3_p= dest2_p - ppara->scanlineBytes;
			toskew= 4 * ppara->scanlineBytes + w * SAMPERPIXDST;
		}
		for (_h= h; _h >= 4; _h-= 4)
		{
			uint32 _x; 
			for (_x = w / 4; _x != 0; --_x) 
			{
				int Cb = src_p[16];
				int Cr = src_p[17];
				YCbCrtoRGB_888(dest_p, src_p[0]);
				YCbCrtoRGB_888(dest_p, src_p[1]);
				YCbCrtoRGB_888(dest_p, src_p[2]);
				YCbCrtoRGB_888(dest_p, src_p[3]);
				YCbCrtoRGB_888(dest1_p, src_p[4]);
				YCbCrtoRGB_888(dest1_p, src_p[5]);
				YCbCrtoRGB_888(dest1_p, src_p[6]);
				YCbCrtoRGB_888(dest1_p, src_p[7]);
				YCbCrtoRGB_888(dest2_p, src_p[8]);
				YCbCrtoRGB_888(dest2_p, src_p[9]);
				YCbCrtoRGB_888(dest2_p, src_p[10]);
				YCbCrtoRGB_888(dest2_p, src_p[11]);
				YCbCrtoRGB_888(dest3_p, src_p[12]);
				YCbCrtoRGB_888(dest3_p, src_p[13]);
				YCbCrtoRGB_888(dest3_p, src_p[14]);
				YCbCrtoRGB_888(dest3_p, src_p[15]);
				src_p += 18;
			} 
			dest_p -= toskew;
			dest1_p -= toskew;
			dest2_p -= toskew;
			dest3_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
		}
	case 0x42: /* UNTESTED */
		for (_h= h; _h >= 2; _h-= 2)
		{
			uint32 _x; 
			for (_x = w / 4; _x != 0; --_x) 
			{
				int Cb = src_p[8];
				int Cr = src_p[9];
				YCbCrtoRGB_888(dest_p, src_p[0]);
				YCbCrtoRGB_888(dest_p, src_p[1]);
				YCbCrtoRGB_888(dest_p, src_p[2]);
				YCbCrtoRGB_888(dest_p, src_p[3]);
				YCbCrtoRGB_888(dest1_p, src_p[4]);
				YCbCrtoRGB_888(dest1_p, src_p[5]);
				YCbCrtoRGB_888(dest1_p, src_p[6]);
				YCbCrtoRGB_888(dest1_p, src_p[7]);
				src_p += 10;
			} 
			dest_p -= toskew;
			dest1_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	case 0x41: /* UNTESTED */
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w / 4; _x != 0; --_x) 
			{
				int Cb = src_p[4];
				int Cr = src_p[5];
				YCbCrtoRGB_888(dest_p, src_p[0]);
				YCbCrtoRGB_888(dest_p, src_p[1]);
				YCbCrtoRGB_888(dest_p, src_p[2]);
				YCbCrtoRGB_888(dest_p, src_p[3]);
				src_p += 6;
			} 
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	case 0x22: /* WORKS */
		for (_h= h; _h >= 2; _h-= 2)
		{
			uint32 _x; 
			for (_x = w / 2; _x != 0; --_x) 
			{
				int Cb = src_p[4];
				int Cr = src_p[5];
				YCbCrtoRGB_888(dest_p, src_p[0]);
				YCbCrtoRGB_888(dest_p, src_p[1]);
				YCbCrtoRGB_888(dest1_p, src_p[2]);
				YCbCrtoRGB_888(dest1_p, src_p[3]);
				src_p += 6;
			} 
			dest_p -= toskew;
			dest1_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	case 0x21: /* UNTESTED */
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w / 2; _x != 0; --_x) 
			{
				int Cb = src_p[2];
				int Cr = src_p[3];
				YCbCrtoRGB_888(dest_p, src_p[0]);
				YCbCrtoRGB_888(dest_p, src_p[1]);
				src_p += 4;
			} 
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	case 0x11: /* UNTESTED */
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w / 2; _x != 0; --_x) 
			{
				int Cb = src_p[1];
				int Cr = src_p[2];
				YCbCrtoRGB_888(dest_p, src_p[0]);
				src_p += 3;
			} 
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	default:
		/* checked before */
		break;
	}
	if (ppara->post)
		(*(ppara->post)) (img, ppara, x, y, w, h);
#undef SAMPERPIXDST
}



/**************************************************************************
 *      Palette DIB output routines
 **************************************************************************/

 /*
 * 1-bit TIFF => Palette DIB
 *
 * Contig callback copying 1-bit samples into a 1, 4 or 8 bit palettized
 * DIB.  The sample value itself becomes the palette index.  Rows are
 * written bottom-up (dest pointer moves back by one scanline per row).
 * No post routine is supported and x must be a multiple of 8.
 *
 * Originally marked WORKS by the author.
 */
DECLAREContigCallbackFunc(putPALMAP1bit_pal)
{
	DIBCallbackParams* ppara = (DIBCallbackParams*) user_data;
	BYTE* dest_p;
	uint32 toskew;
	uint32 _h;
	uint32 fromskew_bytes= ASBITS(fromskew);

	assert(!ppara->post);
	assert(x % 8 == 0);

	switch (ppara->bitCount)
	{
	case 1:
		{
		/* identical packing on both sides: plain row copy */
		uint32 _w= ASBITS(w);
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + (x / 8);
		toskew= ppara->scanlineBytes + ASBITS(w);
		fromskew_bytes+= _w;
		toskew-= _w;
		while (h-- > 0)
		{
			memcpy(dest_p, src_p, _w);
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
		}
	case 4:
		/* two source bits become the two nibbles of one output byte */
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + (x / 2);
		toskew= ppara->scanlineBytes + ASNIBBLES(w);
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x= w;
			while (_x != 0)
			{
				int i;
				BYTE b= *src_p;
				++src_p;
				for (i= 4; (i != 0) && (_x != 0); --i)
				{
					/* first pixel -> high nibble, second -> low nibble */
					*dest_p= ((b >> 6) & 1) | (((b >> 6) & 2) << 3);
					b= b << 2;
					++dest_p;
					/* Consume two pixels, but never step below zero:
					   with an odd width a plain "_x -= 2" wraps around
					   and the loop runs far past both buffers. */
					_x= (_x > 2) ? (_x - 2) : 0;
				}
			}
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	case 8:
		/* every source bit becomes one output byte */
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + x;
		toskew= ppara->scanlineBytes + w;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x= w;
			while (_x != 0)
			{
				int i;
				BYTE b= *src_p;
				++src_p;
				for (i= 8; (i != 0) && (_x != 0); --i, --_x)
				{
					*dest_p= ((b >> 7) & 1);
					++dest_p;
					b= b << 1;
				}
			}
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	default:
		break;
	}
}

 /*
 * 2-bit TIFF => Palette DIB
 *
 * Contig callback expanding 2-bit samples (four pixels per source byte,
 * most significant pair first) into a 4 or 8 bit palettized DIB.  The
 * sample value itself becomes the palette index.  Rows are written
 * bottom-up.  No post routine is supported and x must be a multiple of 8.
 *
 * Still untested against real files (was marked UNTESTED by the author).
 */
DECLAREContigCallbackFunc(putPALMAP2bit_pal)
{
	DIBCallbackParams* ppara = (DIBCallbackParams*) user_data;
	BYTE* dest_p;
	uint32 toskew;
	uint32 _h;
	uint32 fromskew_bytes= ASBITS(fromskew*2);

	assert(!ppara->post);
	assert(x % 8 == 0);

	switch (ppara->bitCount)
	{
	case 4:
		/* one source byte (4 pixels) yields two output bytes (2 pixels each) */
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + (x / 2);
		toskew= ppara->scanlineBytes + ASNIBBLES(w);
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x= w;
			while (_x != 0)
			{
				int i;
				BYTE b= *src_p;
				++src_p;
				for (i= 2; (i != 0) && (_x != 0); --i)
				{
					/* upper nibble of b holds two 2-bit pixels p0,p1:
					   p1 goes to the low nibble, p0 to the high nibble */
					*dest_p= ((b >> 4) & 3) | (((b >> 4) & 12) << 2);
					++dest_p;
					b= b << 4;
					/* two pixels consumed per output byte; clamp so an
					   odd width cannot wrap the counter */
					_x= (_x > 2) ? (_x - 2) : 0;
				}
			}
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	case 8:
		/* every 2-bit sample becomes one output byte */
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + x;
		toskew= ppara->scanlineBytes + w;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x= w;
			while (_x != 0)
			{
				int i;
				BYTE b= *src_p;
				++src_p;
				for (i= 4; (i != 0) && (_x != 0); --i, --_x)
				{
					*dest_p= ((b >> 6) & 3);
					++dest_p;
					b= b << 2;
				}
			}
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	default:
		break;
	}
}


 /*
 * 4-bit TIFF => Palette DIB
 */
/*WORKS*/
DECLAREContigCallbackFunc(putPALMAP4bit_pal)
{
	DIBCallbackParams* ppara = (DIBCallbackParams*) user_data;
	BYTE* dest_p;
	uint32 toskew;
	uint32 _h;
	uint32 fromskew_bytes= ASBITS(fromskew*4);

	assert(!ppara->post);
	assert(x % 8 == 0);

	switch (ppara->bitCount)
	{
	case 4:
		{
		uint32 _w= ASBITS(w*4);
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + (x / 2);
		toskew= ppara->scanlineBytes + ASNIBBLES(w);
		fromskew_bytes+= _w;
		toskew-= _w;
		while (h-- > 0)
		{
			memcpy(dest_p, src_p, _w);
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
		}
	case 8:
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + x;
		toskew= ppara->scanlineBytes + w;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x= w;
			while (_x > 0) 
			{
				BYTE b= *src_p;
				++src_p;
				*dest_p= ((b >> 4) & 15);
				++dest_p;
				--_x;
				if (_x == 0)
					break;
				*dest_p= ((b >> 0) & 15);
				++dest_p;
				--_x;
			} 
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	default:
		break;
	}
}


 /*
 * 8-bit TIFF => Palette DIB
 */
/*WORKS*/
DECLAREContigCallbackFunc(putPALMAP8bit_pal)
{
	DIBCallbackParams* ppara = (DIBCallbackParams*) user_data;
	BYTE* dest_p;
	uint32 toskew;
	uint32 fromskew_bytes= fromskew;

	assert(!ppara->post);
	assert(x % 8 == 0);

	switch (ppara->bitCount)
	{
	case 8:
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + x;
		toskew= ppara->scanlineBytes;
		fromskew_bytes+= w;
		while (h-- > 0)
		{
			memcpy(dest_p, src_p, w);
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	default:
		break;
	}
}





/**************************************************************************
 *      TIFF with separated samples => RGB DIB
 **************************************************************************/

/*
 * 8-bit separate samples, no Map => RGB
 */
/*WORKS*/
DECLARESepCallbackFunc(putRGBseparate8bittile_rgb)
{
#define SAMPERPIXDST 3
	DIBCallbackParams* ppara = (DIBCallbackParams*) user_data;
	BYTE* dest_p;
	uint32 toskew;
	uint32 _h;
	uint32 fromskew_bytes;

	if (ppara->post)
	{
		if (ppara->tmpSize < h * w * SAMPERPIXDST)
		{
			if (!TmpBufAlloc(img, ppara, h * w * SAMPERPIXDST))
				return;
		}
		/* temporary array starts at x=0, y= 0 going down without any padding */
		dest_p= ppara->pTmp;
		toskew= 0;
	}
	else
	{
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + (SAMPERPIXDST * x);
		toskew= ppara->scanlineBytes + w * SAMPERPIXDST;
	}
	assert (!ppara->palette_map);
	switch (img->alpha)
	{
	case 0:
		fromskew_bytes= fromskew;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w; _x != 0; --_x) 
			{ 
				TIFBYTE_2_RGB_888(dest_p, *src_r_p, *src_g_p, *src_b_p);
				++src_r_p;
				++src_g_p;
				++src_b_p;
			} 
			dest_p -= toskew;
			src_r_p += fromskew_bytes;
			src_g_p += fromskew_bytes;
			src_b_p += fromskew_bytes;
		}
		break;
	case EXTRASAMPLE_ASSOCALPHA:
		fromskew_bytes= fromskew;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w; _x != 0; --_x) 
			{ 
				TIFBYTE_2_RGB_888(dest_p, *src_r_p, *src_g_p, *src_b_p);
				// alpha is *src_alpha_p; unused
				++src_r_p;
				++src_g_p;
				++src_b_p;
				//++src_alpha_p; // if we would use it
			} 
			dest_p -= toskew;
			src_r_p += fromskew_bytes;
			src_g_p += fromskew_bytes;
			src_b_p += fromskew_bytes;
			//src_alpha_p += fromskew_bytes; // if we would use it
		}
		break;
	case EXTRASAMPLE_UNASSALPHA:
		fromskew_bytes= fromskew;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w; _x != 0; --_x) 
			{ 
				uint32 a = *src_alpha_p;  // alpha
				u_char r = (u_char)((a * (*src_r_p)) / 255); // multiply by alpha value
				u_char g = (u_char)((a * (*src_g_p)) / 255);
				u_char b = (u_char)((a * (*src_b_p)) / 255);
				TIFBYTE_2_RGB_888(dest_p, r, g, b);
				++src_r_p;
				++src_g_p;
				++src_b_p;
				++src_alpha_p;
			} 
			dest_p -= toskew;
			src_r_p += fromskew_bytes;
			src_g_p += fromskew_bytes;
			src_b_p += fromskew_bytes;
			src_alpha_p += fromskew_bytes;
		}
		break;
	default:
		break;
	}
	if (ppara->post)
		(*(ppara->post)) (img, ppara, x, y, w, h);
#undef SAMPERPIXDST
}

/*
 * 16-bit separate samples, no Map => RGB
 *
 * Separate-plane callback merging 16-bit R, G and B planes into 3 bytes
 * per pixel via TIFFWORD_2_RGB_888.  Output goes to the DIB (bottom-up)
 * or, if a post routine is installed, to the packed temporary buffer
 * which is then passed to that routine.  Alpha handling:
 *   - none / associated alpha: the alpha plane is not read,
 *   - unassociated alpha: color samples are premultiplied by alpha.
 *
 * Still untested against real files (was marked UNTESTED by the author).
 */
DECLARESepCallbackFunc(putRGBseparate16bittile_rgb)
{
#define SAMPERPIXDST 3
	DIBCallbackParams* ppara = (DIBCallbackParams*) user_data;
	BYTE* dest_p;
	uint32 toskew;
	uint32 _h;
	uint32 fromskew_words= fromskew;
	uint16* src_r_wp = (uint16 *) src_r_p;
	uint16* src_g_wp = (uint16 *) src_g_p;
	uint16* src_b_wp = (uint16 *) src_b_p;
	uint16* src_alpha_wp = (uint16 *) src_alpha_p;

	if (ppara->post)
	{
		if (ppara->tmpSize < h * w * SAMPERPIXDST)
		{
			if (!TmpBufAlloc(img, ppara, h * w * SAMPERPIXDST))
				return;
		}
		/* temporary array starts at x=0, y= 0 going down without any padding */
		dest_p= ppara->pTmp;
		toskew= 0;
	}
	else
	{
		dest_p= ppara->pbits - (y * ppara->scanlineBytes) + (SAMPERPIXDST * x);
		toskew= ppara->scanlineBytes + w * SAMPERPIXDST;
	}
	switch (img->alpha)
	{
	case 0:
	case EXTRASAMPLE_ASSOCALPHA:
		/* color samples are used as stored; alpha (if any) is ignored */
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x;
			for (_x = w; _x != 0; --_x)
			{
				TIFFWORD_2_RGB_888(dest_p, *src_r_wp, *src_g_wp, *src_b_wp);
				++src_r_wp;
				++src_g_wp;
				++src_b_wp;
			}
			dest_p -= toskew;
			src_r_wp += fromskew_words;
			src_g_wp += fromskew_words;
			src_b_wp += fromskew_words;
		}
		break;
	case EXTRASAMPLE_UNASSALPHA:
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x;
			/* Alpha is reduced to 12 bits (0..0xfff) so that sample * alpha
			 * (at most 0xffff * 0xfff) stays inside 32 bits.  Dividing the
			 * product by 0xfff yields the premultiplied sample in the full
			 * 16-bit range again, which is what TIFFWORD_2_RGB_888 is fed
			 * in the other cases of this function.  (The former divisor
			 * 0x10eff left values of at most ~0xf1c, i.e. neither a
			 * 16-bit nor an 8-bit sample; (0xffff*0xfff)/0xff is 0x100eff.) */
			for (_x = w; _x != 0; --_x)
			{
				uint32 a = (*src_alpha_wp) >> 4;  /* alpha, 12 bit */
				uint32 r = ((*src_r_wp) * a) / 0xfff;
				uint32 g = ((*src_g_wp) * a) / 0xfff;
				uint32 b = ((*src_b_wp) * a) / 0xfff;
				TIFFWORD_2_RGB_888(dest_p, r, g, b);
				++src_r_wp;
				++src_g_wp;
				++src_b_wp;
				++src_alpha_wp;
			}
			dest_p -= toskew;
			src_r_wp += fromskew_words;
			src_g_wp += fromskew_words;
			src_b_wp += fromskew_words;
			src_alpha_wp += fromskew_words;
		}
		break;
	default:
		break;
	}
	if (ppara->post)
		(*(ppara->post)) (img, ppara, x, y, w, h);
#undef SAMPERPIXDST
}


/**************************************************************************
 *      DIB Interface routines
 **************************************************************************/


/*
 * Read the specified image into a Windows DIB using the image iterator.
 *
 *   tif    open TIFF handle
 *   pbmi   header of the destination DIB; biWidth/biHeight must be at
 *          least the image size, biHeight must be positive.  For
 *          palettized output the color table is assumed to follow the
 *          header directly (the pointer is passed on as BITMAPINFO*).
 *   pbits  destination pixel data, scanlines padded to 32 bits
 *   stop   passed through to TIFFImageIterBegin
 *
 * Returns non-zero on success.  On failure 0 is returned and an error has
 * been reported through TIFFError, either here or by the routine that
 * failed.
 */
TIFF_DLLINTERFACE(int)
TIFFReadDIBImage(TIFF * tif,
				 BITMAPINFOHEADER* pbmi, BYTE* pbits, int stop)
{
	char emsg[1024];
	TIFFImageIter img;
	int ok= 0;
	int msg_seen= 0;    /* set when the failing routine reported the error itself */
	int iterBegun= 0;   /* set once TIFFImageIterBegin succeeded */
	DIBCallbackParams dibcbp;

	emsg[0]= '\0';

	/* Assume valid DIB header */
	if (pbmi->biHeight <= 0)
	{
		TIFFError(TIFFFileName(tif), "DIB height negative or zero !");
		return 0;
	}
	/* set params used by callback function */

	/* Metrics of a scanline  */
	dibcbp.scanlineBytes= ((pbmi->biWidth * pbmi->biBitCount + 31) & ~31) / 8;
	dibcbp.scanlinePadBytes= dibcbp.scanlineBytes - ((pbmi->biWidth * pbmi->biBitCount + 7) / 8);

	/* ptr to dib bits: points at the last stored scanline, the callbacks
	   step backwards from there by one scanline per image row */
	dibcbp.pbits= pbits + (pbmi->biHeight-1) * dibcbp.scanlineBytes;

	/* params of the DIB */
	dibcbp.w= pbmi->biWidth;
	dibcbp.bitCount= pbmi->biBitCount;
	dibcbp.post= NULL;
	dibcbp.pTmp= NULL;
	dibcbp.tmpSize= 0;

	dibcbp.palette_map= NULL;

	dibcbp.ycbcr= NULL;

	ok = TIFFImageIterBegin(&img, tif, stop, emsg);
	if (!ok)
		goto done;
	iterBegun= 1;

	if (pbmi->biWidth < (int)img.width)
	{
		sprintf(emsg, "DIB width lower than image width");
		ok= 0;
		goto done;
	}
	if (pbmi->biHeight < (int)img.height)
	{
		sprintf(emsg, "DIB height lower than image height");
		ok= 0;
		goto done;
	}

	/* we assume contig palette in bitmapinfo structure */
	ok= buildMap_DIB(&img, &dibcbp, (BITMAPINFO*) pbmi);
	if (!ok)
	{
		msg_seen= 1;
		goto done;
	}

	/* set post routine needed for conversion to 15/16bit RGB DIB */
	if (dibcbp.bitCount == 16)
	{
		if (pbmi->biCompression == BI_BITFIELDS)
			dibcbp.post= convRGB_rgb565;
		else
			dibcbp.post= convRGB_rgb555;
	}

	/* Set callback function */
	if (img.isContig)
	{
		switch (img.photometric)
		{
		case PHOTOMETRIC_YCBCR:
			/* can handle only 8bpp TIFF files (decoding routines constraint)
			    and output > 8bpp because no dithering avail */
			if ((img.bitspersample == 8) && (dibcbp.bitCount > 8))
			{
				dibcbp.ycbcr= initYCbCrConversion(&img, dibcbp.ycbcr);
				if (!dibcbp.ycbcr)
				{
					ok= 0;
					msg_seen= 1;
				}
				else
				{
					switch (dibcbp.ycbcr->subsamplingType)
					{
					case 0x44:
					case 0x42:
					case 0x41:
					case 0x22:
					case 0x21:
					case 0x11:
						img.callback.contig= putcontig8bitYCbCrtile_rgb888;
						break;
					default:
						sprintf(emsg, "YCbCr subsampling not supported %x", (int) dibcbp.ycbcr->subsamplingType);
						ok= 0;
						break;
					}
				}
			}
			break;
		case PHOTOMETRIC_MINISBLACK:
		case PHOTOMETRIC_MINISWHITE:
		case PHOTOMETRIC_PALETTE:
			/* TIFF format has a palette, either implicit or explicitly given;
			   try a palette-to-palette routine first */
			switch (img.bitspersample)
			{
			case 1: /* 1 bit per sample TIF format */
				if ((dibcbp.bitCount == 1) || (dibcbp.bitCount == 4) || (dibcbp.bitCount == 8))
					img.callback.contig = putPALMAP1bit_pal;
				break;
			case 2: /* 2 bits per sample TIF format */
				if ((dibcbp.bitCount == 4) || (dibcbp.bitCount == 8))
					img.callback.contig = putPALMAP2bit_pal;
				break;
			case 4: /* 4 bits per sample TIF format */
				if ((dibcbp.bitCount == 4) || (dibcbp.bitCount == 8))
					img.callback.contig = putPALMAP4bit_pal;
				break;
			case 8: /* 8 bits per sample TIF format */
				if (dibcbp.bitCount == 8)
					img.callback.contig = putPALMAP8bit_pal;
				break;
			default:
				break;
			}
			if (img.callback.any)
				break;
			/* if no callback set, output is rgb (palmap will do right thing) */
			/* fall through */
		case PHOTOMETRIC_RGB:
			/* RGB output exists only for 16 and 24 bit DIBs */
			if ((dibcbp.bitCount == 16) || (dibcbp.bitCount == 24))
			{
				switch (img.bitspersample)
				{
				case 1:
					img.callback.contig = putRGBcontig1bittile_rgb;
					break;
				case 2:
					img.callback.contig = putRGBcontig2bittile_rgb;
					break;
				case 4:
					img.callback.contig = putRGBcontig4bittile_rgb;
					break;
				case 8:
					img.callback.contig = putRGBcontig8bittile_rgb;
					break;
				case 16:
					img.callback.contig = putRGBcontig16bittile_rgb;
					break;
				default:
					break;
				}
			}
			break;
		default:
			break;
		}
	}
	else
	{
		/*
		 * Select the appropriate conversion routine for unpacked data.
		 *
		 * NB: we assume that unpacked single channel data is directed
		 *	 to the "packed" routines.
		 */
		if ((img.photometric == PHOTOMETRIC_RGB) &&
		    ((dibcbp.bitCount == 16) || (dibcbp.bitCount == 24)))
		{
			switch (img.bitspersample)
			{
			case 8:
				img.callback.separate = putRGBseparate8bittile_rgb;
				break;
			case 16:
				img.callback.separate = putRGBseparate16bittile_rgb;
				break;
			default:
				break;
			}
		}
		/* separate TIFF formats have no palette ??? */
	}

	if (!ok)
		goto done;

	ok = TIFFImageIterGet(&img, &dibcbp, img.width, img.height);
	if (!ok)
		msg_seen= 1;

done:
	if (dibcbp.pTmp)
		_TIFFfree(dibcbp.pTmp);
	if (dibcbp.palette_map)
		_TIFFfree(dibcbp.palette_map);
	if (dibcbp.ycbcr)
		_TIFFfree(dibcbp.ycbcr);

	/* only tear down an iterator that was actually set up; the other
	   entry points of this module do the same */
	if (iterBegun)
		TIFFImageIterEnd(&img);

	if ((!ok) && (!msg_seen))
	{
		/* emsg is data, not a format string */
		TIFFError(TIFFFileName(tif), "%s", emsg);
	}
	return ok;
}




/*
 * Check the image to see if TIFFReadDIBImage can deal with it and report
 * the DIB format that stores the image most accurately.
 *
 *   pMatchingDIBFormat  receives one of the TIFDIB_* format codes
 *   isExact             if not NULL, receives 1 when that format keeps all
 *                       sample information and 0 when precision is lost
 *                       (16-bit samples); only written on success
 *   emsg                receives the reason when 0 is returned
 *
 * 1/0 is returned according to whether or not the image can be handled.
 */
TIFF_DLLINTERFACE(int)
TIFFDIBGetFormat(TIFF * tif, int* pMatchingDIBFormat, int* isExact, char emsg[1024])
{
	TIFFImageIter img;
	TIFFDirectory *td = &tif->tif_dir;
	int exact= 1;
	int ret= 1;

	/* Set up a temporary iterator */
	if (!TIFFImageIterBegin(&img, tif, 0, emsg))
	{
		/* get routines cannot handle tif format, emsg contains reason */
		return 0;
	}

	/* do format checking based on the info in tif structure;
	   first pick the format assuming RGB data */
	switch (td->td_bitspersample)
	{
	case 1:
		/* can do all out formats */
		*pMatchingDIBFormat= TIFDIB_PALETTE_BW;
		break;
	case 2:
	case 4:
		/* no 2/4 bit RGB DIBs */
		*pMatchingDIBFormat= TIFDIB_RGB555;
		break;
	case 8:
		/* accurate is "true-color" */
		*pMatchingDIBFormat= TIFDIB_RGB888;
		break;
	case 16:
		/* most accurate is "true-color" */
		exact= 0; /* but info is lost */
		*pMatchingDIBFormat= TIFDIB_RGB888;
		break;
	default:
		sprintf(emsg, "Sorry, can not handle images with %d-bit samples",
				td->td_bitspersample);
		ret= 0;
		break;
	}

	/* palette and gray/b&w TIFFs map onto palettized DIBs instead */
	if (ret &&
		((img.photometric == PHOTOMETRIC_PALETTE) ||
		 (img.photometric == PHOTOMETRIC_MINISBLACK) ||
		 (img.photometric == PHOTOMETRIC_MINISWHITE)))
	{
		switch (td->td_bitspersample)
		{
		case 1:
			*pMatchingDIBFormat = TIFDIB_PALETTE_BW;
			break;
		case 2:
			/* no 2 bit palette DIBs */
		case 4:
			*pMatchingDIBFormat = TIFDIB_PALETTE_16;
			break;
		case 8:
			*pMatchingDIBFormat = TIFDIB_PALETTE_256;
			break;
		case 16:
			/* palette with 16 bits/pixel: most accurate is "true-color" */
			exact= 0; /* but info is lost */
			*pMatchingDIBFormat= TIFDIB_RGB888;
			break;
		default:
			/* checked above */
			break;
		}
	}

	/* hand the exactness verdict back to the caller (it used to be
	   computed but never stored) */
	if (ret && isExact)
		*isExact= exact;

	TIFFImageIterEnd(&img);
	return ret;
}

/*
 * Check the image to see if TIFFReadDIBImage can deal with it when the
 * output is to be the DIB format given in dibFormat (one of the TIFDIB_*
 * codes, TIFDIB_PALETTE_BW .. TIFDIB_RGB888).
 *
 * 1/0 is returned according to whether or not the image can be handled.
 * If 0 is returned, emsg contains the reason why it is being rejected.
 */
TIFF_DLLINTERFACE(int)
TIFFDIBImageOK(TIFF * tif, int dibFormat, char emsg[1024])
{
	TIFFImageIter img;
	TIFFDirectory *td = &tif->tif_dir;
	int ret= 1;

	if ((dibFormat < TIFDIB_PALETTE_BW) ||
	    (dibFormat > TIFDIB_RGB888))
	{
		sprintf(emsg, "Output DIB format unknown: %d", dibFormat);
		return 0;
	}
	/* Set up a temporary iterator */
	if (!TIFFImageIterBegin(&img, tif, 0, emsg))
	{
		/* get routines cannot handle tif format, emsg contains reason */
		return 0;
	}

	/* Minimum output depth by sample size: there is no dithering, so the
	   DIB must offer at least as many colors as one sample can express. */
	switch (td->td_bitspersample)
	{
	case 1:
		break;
	case 2:
	case 4:
		if (dibFormat < TIFDIB_PALETTE_16)
		{
			sprintf(emsg, "16 color palette output DIB format required for %d-bit samples",
				td->td_bitspersample);
			ret= 0;
		}
		break;
	case 8:
	case 16:
		if (dibFormat < TIFDIB_PALETTE_256)
		{
			sprintf(emsg, "256 color palette output DIB format required for %d-bit samples",
				td->td_bitspersample);
			ret= 0;
		}
		break;
	default:
		sprintf(emsg, "Sorry, can not handle images with %d-bit samples",
				td->td_bitspersample);
		ret= 0;
		break;
	}

	/* Gray images need nothing beyond the limits above (the former extra
	   gray checks repeated exactly those conditions and could never
	   trigger).  Images without any palette - neither explicit nor the
	   implicit gray ramp - with 8 or more bits per sample additionally
	   need a true RGB DIB. */
	if (ret &&
		(img.photometric != PHOTOMETRIC_MINISBLACK) &&
		(img.photometric != PHOTOMETRIC_MINISWHITE) &&
		(img.photometric != PHOTOMETRIC_PALETTE) &&
		(td->td_bitspersample >= 8) &&
		(dibFormat < TIFDIB_RGB555))
	{
		sprintf(emsg,
			"RGB output DIB format required for non-palette %d-bit samples (have no dithering yet)",
			td->td_bitspersample);
		ret= 0;
	}

	TIFFImageIterEnd(&img);
	return ret;
}


#endif //#ifdef TIF_WIN32_DIB_SUPPORT


#ifdef TIF_RGBA_SUPPORT



/**************************************************************************
 *      RGBA output section
 **************************************************************************/


/*
 * Per-image state handed to the RGBA output callbacks as their user_data
 * argument.  The callbacks in this section address a pixel (x, y) as
 * pbits - y * w + x, i.e. rows are stored in reverse order relative to
 * increasing y.
 */
typedef struct tagRGBACallbackParams
{
	uint32* pbits;           /* ptr to raster data adjusted so topleft is (0,0) */
	uint32 w;				 /* raster width in pixels (one uint32 per pixel) */
	/* for palette conversions: sample value -> packed pixel, NULL if the
	   image is converted without a map */
	uint32* palette_map;
	/* for YCBCR conversions */
	TIFFYCbCrToRGB* ycbcr;
} RGBACallbackParams;


/**************************************************************************
 *      convert TIFF data to RGBA
 **************************************************************************/

/* Cast helper so the shifts below are carried out in 32 bits. */
#define UINT32(x) ((uint32)(x))

/*
 * NOTE: the *_2_RGBA store macros below expand to two statements (store,
 * then pointer increment) and already end in ';'.  Use them only as a
 * statement of their own inside a braced block, never as the unbraced
 * body of an if/else/loop.
 */

/* 8 bit R/G/B -> RGBA (no alpha): r in bits 0-7, g in bits 8-15,
   b in bits 16-23, bits 24-31 left zero; advances dest_p by one pixel */
#define TIFBYTE_2_RGBA(dest_p, r, g, b) \
			*(dest_p) = (UINT32(b) << 16) | \
						(UINT32(g) << 8) | \
						(UINT32(r) << 0); \
			++(dest_p);

/* 8 bit R/G/B/A -> RGBA: same layout with a in bits 24-31;
   advances dest_p by one pixel */
#define TIFBYTE_A_2_RGBA(dest_p, r, g, b, a) \
			*(dest_p) = (UINT32(a) << 24) | \
						(UINT32(b) << 16) | \
						(UINT32(g) << 8) | \
						(UINT32(r) << 0); \
			++(dest_p);

/* extract the single color bytes from a packed pixel / palette entry */
#define TIFPAL_2_B(pal) ((BYTE) ((pal) >> 16))
#define TIFPAL_2_G(pal) ((BYTE) ((pal) >> 8))
#define TIFPAL_2_R(pal) ((BYTE) (pal))

/* store an already packed palette entry and advance cp by one pixel */
#define TIFPAL_2_RGBA(cp, pal) \
 			*(cp) = pal; \
			++(cp);

/**************************************************************************
 *      1-bit packed samples
 **************************************************************************/


/*
 * 1-bit packed samples, with and without Map => RGBA
 */
DECLAREContigCallbackFunc(putRGBcontig1bittile_rgba)
{
	RGBACallbackParams* ppara = (RGBACallbackParams*) user_data;
	uint32* dest_p;
	uint32 toskew;
	uint32 _h;
	uint32* palmap = ppara->palette_map;

	dest_p= ppara->pbits - (y * ppara->w) + x;
	toskew= ppara->w + w;
	if (ppara->palette_map)
	{
		uint32 fromskew_bytes= ASBITS(fromskew);
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x= w;
			while (_x != 0) 
			{
				int i;
				BYTE b= *src_p;
				++src_p;
				for (i= 8; (i != 0) && (_x != 0); --i, --_x)
				{
					TIFPAL_2_RGBA(dest_p, palmap[(b >> 7) & 1]);
					b= b << 1;
				}
			} 
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
	}
	else
	{
		// 1 bit RGB TIFF
		// Note the code is in no case performance optimized because
		// it's unlikely someone uses such format
		uint32 fromskew_bytes= ASBITS(fromskew*1*3); // really???
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x= w;
			int bitpos= -1;
			int rgbIndx= 0;  //red
			BYTE c;
			BYTE rgb[3];
			for(;;)
			{
				if (bitpos < 0)
				{
					c= *src_p;
					++src_p;
					bitpos= 7;
				}
				rgb[rgbIndx]= ((c >> bitpos) & 1) * 255;
				++rgbIndx;
				--bitpos;
				if (rgbIndx == 3)
				{
					TIFBYTE_2_RGBA(dest_p, rgb[0], rgb[1], rgb[2]);
					rgbIndx = 0;
					--_x;
					if (_x == 0)
						break;
				}
			}
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
	}
}

/**************************************************************************
 *      2-bit packed samples
 **************************************************************************/


/*
 * 2-bit packed samples, with and without Map => RGBA
 *
 * With a palette map every 2-bit sample (most significant pair first)
 * selects one of four packed pixels.  Without a map the data is taken as
 * 2-bit RGB: three consecutive samples form one pixel and each sample is
 * scaled from 0..3 to 0..255.  Every source row starts on a fresh byte.
 * Rows are written in reverse order.
 */
DECLAREContigCallbackFunc(putRGBcontig2bittile_rgba)
{
	RGBACallbackParams* ppara = (RGBACallbackParams*) user_data;
	uint32* dest_p;
	uint32 toskew;
	uint32 _h;
	uint32* palmap = ppara->palette_map;

	dest_p= ppara->pbits - (y * ppara->w) + x;
	toskew= ppara->w + w;
	if (ppara->palette_map)
	{
		/* four pixels per byte */
		uint32 fromskew_bytes= (fromskew + 3) / 4;
		/* 2 bit palette TIFF */
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x= w;
			while (_x != 0)
			{
				int i;
				BYTE b= *src_p;
				++src_p;
				for (i= 4; (i != 0) && (_x != 0); --i, --_x)
				{
					TIFPAL_2_RGBA(dest_p, palmap[(b >> 6) & 3]);
					b= b << 2;
				}
			}
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
	}
	else
	{
		/* 2 bit RGB TIFF.  Deliberately simple rather than fast: such
		   files are rare. */
		uint32 fromskew_bytes= ASBITS(fromskew*2*3); /* really??? */
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x= w;
			int bitpos= -2;     /* forces a byte fetch for the first sample */
			int rgbIndx= 0;     /* red */
			BYTE c= 0;
			BYTE rgb[3];
			for(;;)
			{
				if (bitpos < 0)
				{
					c= *src_p;
					++src_p;
					bitpos= 6;
				}
				/* a 2-bit sample has the range 0..3, so full scale is
				   reached with a factor of 255/3 (the 255/15 used before
				   belongs to 4-bit samples and capped output at 51) */
				rgb[rgbIndx]= ((c >> bitpos) & 3) * (255/3);
				++rgbIndx;
				bitpos-= 2;
				if (rgbIndx == 3)
				{
					TIFBYTE_2_RGBA(dest_p, rgb[0], rgb[1], rgb[2]);
					rgbIndx = 0;
					--_x;
					if (_x == 0)
						break;
				}
			}
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
	}
}

/**************************************************************************
 *      4-bit packed samples
 **************************************************************************/

/* Note on 4 bit packed RGB samples:
   Seems there is confusion about adding pad bytes in the tif image data stream.
   The tiff spec says it must be padded to byte for 4-bit data. 
   But it seems there is more padding at end of a line in some tiff files.
   So a 425 pixel wide image I got is not shown correctly. 

   (like in other imaging programs too..)
   If anyone knows what's wrong please e-mail me.

  */

/*
 * 4-bit packed samples, with and without Map => RGBA
 */
DECLAREContigCallbackFunc(putRGBcontig4bittile_rgba)
{
	RGBACallbackParams* ppara = (RGBACallbackParams*) user_data;
	uint32* dest_p;
	uint32 toskew;
	uint32 _h;

	dest_p= ppara->pbits - (y * ppara->w) + x;
	toskew= ppara->w + w;

	assert(!img->alpha); // no alpha channel here

	if (ppara->palette_map)
	{
		// 4 bit palette TIFF
		uint32* palmap = ppara->palette_map;
		uint32 fromskew_bytes= ASNIBBLES(fromskew);
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x= w;
			while (_x != 0) 
			{
				int i;
				BYTE b= *src_p;
				++src_p;
				for (i= 2; (i != 0) && (_x != 0); --i, --_x)
				{
					TIFPAL_2_RGBA(dest_p, palmap[(b >> 4) & 0x0F]);
					b= b << 4;
				}
			} 
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
	}
	else
	{
		// 4 bit RGB TIFF
		uint32 fromskew_bytes= ASBITS(fromskew * 4 * 3); // really??? se comment above
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x= w;
			while (_x != 0) 
			{
				static const BYTE cvttab[16]= 
				  {0*17, 1*17, 2*17, 3*17, 4*17, 5*17, 6*17, 7*17,
				   8*17, 9*17,10*17,11*17,12*17,13*17,14*17,15*17};
				BYTE c= *src_p; // RG
				BYTE r= cvttab[(c >> 4) & 15];
				BYTE g= cvttab[c & 15];
				BYTE b;
				++src_p;
				c= *src_p; // BR
				++src_p;
				b= cvttab[(c >> 4) & 15];
				TIFBYTE_2_RGBA(dest_p, r, g, b);
				--_x;
				if (_x == 0)
					break;
				r= cvttab[c & 15];
				c= *src_p; // GB
				++src_p;
				g= cvttab[(c >> 4) & 15];
				b= cvttab[c & 15];
				TIFBYTE_2_RGBA(dest_p, r, g, b);
				--_x;
			}
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
	}
}

/**************************************************************************
 *      8-bit packed samples
 **************************************************************************/


/*
 * 8-bit packed samples, with and without Map => RGBA
 *
 * With a palette map every source byte selects a packed pixel (no alpha
 * expected).  Without a map the data is RGB, optionally followed by an
 * alpha sample:
 *   - no alpha:           3 bytes per pixel, alpha byte of the output is 0
 *   - associated alpha:   4 bytes per pixel, copied as stored
 *   - unassociated alpha: 4 bytes per pixel, colors are multiplied by
 *                         alpha/255 before being stored
 * Rows are written in reverse order.
 */
DECLAREContigCallbackFunc(putRGBcontig8bittile_rgba)
{
	RGBACallbackParams* ppara = (RGBACallbackParams*) user_data;
	uint32* dest_p;
	uint32 toskew;
	uint32 _h;
	uint32 fromskew_bytes;

	dest_p= ppara->pbits - (y * ppara->w) + x;
	toskew= ppara->w + w;

	if (ppara->palette_map)
	{
		uint32* palmap = ppara->palette_map;

		assert(!img->alpha); /* know to have alpha == 0 */

		fromskew_bytes= fromskew * 1;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x;
			for (_x = w; _x != 0; --_x)
			{
				TIFPAL_2_RGBA(dest_p, palmap[*src_p]);
				++src_p;
			}
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
	}
	else
	{
		switch (img->alpha)
		{
		case 0:
			fromskew_bytes= fromskew * 3;
			for (_h= h; _h != 0; --_h)
			{
				uint32 _x;
				for (_x = w; _x != 0; --_x)
				{
					TIFBYTE_2_RGBA(dest_p, src_p[0], src_p[1], src_p[2]);
					src_p += 3;
				}
				dest_p -= toskew;
				src_p += fromskew_bytes;
			}
			break;
		case EXTRASAMPLE_ASSOCALPHA:
			fromskew_bytes= fromskew * 4;
			for (_h= h; _h != 0; --_h)
			{
				uint32 _x;
				for (_x = w; _x != 0; --_x)
				{
					TIFBYTE_A_2_RGBA(dest_p, src_p[0], src_p[1], src_p[2], src_p[3]);
					src_p += 4;
				}
				dest_p -= toskew;
				src_p += fromskew_bytes;
			}
			break;
		case EXTRASAMPLE_UNASSALPHA:
			/* pixels are 4 bytes wide here as well, so the skew has to be
			   scaled by 4 (it used to be 3, which misaligns every row
			   after the first whenever fromskew is non-zero) */
			fromskew_bytes= fromskew * 4;
			for (_h= h; _h != 0; --_h)
			{
				uint32 _x;
				for (_x = w; _x != 0; --_x)
				{
					uint32 a = src_p[3];  /* alpha */
					u_char r = (u_char)((a * src_p[0]) / 255); /* multiply by alpha value */
					u_char g = (u_char)((a * src_p[1]) / 255);
					u_char b = (u_char)((a * src_p[2]) / 255);
					TIFBYTE_A_2_RGBA(dest_p, r, g, b, a);
					src_p += 4;
				}
				dest_p -= toskew;
				src_p += fromskew_bytes;
			}
			break;
		default:
			break;
		}
	}
}


/**************************************************************************
 *      16-bit packed samples
 **************************************************************************/

// 16 bit R/G/B -> RGBA (no alpha)
// Keeps only the high byte of every 16-bit sample and stores the pixel as
// 0x00BBGGRR (the alpha byte stays 0), then advances dest_p by one pixel.
// dest_p is evaluated twice, so it must be a plain lvalue without side effects.
// Wrapped in do/while(0) so the macro behaves as a single statement.
#define TIFFWORD_2_RGBA(dest_p, r, g, b) \
			do { \
				*(dest_p) = ((UINT32(b) & 0x0FF00) << 8) | \
							((UINT32(g) & 0x0FF00) << 0) | \
							((UINT32(r) & 0x0FF00) >> 8); \
				++(dest_p); \
			} while (0)

// 16 bit R/G/B/A -> RGBA
// Same as above, but the high byte of the alpha sample fills bits 24..31
// (pixel layout 0xAABBGGRR).
#define TIFFWORD_A_2_RGBA(dest_p, r, g, b, a) \
			do { \
				*(dest_p) = ((UINT32(a) & 0x0FF00) << 16) | \
							((UINT32(b) & 0x0FF00) << 8) | \
							((UINT32(g) & 0x0FF00) << 0) | \
							((UINT32(r) & 0x0FF00) >> 8); \
				++(dest_p); \
			} while (0)

/*
 * 16-bit packed samples, no Map => RGBA
 */
DECLAREContigCallbackFunc(putRGBcontig16bittile_rgba)
{
	RGBACallbackParams* ppara = (RGBACallbackParams*) user_data;
	uint32* dest_p;
	uint32 toskew;
	uint32 _h;
	uint32 fromskew_words;
	uint16* src_wp = (uint16 *) src_p;

	dest_p= ppara->pbits - (y * ppara->w) + x;
	toskew= ppara->w + w;

	switch (img->alpha)
	{
	case 0:
		fromskew_words= fromskew * 3;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w; _x != 0; --_x) 
			{ 
				TIFFWORD_2_RGBA(dest_p, src_wp[0], src_wp[1], src_wp[2]);
				src_wp += 3;
			} 
			dest_p -= toskew;
			src_wp += fromskew_words;
		}
		break;
	case EXTRASAMPLE_ASSOCALPHA:
		fromskew_words= fromskew * 4;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w; _x != 0; --_x) 
			{ 
				TIFFWORD_A_2_RGBA(dest_p, src_wp[0], src_wp[1], src_wp[2], src_wp[3]);
				src_wp += 4;
			} 
			dest_p -= toskew;
			src_wp += fromskew_words;
		}
		break;
	case EXTRASAMPLE_UNASSALPHA:
		fromskew_words= fromskew * 3;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			/* We shift alpha down four bits just in case unsigned arithmetic doesn't handle the full range. We
			 * still have plenty of accuracy, since the output is 8 bits. So we have (r * 0xffff) * (a * 0xfff))
			 * = r*a * (0xffff*0xfff) Since we want r*a * 0xff for eight bit output, we divide by (0xffff *
			 * 0xfff) / 0xff == 0x10eff. */
			for (_x = w; _x != 0; --_x) 
			{
				uint32 a = src_wp[3] >> 4;  // alpha
				uint32 r = (src_wp[0] * a) / 0x10eff;
				uint32 g = (src_wp[1] * a) / 0x10eff;
				uint32 b = (src_wp[2] * a) / 0x10eff;
				TIFFWORD_A_2_RGBA(dest_p, r, g, b, a);
				src_wp += 4;
			}
			dest_p -= toskew;
			src_wp += fromskew_words;
		}
		break;
	default:
		break;
	}
}


/**************************************************************************
 *      helper functions for colormaps and gray, b&w maps
 **************************************************************************/

/*
 * Construct any mapping table used
 * by the associated callback routine.
 *
 * For min-is-black / min-is-white images a temporary table obtained from
 * setupMap() is handed to makemap() and released again afterwards; for
 * palette images makemap() is called with NULL after the colormap has been
 * checked (and converted when checkcmap() reports 16).  In both cases the
 * result is stored in ppara->palette_map.  Other photometric
 * interpretations leave ppara untouched.
 *
 * Returns 1 on success (including "nothing to build"), 0 when setupMap()
 * or makemap() returned NULL.
 */
static int
 buildMap_RGBA(TIFFImageIter* img, RGBACallbackParams* ppara)
{
	if (img->photometric == PHOTOMETRIC_MINISBLACK ||
	    img->photometric == PHOTOMETRIC_MINISWHITE)
	{
		TIFFRGBValue* gray_ramp = setupMap(img);

		if (gray_ramp == NULL)
			return 0;

		/* Derive the unpacking table (samples <= 8 bits) from the
		   photometric mapping table, which is only needed temporarily. */
		ppara->palette_map = makemap(img, gray_ramp);
		_TIFFfree(gray_ramp);

		return (ppara->palette_map != NULL) ? 1 : 0;
	}

	if (img->photometric == PHOTOMETRIC_PALETTE)
	{
		/* A 16-bit colormap is reduced to 8 bits; anything else is taken
		   to be an old-style 8-bit colormap. */
		if (checkcmap(img) != 16)
			TIFFWarning(TIFFFileName(img->tif), "Assuming 8-bit colormap");
		else
			cvtcmap(img);

		/* The palette map is built straight from the colormap. */
		ppara->palette_map = makemap(img, NULL);

		return (ppara->palette_map != NULL) ? 1 : 0;
	}

	/* No table required for this photometric interpretation. */
	return 1;
}


/**************************************************************************
 *      YCBCR decoders
 **************************************************************************/


/*
 * Convert one luma sample yc to a 32-bit pixel at *cp and advance cp.
 *
 * The macro picks up the following names from the enclosing scope: the
 * chroma values Cb and Cr and the tables clamptab, Crrtab, Cbbtab, Crgtab
 * and Cbgtab.  The pixel is stored as 0x00BBGGRR (alpha byte left 0):
 * red is derived from Cr, blue from Cb, green from both.
 * cp is evaluated twice and must therefore be free of side effects.
 */
#define	YCbCrtoRGBA(cp, yc) do {             \
    int Y = (yc);                            \
    *(cp) = (UINT32( clamptab[Y+Cbbtab[Cb]] ) << 16) | \
			(UINT32( clamptab[Y + (int)((Cbgtab[Cb]+Crgtab[Cr])>>16)] ) << 8) | \
            (UINT32( clamptab[Y+Crrtab[Cr]] ) << 0);  \
	++(cp);  \
    } while (0)

 /*
 * 8-bit YCbCr => RGBA
 */
DECLAREContigCallbackFunc(putcontig8bitYCbCrtile_rgba)
{
	RGBACallbackParams* ppara = (RGBACallbackParams*) user_data;
    TIFFYCbCrToRGB* ycbcr = ppara->ycbcr;
    int* Crrtab = ycbcr->Cr_r_tab;
    int* Cbbtab = ycbcr->Cb_b_tab;
    int32* Crgtab = ycbcr->Cr_g_tab;
    int32* Cbgtab = ycbcr->Cb_g_tab;
    TIFFRGBValue* clamptab = ycbcr->clamptab;

	uint32* dest_p;
	uint32* dest1_p;
	uint32 toskew;

	uint32 _h;
	uint32 fromskew_bytes;
	uint32 raster_width= ppara->w;

	dest_p= ppara->pbits - (y * raster_width) + x;
	fromskew_bytes= fromskew * 3;
			
	switch (ppara->ycbcr->subsamplingType)
	{
	case 0x44: /* UNTESTED */
		{
		uint32* dest1_p= dest_p - raster_width;
		uint32* dest2_p= dest1_p - raster_width;
		uint32* dest3_p= dest2_p - raster_width;
		uint32 toskew= 4 * raster_width + w;

		for (_h= h; _h >= 4; _h-= 4)
		{
			uint32 _x; 

			for (_x = w / 4; _x != 0; --_x) 
			{
				int Cb = src_p[16];
				int Cr = src_p[17];
				YCbCrtoRGBA(dest_p, src_p[0]);
				YCbCrtoRGBA(dest_p, src_p[1]);
				YCbCrtoRGBA(dest_p, src_p[2]);
				YCbCrtoRGBA(dest_p, src_p[3]);
				YCbCrtoRGBA(dest1_p, src_p[4]);
				YCbCrtoRGBA(dest1_p, src_p[5]);
				YCbCrtoRGBA(dest1_p, src_p[6]);
				YCbCrtoRGBA(dest1_p, src_p[7]);
				YCbCrtoRGBA(dest2_p, src_p[8]);
				YCbCrtoRGBA(dest2_p, src_p[9]);
				YCbCrtoRGBA(dest2_p, src_p[10]);
				YCbCrtoRGBA(dest2_p, src_p[11]);
				YCbCrtoRGBA(dest3_p, src_p[12]);
				YCbCrtoRGBA(dest3_p, src_p[13]);
				YCbCrtoRGBA(dest3_p, src_p[14]);
				YCbCrtoRGBA(dest3_p, src_p[15]);
				src_p += 18;
			} 
			dest_p -= toskew;
			dest1_p -= toskew;
			dest2_p -= toskew;
			dest3_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
		}
	case 0x42: /* UNTESTED */
		dest1_p= dest_p - raster_width;
		toskew= 2 * raster_width + w;
		for (_h= h; _h >= 2; _h-= 2)
		{
			uint32 _x; 
			for (_x = w / 4; _x != 0; --_x) 
			{
				int Cb = src_p[8];
				int Cr = src_p[9];
				YCbCrtoRGBA(dest_p, src_p[0]);
				YCbCrtoRGBA(dest_p, src_p[1]);
				YCbCrtoRGBA(dest_p, src_p[2]);
				YCbCrtoRGBA(dest_p, src_p[3]);
				YCbCrtoRGBA(dest1_p, src_p[4]);
				YCbCrtoRGBA(dest1_p, src_p[5]);
				YCbCrtoRGBA(dest1_p, src_p[6]);
				YCbCrtoRGBA(dest1_p, src_p[7]);
				src_p += 10;
			} 
			dest_p -= toskew;
			dest1_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	case 0x41: /* UNTESTED */
		toskew= raster_width + w;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w / 4; _x != 0; --_x) 
			{
				int Cb = src_p[4];
				int Cr = src_p[5];
				YCbCrtoRGBA(dest_p, src_p[0]);
				YCbCrtoRGBA(dest_p, src_p[1]);
				YCbCrtoRGBA(dest_p, src_p[2]);
				YCbCrtoRGBA(dest_p, src_p[3]);
				src_p += 6;
			} 
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	case 0x22: /* UNTESTED */
		dest1_p= dest_p - raster_width;
		toskew= 2 * raster_width + w;
		for (_h= h; _h >= 2; _h-= 2)
		{
			uint32 _x; 
			for (_x = w / 2; _x != 0; --_x) 
			{
				int Cb = src_p[4];
				int Cr = src_p[5];
				YCbCrtoRGBA(dest_p, src_p[0]);
				YCbCrtoRGBA(dest_p, src_p[1]);
				YCbCrtoRGBA(dest1_p, src_p[2]);
				YCbCrtoRGBA(dest1_p, src_p[3]);
				src_p += 6;
			} 
			dest_p -= toskew;
			dest1_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	case 0x21: /* UNTESTED */
		toskew= raster_width + w;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w / 2; _x != 0; --_x) 
			{
				int Cb = src_p[2];
				int Cr = src_p[3];
				YCbCrtoRGBA(dest_p, src_p[0]);
				YCbCrtoRGBA(dest_p, src_p[1]);
				src_p += 4;
			} 
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	case 0x11: /* UNTESTED */
		toskew= raster_width + w;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w / 2; _x != 0; --_x) 
			{
				int Cb = src_p[1];
				int Cr = src_p[2];
				YCbCrtoRGBA(dest_p, src_p[0]);
				src_p += 3;
			} 
			dest_p -= toskew;
			src_p += fromskew_bytes;
		}
		break;
	default:
		/* checked before */
		break;
	}
}


/**************************************************************************
 *      TIFF with separated samples => RGBA
 **************************************************************************/

/*
 * 8-bit separate samples, no Map => RGBA
 */
DECLARESepCallbackFunc(putRGBseparate8bittile_rgba)
{
	RGBACallbackParams* ppara = (RGBACallbackParams*) user_data;

	uint32* dest_p;
	uint32 toskew;
	uint32 _h;
	uint32 fromskew_bytes;

	dest_p= ppara->pbits - (y * ppara->w) + x;
	toskew= ppara->w + w;

	assert (!ppara->palette_map);
	switch (img->alpha)
	{
	case 0:
		fromskew_bytes= fromskew;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w; _x != 0; --_x) 
			{ 
				TIFBYTE_2_RGBA(dest_p, *src_r_p, *src_g_p, *src_b_p);
				++src_r_p;
				++src_g_p;
				++src_b_p;
			} 
			dest_p -= toskew;
			src_r_p += fromskew_bytes;
			src_g_p += fromskew_bytes;
			src_b_p += fromskew_bytes;
		}
		break;
	case EXTRASAMPLE_ASSOCALPHA:
		fromskew_bytes= fromskew;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w; _x != 0; --_x) 
			{ 
				TIFBYTE_A_2_RGBA(dest_p, *src_r_p, *src_g_p, *src_b_p, *src_alpha_p);
				++src_r_p;
				++src_g_p;
				++src_b_p;
				++src_alpha_p; // if we would use it
			} 
			dest_p -= toskew;
			src_r_p += fromskew_bytes;
			src_g_p += fromskew_bytes;
			src_b_p += fromskew_bytes;
			src_alpha_p += fromskew_bytes; // if we would use it
		}
		break;
	case EXTRASAMPLE_UNASSALPHA:
		fromskew_bytes= fromskew;
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w; _x != 0; --_x) 
			{ 
				uint32 a = *src_alpha_p;  // alpha
				u_char r = (u_char)((a * (*src_r_p)) / 255); // multiply by alpha value
				u_char g = (u_char)((a * (*src_g_p)) / 255);
				u_char b = (u_char)((a * (*src_b_p)) / 255);
				TIFBYTE_A_2_RGBA(dest_p, r, g, b, a);
				++src_r_p;
				++src_g_p;
				++src_b_p;
				++src_alpha_p;
			} 
			dest_p -= toskew;
			src_r_p += fromskew_bytes;
			src_g_p += fromskew_bytes;
			src_b_p += fromskew_bytes;
			src_alpha_p += fromskew_bytes;
		}
		break;
	default:
		break;
	}
}

/*
 * 16-bit separate samples, no Map => RGBA
 */
DECLARESepCallbackFunc(putRGBseparate16bittile_rgba)
{
	RGBACallbackParams* ppara = (RGBACallbackParams*) user_data;

	uint32* dest_p;
	uint32 toskew;
	uint32 _h;
	uint32 fromskew_words= fromskew;

	uint16* src_r_wp = (uint16 *) src_r_p;
	uint16* src_g_wp = (uint16 *) src_g_p;
	uint16* src_b_wp = (uint16 *) src_b_p;
	uint16* src_alpha_wp = (uint16 *) src_alpha_p;

	dest_p= ppara->pbits - (y * ppara->w) + x;
	toskew= ppara->w + w;


	switch (img->alpha)
	{
	case 0:
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w; _x != 0; --_x) 
			{ 
				TIFFWORD_2_RGBA(dest_p, *src_r_wp, *src_g_wp, *src_b_wp);
				++src_r_wp;
				++src_g_wp;
				++src_b_wp;
			} 
			dest_p -= toskew;
			src_r_wp += fromskew_words;
			src_g_wp += fromskew_words;
			src_b_wp += fromskew_words;
		}
		break;
	case EXTRASAMPLE_ASSOCALPHA:
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			for (_x = w; _x != 0; --_x) 
			{ 
				TIFFWORD_A_2_RGBA(dest_p, *src_r_wp, *src_g_wp, *src_b_wp, *src_alpha_wp);
				++src_r_wp;
				++src_g_wp;
				++src_b_wp;
				++src_alpha_wp;
			} 
			dest_p -= toskew;
			src_r_wp += fromskew_words;
			src_g_wp += fromskew_words;
			src_b_wp += fromskew_words;
			src_alpha_wp += fromskew_words;
		}
		break;
	case EXTRASAMPLE_UNASSALPHA:
		for (_h= h; _h != 0; --_h)
		{
			uint32 _x; 
			/* We shift alpha down four bits just in case unsigned arithmetic doesn't handle the full range. We
			 * still have plenty of accuracy, since the output is 8 bits. So we have (r * 0xffff) * (a * 0xfff))
			 * = r*a * (0xffff*0xfff) Since we want r*a * 0xff for eight bit output, we divide by (0xffff *
			 * 0xfff) / 0xff == 0x10eff. */
			for (_x = w; _x != 0; --_x) 
			{
				uint32 a = (*src_alpha_wp) >> 4;  // alpha
				uint32 r = ((*src_r_wp) * a) / 0x10eff;
				uint32 g = ((*src_g_wp) * a) / 0x10eff;
				uint32 b = ((*src_b_wp) * a) / 0x10eff;
				TIFFWORD_A_2_RGBA(dest_p, r, g, b, a);
				++src_r_wp;
				++src_g_wp;
				++src_b_wp;
				++src_alpha_wp;
			} 
			dest_p -= toskew;
			src_r_wp += fromskew_words;
			src_g_wp += fromskew_words;
			src_b_wp += fromskew_words;
			src_alpha_wp += fromskew_words;
		}
		break;
	default:
		break;
	}
}



/**************************************************************************
 *      RGBA Interface routines
 **************************************************************************/


/*
 * Read the specified image into an RGBA-format raster.
 *
 * tif              TIFF handle to read from
 * rwidth, rheight  raster size in pixels; both must be at least as large
 *                  as the image
 * raster           destination, rwidth * rheight 32-bit pixels; image row 0
 *                  is stored in the LAST raster row
 * stop             forwarded unchanged to TIFFImageIterBegin()
 *
 * Returns non-zero on success and 0 on failure.  On failure the text in
 * emsg is reported through TIFFError() unless msg_seen is set, which marks
 * the paths where the failing helper is presumed to have reported the
 * problem itself.
 */
TIFF_DLLINTERFACE(int)
TIFFReadRGBAImage(TIFF* tif,
    uint32 rwidth, uint32 rheight, uint32* raster, int stop)
{
	char emsg[1024];
	TIFFImageIter img;
	int ok= 0;
	int msg_seen= 0;
	int have_callback= 0;	/* becomes 1 once a conversion routine is selected */
	RGBACallbackParams rgbacp;

	/* set params used by callback function */

	/* ptr to the last row of the uint32 raster data */
	rgbacp.pbits= raster + (rheight-1) * rwidth;

	/* params of the raster data */
	rgbacp.w= rwidth;
	rgbacp.palette_map= NULL;
	rgbacp.ycbcr= NULL;

	/* single-pass loop: "break" jumps to the common cleanup below */
	for (;;)
	{
		ok = TIFFImageIterBegin(&img, tif, stop, emsg);
		if (!ok)
			break;
		if (rwidth < img.width)
		{
			sprintf(emsg, "RGBA width lower than image width");
			ok= 0;
			break;
		}
		if (rheight < img.height)
		{
			sprintf(emsg, "RGBA height lower than image height");
			ok= 0;
			break;
		}

		ok= buildMap_RGBA(&img, &rgbacp);
		if (!ok)
		{
			msg_seen= 1;
			break;
		}

		/* Set callback function */
		if (img.isContig)
		{
			switch (img.photometric)
			{
			case PHOTOMETRIC_YCBCR:
				/* can handle only 8bpp TIFF files (decoding routines constraint) */
				if (img.bitspersample == 8)
				{
					rgbacp.ycbcr= initYCbCrConversion(&img, rgbacp.ycbcr);
					if (rgbacp.ycbcr)
					{
						switch (rgbacp.ycbcr->subsamplingType)
						{
						case 0x44:
						case 0x42:
						case 0x41:
						case 0x22:
						case 0x21:
						case 0x11:
							img.callback.contig= putcontig8bitYCbCrtile_rgba;
							have_callback= 1;
							break;
						default:
							sprintf(emsg, "YCbCr subsampling not supported %x", (int) rgbacp.ycbcr->subsamplingType);
							ok= 0;
							break;
						}
					}
					else
					{
						ok= 0;
						msg_seen= 1;
					}
				}
				break;
			case PHOTOMETRIC_MINISBLACK:
			case PHOTOMETRIC_MINISWHITE:
			case PHOTOMETRIC_PALETTE:
				/* TIFF formats has a palette, either implicit or explicit given */
				/* output is rgba (palmap will do right thing) */
				/* fall through */
			case PHOTOMETRIC_RGB:
				/* TIFF formats has no palette */
				switch (img.bitspersample)
				{
				case 1: /* 1 bit per sample TIF format */
					img.callback.contig = putRGBcontig1bittile_rgba;
					have_callback= 1;
					break;
				case 2: /* 2 bits per sample TIF format */
					img.callback.contig = putRGBcontig2bittile_rgba;
					have_callback= 1;
					break;
				case 4: /* 4 bits per sample TIF format */
					img.callback.contig = putRGBcontig4bittile_rgba;
					have_callback= 1;
					break;
				case 8: /* 8 bits per sample TIF format */
					img.callback.contig = putRGBcontig8bittile_rgba;
					have_callback= 1;
					break;
				case 16: /* 16 bits per sample TIF format */
					img.callback.contig = putRGBcontig16bittile_rgba;
					have_callback= 1;
					break;
				default:
					break;
				}
				break;
			default:
				break;
			}
		}
		else
		{
			/*
			 * Select the appropriate conversion routine for unpacked data.
			 *
			 * NB: we assume that unpacked single channel data is directed
			 *	 to the "packed" routines.
			 */
			switch (img.photometric)
			{
			case PHOTOMETRIC_RGB:
				switch (img.bitspersample)
				{
				case 8: /* 8 bits per sample TIF format */
					img.callback.separate = putRGBseparate8bittile_rgba;
					have_callback= 1;
					break;
				case 16: /* 16 bits per sample TIF format */
					img.callback.separate = putRGBseparate16bittile_rgba;
					have_callback= 1;
					break;
				default:
					break;
				}
				break;
			default:
				/* separate TIFF formats have no palette ??? */
				break;
			}
		}

		/* Without a conversion routine nothing could fill the raster, so
		 * fail with a message instead of starting the iteration with a
		 * callback this function never installed. */
		if (ok && !have_callback)
		{
			sprintf(emsg, "Sorry, can not handle this image format (no RGBA conversion routine)");
			ok= 0;
		}

		if (!ok)
			break;

		ok = TIFFImageIterGet(&img, &rgbacp, img.width, img.height);
		if (!ok)
		{
			msg_seen= 1;
		}
		break;
	}//end for

	if (rgbacp.palette_map)
		_TIFFfree(rgbacp.palette_map);
	if (rgbacp.ycbcr)
		_TIFFfree(rgbacp.ycbcr);

	TIFFImageIterEnd(&img);

	if ((!ok) && (!msg_seen))
	{
		/* emsg is data, not a format: never pass it as the format string */
		TIFFError(TIFFFileName(tif), "%s", emsg);
	}
	return ok;
}



/*
 * Report whether TIFFReadRGBAImage can deal with the image.
 *
 * Returns 1 when the image can be handled and 0 otherwise; in the latter
 * case emsg holds the reason for the rejection (written either by
 * TIFFImageIterBegin() or by the sample-depth check below).
 */
TIFF_DLLINTERFACE(int)
TIFFRGBAImageOK(TIFF* tif, char emsg[1024])
{
	TIFFImageIter probe;
	TIFFDirectory *dir = &tif->tif_dir;
	int supported;

	/* A throw-away iterator tells us whether the get routines accept the
	   file at all; on refusal emsg already carries the explanation. */
	if (!TIFFImageIterBegin(&probe, tif, 0, emsg))
		return 0;

	/* A callback routine exists only for these sample depths. */
	if (dir->td_bitspersample == 1 ||
	    dir->td_bitspersample == 2 ||
	    dir->td_bitspersample == 4 ||
	    dir->td_bitspersample == 8 ||
	    dir->td_bitspersample == 16)
	{
		supported= 1;
	}
	else
	{
		sprintf(emsg, "Sorry, can not handle images with %d-bit samples",
				dir->td_bitspersample);
		supported= 0;
	}

	TIFFImageIterEnd(&probe);
	return supported;
}



#endif //#ifdef TIF_RGBA_SUPPORT
