 /*
  * <LIC_AMD_STD>
  * Copyright (C) <years> Advanced Micro Devices, Inc.  All Rights Reserved.
  * </LIC_AMD_STD>
  * 
  * <CTL_AMD_STD>
  * </CTL_AMD_STD>
  * 
  * <DOC_AMD_STD>
  * DirectDraw core routines.
  * </DOC_AMD_STD>
  * 
  */

#include "precomp.h"
#include "gfx_regs.h"
#include "gfx_rtns.h"
#include "gfx_defs.h"

/* DirectDraw HAL callbacks.  DrvEnableDirectDraw stores each of these in  */
/* the DD_CALLBACKS / DD_SURFACECALLBACKS tables it is handed.             */
DWORD DdCanCreateSurface(PDD_CANCREATESURFACEDATA lpCanCreateSurface);
DWORD DdCreateSurface(PDD_CREATESURFACEDATA lpCreateSurface);
DWORD DdDestroySurface(PDD_DESTROYSURFACEDATA  lpDestroySurface);
DWORD DdGetBltStatus(PDD_GETBLTSTATUSDATA lpGetBltStatus);
DWORD DdGetFlipStatus(PDD_GETFLIPSTATUSDATA lpGetFlipStatus);
DWORD DdLock(PDD_LOCKDATA lpLock);
DWORD DdMapMemory(PDD_MAPMEMORYDATA lpMapMemory);
DWORD DdSetOverlayPosition(PDD_SETOVERLAYPOSITIONDATA lpSetOverlayPosition);
DWORD DdUpdateOverlay(PDD_UPDATEOVERLAYDATA lpUpdateOverlay);
DWORD DdWaitForVerticalBlank(PDD_WAITFORVERTICALBLANKDATA lpWaitForVerticalBlank);
DWORD DdBlt(PDD_BLTDATA lpBlt);
DWORD DdFlip(PDD_FLIPDATA lpFlip);

/* Defined elsewhere; not called by the routines in this part of the file. */
BOOL OverlayOK(PDEV *ppdev, DWORD VertScale);

/* File-scope state; not referenced by the routines in this part of the    */
/* file.  NOTE(review): names suggest surface width/height/pitch - confirm */
/* against the code that assigns them.                                     */
DWORD g_wHeight,g_lLinearPitch, g_wWidth;

// Defines we'll use in the surface's 'dwReserved1' field:
// (DdBlt treats a non-zero dwReserved1 as "not the primary display's pixel
// format" and additionally compares its low four bits with VIDEO_FORMAT_*.)

#define DD_RESERVED_DIFFERENTPIXELFORMAT	0x80000000

// FourCC codes supported
// Each value is the four ASCII characters of its name packed with the first
// character in the least significant byte (e.g. 'Y','U','Y','2' -> 0x32595559).

#define FOURCC_YUY2   0x32595559
#define FOURCC_YVYU   0x55595659
#define FOURCC_UYVY   0x59565955
#define FOURCC_YV12   0x32315659
#define FOURCC_I420   0x30323449
#define FOURCC_IYUV   0x56555949

// NT is kind enough to pre-calculate the 2-d surface offset as a 'hint' so
// that we don't have to do the following, which would be 6 DIVs per blt:
//
//	  y += (offset / pitch)
//	  x += (offset % pitch) / bytes_per_pixel
//
// convertToGlobalCord(x, y, surf)
//	  Adds surf->xHint / surf->yHint to the lvalues x and y, in place.
//	  'surf' is any pointer expression to a structure with xHint and yHint
//	  members.  Every argument is parenthesised in the expansion so that
//	  expressions such as '&local' or 'base + 1' expand correctly.
//
// The body is kept as a plain brace block (rather than do { } while (0)) so
// that existing call sites, with or without a trailing ';', keep compiling.

#define convertToGlobalCord(x, y, surf) \
{										\
	(y) += (surf)->yHint;				\
	(x) += (surf)->xHint;				\
}

// calculateOffset(offset, x, y, surf)
//	  Stores in the lvalue 'offset' the byte address of pixel (x, y) of the
//	  surface pointed to by 'surf':
//
//	      offset = y * surf->lPitch + surf->fpVidMem + (x << mode_shift)
//
//	  'mode_shift' is the file-level extern that converts a pixel count to a
//	  byte count by shifting.  Every argument is parenthesised in the
//	  expansion so that compound expressions (e.g. 'top + 1' for y, or
//	  '&local' for surf) are evaluated as a unit.
//
// The body is kept as a plain brace block so existing call sites are
// unaffected.

#define calculateOffset(offset, x, y, surf)                     \
{                                                               \
	(offset)  = ((y) * (surf)->lPitch) + (surf)->fpVidMem;      \
	(offset) += ((x) << mode_shift);                            \
}

/* Number of entries in the SurfacePointers[] table declared below. */
#define MAX_SURFACES	20
/* VIP state codes.  Not referenced by the routines in this part of the file; */
/* NOTE(review): presumably video-input-port states - confirm at their users. */
#define VIP_STATE_INIT		0x00000001
#define VIP_STATE_START		0x00000020
#define VIP_STATE_STOP		0x00000040
#define VIP_STATE_CLOSE		0x00000000

/* File-scope state; none of these three is read or written by the routines */
/* in this part of the file.                                                */
int Format = 0;
int vextra, uextra;

DWORD  SurfacePointers[MAX_SURFACES]; // Save the multibuffer overlay pointers - Sandeep Feb 2002

/*
 * BytesToPixels
 *
 * Splits a byte offset measured from the start of a surface into pixel
 * coordinates.
 *
 *   Bpp      bits per pixel; must be at least 8, since the byte remainder
 *            is divided by (Bpp >> 3).
 *   Pitch    bytes per scan line; must be non-zero.
 *   Offset   byte offset to convert.
 *   OffsetX  receives the pixel column: (Offset mod Pitch) / bytes-per-pixel.
 *   OffsetY  receives the scan line:    Offset / Pitch.
 *
 * The column uses a true remainder rather than 'Offset & (Pitch - 1)'.  The
 * mask form only equals the remainder when Pitch is a power of two, whereas
 * OffsetY is computed with a real division, so the two results could
 * disagree for any other pitch.  For power-of-two pitches the result is
 * unchanged.
 */
static __inline void
BytesToPixels(DWORD Bpp, LONG Pitch, DWORD Offset, DWORD *OffsetX, DWORD *OffsetY)
{
	DWORD bytesPerLine  = (DWORD) Pitch;
	DWORD bytesPerPixel = Bpp >> 3;

	*OffsetX = (Offset % bytesPerLine) / bytesPerPixel;
	*OffsetY = Offset / bytesPerLine;
}

/* Driver-wide state defined in another translation unit.  As used in this  */
/* part of the file:                                                        */
/*   gfx_virt_fbptr   reported to DirectDraw as the primary surface pointer */
/*   gfx_virt_regptr  base passed to MEM_READ_REG32 for display registers   */
/*   gfx_fb_size      caps the view size mapped by DdMapMemory              */
/*   gu2_bpp          OR-ed into MGP_RASTER_MODE values by DdBlt            */
/*   mode_shift       shift count converting a pixel count to bytes         */
/* The remaining externs are not referenced by the routines visible here.   */
extern unsigned char *gfx_virt_vidptr;
extern unsigned char *gfx_virt_fbptr;
extern unsigned char *gfx_virt_regptr;
extern unsigned char *gfx_virt_gpptr;
extern unsigned long  gfx_fb_size;
extern unsigned long  gu2_bpp;
extern unsigned char  mode_shift;
extern unsigned long  ddraw_invalid;
extern unsigned long  ddraw_count;

/* Spin until the named MGP_BLT_STATUS bit clears.  Each macro expands to a */
/* bare 'while (...)' header with no body: the ';' written at the use site  */
/* IS the loop body, so every use must be followed immediately by ';'.      */
#define GU2_WAIT_PENDING while(READ_GP32(MGP_BLT_STATUS) & MGP_BS_BLT_PENDING)
#define GU2_WAIT_BUSY while(READ_GP32(MGP_BLT_STATUS) & MGP_BS_BLT_BUSY)

/* Implemented elsewhere.  DdGetDriverInfo is installed as the              */
/* GetDriverInfo callback by DrvGetDirectDrawInfo; the other two are not    */
/* called by the routines in this part of the file.                         */
int AddDDrawBitmap (PDEV *ppdev, unsigned long bitmap_size, unsigned long pitch);
void ResetDDrawHeap (void);
DWORD DdGetDriverInfo(PDD_GETDRIVERINFODATA lpData);

/*----------------------------------------------------------------------
 * DrvGetDirectDrawInfo
 *
 * Describes the device's DirectDraw capabilities to GDI.
 *
 * Will be called twice before DrvEnableDirectDraw is called.  On the
 * first call pvmList and pdwFourCC are NULL and only the counts are
 * returned; on the second call the supplied buffers are filled in.
 *
 *   dhpdev        driver PDEV handle.
 *   pHalInfo      HAL info structure to populate (zero-initialized by
 *                 GDI, so only non-zero fields are written).
 *   pdwNumHeaps   receives the number of DirectDraw-managed heaps (0:
 *                 off-screen memory is managed by the driver itself).
 *   pvmList       heap description buffer, or NULL.
 *   pdwNumFourCC  receives the number of supported FourCC codes.
 *   pdwFourCC     FourCC code buffer, or NULL.
 *
 * Always returns TRUE.
 *----------------------------------------------------------------------*/

BOOL DrvGetDirectDrawInfo(
	DHPDEV			dhpdev,
	DD_HALINFO* 	pHalInfo,
	DWORD*			pdwNumHeaps,
	VIDEOMEMORY*	pvmList,			// Will be NULL on first call
	DWORD*			pdwNumFourCC,
	DWORD*			pdwFourCC)			// Will be NULL on first call
{
	/* Single source of truth for both the FourCC count and the list, so */
	/* the two can never disagree.                                       */
	static const DWORD supportedFourCC[] =
	{
		FOURCC_I420,
		FOURCC_IYUV,
		FOURCC_YV12,
		FOURCC_YUY2,
		FOURCC_UYVY,
		FOURCC_YVYU
	};
	const DWORD fourCCCount = (DWORD) (sizeof(supportedFourCC) / sizeof(supportedFourCC[0]));

	PDEV *ppdev = (PDEV*) dhpdev;
	ULONG i;

	DISPDBG((ppdev, 1, "DrvGetDirectDrawInfo Entry\n"));

	pHalInfo->dwSize = sizeof(*pHalInfo);

	/* FILL IN HALINFO                                                          */
	/* Current primary surface attributes.	Since HalInfo is zero-initialized   */
	/* by GDI, we only have to fill in the fields which should be non-zero:     */

	pHalInfo->vmiData.pvPrimary 	  = gfx_virt_fbptr;
	pHalInfo->vmiData.dwDisplayWidth  = ppdev->cxScreen;
	pHalInfo->vmiData.dwDisplayHeight = ppdev->cyScreen;
	pHalInfo->vmiData.lDisplayPitch   = ppdev->lDelta;

	pHalInfo->vmiData.ddpfDisplay.dwSize  = sizeof(DDPIXELFORMAT);
	pHalInfo->vmiData.ddpfDisplay.dwFlags = DDPF_RGB;

	pHalInfo->vmiData.ddpfDisplay.dwRGBBitCount = ppdev->cBitsPerPel;

	if (ppdev->iBitmapFormat == BMF_8BPP)
	{
		pHalInfo->vmiData.ddpfDisplay.dwFlags |= DDPF_PALETTEINDEXED8;
	}

	/* RGB MASKS */
	/* Copied straight from the PDEV.  NOTE(review): the original comment */
	/* read "zero at BPP"; presumably the masks are zero at 8 BPP.        */

	pHalInfo->vmiData.ddpfDisplay.dwRBitMask = ppdev->flRed;
	pHalInfo->vmiData.ddpfDisplay.dwGBitMask = ppdev->flGreen;
	pHalInfo->vmiData.ddpfDisplay.dwBBitMask = ppdev->flBlue;

	// SELF-MANAGED HEAP FOR OFFSCREEN MEMORY
	// No heap is handed to DirectDraw, so the reported count is zero.
	// NOTE(review): the heap description is still written whenever a
	// buffer is supplied; confirm GDI never passes a non-NULL pvmList
	// sized for zero entries.

	*pdwNumHeaps = 0;

	if (pvmList)
	{
		pvmList->dwFlags           = VIDMEM_ISLINEAR;
		pvmList->fpStart           = heap_start;
		pvmList->fpEnd             = heap_end - 1;
		pvmList->ddsCaps.dwCaps    = 0;
		pvmList->ddsCapsAlt.dwCaps = 0;
	}

	/* FORCE DWORD ALIGNMENT FOR OFFSCREEN SURFACES */

	pHalInfo->vmiData.dwOffscreenAlign = 4;

	/* --- GRAPHICS CAPABILITIES --- */

	pHalInfo->ddCaps.dwCaps = DDCAPS_BLT
							| DDCAPS_BLTCOLORFILL
							| DDCAPS_COLORKEY;

	pHalInfo->ddCaps.dwCKeyCaps = DDCKEYCAPS_SRCBLT;

	pHalInfo->ddCaps.ddsCaps.dwCaps = DDSCAPS_OFFSCREENPLAIN
									| DDSCAPS_PRIMARYSURFACE
									| DDSCAPS_FLIP
									| DDSCAPS_COMPLEX;

	/* --- VIDEO CAPABILITIES --- */

	/* SUPPORTED FOURCC FORMATS */

	*pdwNumFourCC = fourCCCount;

	if (pdwFourCC)
	{
		DISPDBG((ppdev, 200, "Setting fourcc types\n"));
		for (i = 0; i < fourCCCount; i++)
		{
			pdwFourCC[i] = supportedFourCC[i];
		}
	}

	/* ADVANCED INFO CALLBACK */

	pHalInfo->GetDriverInfo = DdGetDriverInfo;
	pHalInfo->dwFlags = DDHALINFO_GETDRIVERINFOSET;

	pHalInfo->ddCaps.dwCKeyCaps |= DDCKEYCAPS_DESTOVERLAY          |
								   DDCKEYCAPS_DESTOVERLAYONEACTIVE |
								   DDCKEYCAPS_NOCOSTOVERLAY        |
								   DDCKEYCAPS_SRCOVERLAY           |
								   DDCKEYCAPS_SRCOVERLAYONEACTIVE  |
								   DDCKEYCAPS_SRCOVERLAYYUV;

	pHalInfo->ddCaps.dwFXCaps |= DDFXCAPS_OVERLAYARITHSTRETCHY |
								 DDFXCAPS_OVERLAYSTRETCHX      |
								 DDFXCAPS_OVERLAYSTRETCHY;

	/* FORCE 8-BYTE ALIGNMENT FOR OVERLAY DATA */

	pHalInfo->vmiData.dwOverlayAlign = 8;

	pHalInfo->ddCaps.dwCaps |= DDCAPS_OVERLAY            |
							   DDCAPS_OVERLAYSTRETCH     |
							   DDCAPS_OVERLAYFOURCC      |
							   DDCAPS_COLORKEYHWASSIST;

	pHalInfo->ddCaps.ddsCaps.dwCaps |= DDSCAPS_OVERLAY;

	pHalInfo->ddCaps.dwCaps2 = DDCAPS2_COPYFOURCC;

	pHalInfo->ddCaps.dwMaxVisibleOverlays = 1;

	/* Stretch limits are expressed in thousandths (1000 = 1:1). */

	pHalInfo->ddCaps.dwMinOverlayStretch   = 1000; /* values copied from win95 */
	pHalInfo->ddCaps.dwMinLiveVideoStretch = 1000;
	pHalInfo->ddCaps.dwMinHwCodecStretch   = 1000;

	pHalInfo->ddCaps.dwMaxOverlayStretch   = 8000;
	pHalInfo->ddCaps.dwMaxLiveVideoStretch = 8000;
	pHalInfo->ddCaps.dwMaxHwCodecStretch   = 8000;

	DISPDBG((ppdev, 300, "DrvGetDirectDrawInfo leaves\n"));
	return TRUE;
}

/*--------------------------------------------------------------------
 * DrvEnableDirectDraw
 *
 * Called by GDI when a DirectDraw program starts and DirectDraw is not
 * already active.  Publishes this driver's callback entry points in the
 * supplied tables and clears both pending-flip records.
 *
 *   dhpdev             driver PDEV handle.
 *   pCallBacks         driver-level callback table to fill.
 *   pSurfaceCallBacks  per-surface callback table to fill.
 *   pPaletteCallBacks  not used; no palette callbacks are installed.
 *
 * Always returns TRUE.
 *--------------------------------------------------------------------*/

BOOL DrvEnableDirectDraw(
	DHPDEV					dhpdev,
	DD_CALLBACKS*			pCallBacks,
	DD_SURFACECALLBACKS*	pSurfaceCallBacks,
	DD_PALETTECALLBACKS*	pPaletteCallBacks)
{
	PDEV *ppdev = (PDEV*) dhpdev;

	DISPDBG ((ppdev, 1, "DrvEnableDirectDraw Entry\n"));

	/* DRIVER-LEVEL CALLBACKS (graphics + video surface creation) */

	pCallBacks->WaitForVerticalBlank = DdWaitForVerticalBlank;
	pCallBacks->MapMemory            = DdMapMemory;
	pCallBacks->CanCreateSurface     = DdCanCreateSurface;
	pCallBacks->CreateSurface        = DdCreateSurface;
	pCallBacks->dwFlags = DDHAL_CB32_WAITFORVERTICALBLANK |
						  DDHAL_CB32_MAPMEMORY            |
						  DDHAL_CB32_CANCREATESURFACE     |
						  DDHAL_CB32_CREATESURFACE;

	/* SURFACE CALLBACKS (blt/flip/lock + overlay handling) */

	pSurfaceCallBacks->Blt                = DdBlt;
	pSurfaceCallBacks->Flip               = DdFlip;
	pSurfaceCallBacks->Lock               = DdLock;
	pSurfaceCallBacks->GetBltStatus       = DdGetBltStatus;
	pSurfaceCallBacks->GetFlipStatus      = DdGetFlipStatus;
	pSurfaceCallBacks->UpdateOverlay      = DdUpdateOverlay;
	pSurfaceCallBacks->SetOverlayPosition = DdSetOverlayPosition;
	pSurfaceCallBacks->DestroySurface     = DdDestroySurface;
	pSurfaceCallBacks->dwFlags = DDHAL_SURFCB32_BLT                |
								 DDHAL_SURFCB32_FLIP               |
								 DDHAL_SURFCB32_LOCK               |
								 DDHAL_SURFCB32_GETBLTSTATUS       |
								 DDHAL_SURFCB32_GETFLIPSTATUS      |
								 DDHAL_SURFCB32_UPDATEOVERLAY      |
								 DDHAL_SURFCB32_SETOVERLAYPOSITION |
								 DDHAL_SURFCB32_DESTROYSURFACE;

	/* NO FLIP IS OUTSTANDING YET, FOR EITHER VIDEO OR GRAPHICS */

	ppdev->VideoFlipRecord.bFlipFlag    = FALSE;
	ppdev->GraphicsFlipRecord.bFlipFlag = FALSE;

	DISPDBG((ppdev, 300, "DrvEnableDirectDraw leaves\n"));

	return TRUE;
}

/*------------------------------------------------------------------------
 * DrvDisableDirectDraw
 *
 * Called by GDI once the last DirectDraw program has quit and DirectDraw
 * is no longer active.  Invokes the PDEV's set_video_enable routine with
 * 0 (presumably switching the video overlay off - confirm against that
 * routine's implementation).
 *
 *   dhpdev  driver PDEV handle.
 *------------------------------------------------------------------------*/

VOID DrvDisableDirectDraw (DHPDEV dhpdev)
{
	PDEV *ppdev;

	ppdev = (PDEV*) dhpdev;

	DISPDBG ((ppdev, 1, "DrvDisableDirectDraw Entry\n"));

	ppdev->myfuncs.pfn_set_video_enable (0);
}

/*------------------------------------------------------------------------
 * FlipStatus
 *
 * Returns TRUE if the flip has occurred, else FALSE.  The answer is taken
 * solely from the MDC_LNCNT_FLIP bit of MDC_LINE_CNT_STATUS.
 *
 *   ppdev     driver PDEV (used for debug output only).
 *   fpVidMem  accepted for the caller's convenience; not consulted.
 *------------------------------------------------------------------------*/

BOOL FlipStatus (PDEV *ppdev, FLATPTR fpVidMem)
{
	DISPDBG((ppdev, 200, "FlipStatus\n"));

	/* CHECK IF THE NEW OFFSET HAS BEEN LATCHED */

	if (MEM_READ_REG32 (gfx_virt_regptr, MDC_LINE_CNT_STATUS) & MDC_LNCNT_FLIP)
	{
		return TRUE;
	}

	return FALSE;
}

/*-------------------------------------------------------------------------
 * ddrvalUpdateFlipStatus
 *
 * Checks whether the most recent display (graphics) flip has occurred.
 * This routine is not called for video flips.
 *
 * Only acts when a graphics flip is recorded as pending AND fpVidMem is
 * the surface that was flipped away from; in every other case there is
 * nothing to wait for.
 *
 *   ppdev     driver PDEV holding GraphicsFlipRecord.
 *   fpVidMem  video memory offset of the surface about to be touched.
 *
 * Returns DD_OK when the surface may be used (clearing the pending flag
 * if the hardware reports the flip as latched), or DDERR_WASSTILLDRAWING
 * while the flip is still in progress.
 *------------------------------------------------------------------------*/

HRESULT ddrvalUpdateFlipStatus(
	PDEV	*ppdev,
	FLATPTR fpVidMem)
{
	HRESULT flipResult = DD_OK;

	DISPDBG((ppdev, 200, "ddrvalUpdateFlipStatus\n"));

	if (!ppdev->GraphicsFlipRecord.bFlipFlag ||
		(fpVidMem != ppdev->GraphicsFlipRecord.fpFlipFrom))
	{
		/* NOTHING PENDING ON THIS SURFACE */

		DISPDBG((ppdev, 200, "no status, flip %d, fpVidMem %x, flipFrom %x\n",
			 ppdev->GraphicsFlipRecord.bFlipFlag, fpVidMem, ppdev->GraphicsFlipRecord.fpFlipFrom));
	}
	else if (MEM_READ_REG32 (gfx_virt_regptr, MDC_LINE_CNT_STATUS) & MDC_LNCNT_FLIP)
	{
		/* HARDWARE HAS LATCHED THE NEW OFFSET - FLIP FINISHED */

		ppdev->GraphicsFlipRecord.bFlipFlag = FALSE;
	}
	else
	{
		/* FLIP STILL IN PROGRESS */

		flipResult = DDERR_WASSTILLDRAWING;
	}

	DISPDBG((ppdev, 200, "ddrvalUpdateFlipStatus leaves\n"));

	return flipResult;
}

/*------------------------------------------------------------------------
 * DdBlt
 *
 * DirectDraw Blt callback.  Programs the graphics processor for:
 *
 *   - solid fills (DDBLT_COLORFILL) of surfaces whose dwReserved1 is 0;
 *   - unstretched copies between surfaces with equal dwReserved1,
 *     optionally source colour keyed (DDBLT_KEYSRCOVERRIDE):
 *       . dwReserved1 == 0      : display-format copy,
 *       . low nibble in the VIDEO_FORMAT_Y0Y1Y2Y3..VIDEO_FORMAT_Y1Y2Y3Y0
 *         range                 : planar 4:2:0 copy (Y, U, V planes),
 *       . any other non-zero    : 16-bit-per-pixel FOURCC copy.
 *
 * Overlapping copies within one surface are run in the negative X and/or
 * Y direction so the source is not overwritten before it is read.
 *
 *   lpBlt  blt request; lpBlt->ddRVal receives the result code.
 *
 * Returns DDHAL_DRIVER_HANDLED when ddRVal has been set (DD_OK, or
 * DDERR_WASSTILLDRAWING if a flip is pending or an async blt would have
 * to wait).  Returns DDHAL_DRIVER_NOTHANDLED for requests this routine
 * cannot perform: colour fill of a non-display-format surface, a missing
 * source surface, a stretch, or a copy between different formats.
 *------------------------------------------------------------------------*/

DWORD DdBlt (PDD_BLTDATA lpBlt)
{
	PDD_SURFACE_GLOBAL	srcSurf;
	PDD_SURFACE_GLOBAL	dstSurf;
	PDEV*				ppdev;
	HRESULT 			ddrval;
	DWORD				dstX;
	DWORD				dstY;
	DWORD				dwFlags;
	DWORD				dstWidth;
	DWORD				dstHeight;
	DWORD				srcWidth;
	DWORD				srcHeight;
	DWORD				srcX;
	DWORD				srcY;
	ULONG				dstOffset, srcOffset;
	unsigned long		bltMode;

	ppdev = (PDEV*) lpBlt->lpDD->dhpdev;

	DISPDBG((ppdev, 200, "DdBlt\n"));

	dstSurf = lpBlt->lpDDDestSurface->lpGbl;

	DISPDBG((ppdev, 200, "rops: rop %x, ddrop %x\n", lpBlt->bltFX.dwROP, lpBlt->bltFX.dwDDROP));

	/* IS A FLIP IN PROGRESS? */

	ddrval = ddrvalUpdateFlipStatus(ppdev, dstSurf->fpVidMem);
	if (ddrval != DD_OK)
	{
		lpBlt->ddRVal = ddrval;
		DISPDBG((ppdev, 200, "DdBlt leaves, flip in progress\n"));
		return(DDHAL_DRIVER_HANDLED);
	}

	dwFlags = lpBlt->dwFlags;
	DISPDBG((ppdev, 200, "dwFlags %x\n", dwFlags));
	if (dwFlags & DDBLT_ASYNC)
	{
		/* CHECK ASYNCHRONOUS */
		/* If async, then only work if we won't have to wait on the */
		/* accelerator to start the command.                        */

		if (BLIT_ENGINE_BUSY)
		{
			lpBlt->ddRVal = DDERR_WASSTILLDRAWING;
			DISPDBG((ppdev, 200, "DdBlt leaves, async blit not possible\n"));
			return (DDHAL_DRIVER_HANDLED);
		}
	}

	/* DEST RECT */

	dstX	  = lpBlt->rDest.left;
	dstY	  = lpBlt->rDest.top;
	dstWidth  = lpBlt->rDest.right  - lpBlt->rDest.left;
	dstHeight = lpBlt->rDest.bottom - lpBlt->rDest.top;

	DISPDBG((ppdev, 200, "dest org (%d,%d), size (%d,%d)\n",
				dstX, dstY, dstWidth, dstHeight));

	if (dwFlags & DDBLT_COLORFILL)
	{
		/* CHECK PIXEL FORMAT */
		/* The GX can't do color fills for off-screen surfaces that       */
		/* are a different pixel format than that of the primary display: */

		if (dstSurf->dwReserved1)
		{
			DISPDBG((ppdev, 2000, "Can't do colorfill to odd pixel format\n"));
			return(DDHAL_DRIVER_NOTHANDLED);
		}

		/* SOLID FILL */

		calculateOffset (dstOffset, dstX, dstY, dstSurf);

		GU2_WAIT_PENDING;
		WRITE_GP32 (MGP_RASTER_MODE, (gu2_bpp | 0xF0));
		WRITE_GP32 (MGP_PAT_COLOR_0, lpBlt->bltFX.dwFillColor);
		WRITE_GP32 (MGP_DST_OFFSET, dstOffset);
		WRITE_GP32 (MGP_WID_HEIGHT, ((dstWidth << 16) | (dstHeight & 0xFFFF)));
		WRITE_GP32 (MGP_STRIDE, dstSurf->lPitch);
		WRITE_GP32 (MGP_BLT_MODE, 0);

		lpBlt->ddRVal = DD_OK;
		return(DDHAL_DRIVER_HANDLED);
	}

	/* CHECK REMAINING COMMAND */
	/* We specified with Our ddCaps.dwCaps that we handle a limited number       */
	/* of commands, and by this point in our routine we've handled everything    */
	/* except DDBLT_ROP.  DirectDraw and GDI shouldn't pass us anything          */
	/* else; we'll assert on debug builds to prove this:                         */

	ASSERTVDD((dwFlags & DDBLT_ROP) && (lpBlt->lpDDSrcSurface),
		"Expected dwFlags commands of only DDBLT_ASYNC and DDBLT_COLORFILL");

	/* The assert vanishes in free builds, so refuse a source-less request */
	/* explicitly instead of dereferencing a NULL surface below.           */

	if (lpBlt->lpDDSrcSurface == NULL)
	{
		DISPDBG((ppdev, 300, "Unhandled blit, dwFlags %x\n", dwFlags));
		return(DDHAL_DRIVER_NOTHANDLED);
	}

	/* GET OFFSET, WIDTH AND HEIGHT FOR SOURCE */

	srcSurf 	 = lpBlt->lpDDSrcSurface->lpGbl;
	srcX		 = lpBlt->rSrc.left;
	srcY		 = lpBlt->rSrc.top;
	srcWidth	 = lpBlt->rSrc.right - lpBlt->rSrc.left;
	srcHeight	 = lpBlt->rSrc.bottom - lpBlt->rSrc.top;

	if ((srcWidth  != dstWidth)  ||
		(srcHeight != dstHeight) ||
		(srcSurf->dwReserved1 != dstSurf->dwReserved1))
	{
		/* STRETCH OR FORMAT CONVERSION - NOT SUPPORTED HERE               */
		/* Nothing has been drawn, so do not claim success: report the    */
		/* request as not handled, exactly as the colour fill path does   */
		/* for a surface it cannot fill.                                  */

		DISPDBG((ppdev, 300, "Unhandled blit, dwFlags %x\n", dwFlags));
		return(DDHAL_DRIVER_NOTHANDLED);
	}

	DISPDBG((ppdev, 200, "Screen to screen BLT\n"));

	// SCREEN TO SCREEN BLT

	if (srcSurf->dwReserved1 != 0)
	{
		if ((srcSurf->dwReserved1 & 0xF) >= VIDEO_FORMAT_Y0Y1Y2Y3 &&
			(srcSurf->dwReserved1 & 0xF) <= VIDEO_FORMAT_Y1Y2Y3Y0)
		{
			ULONG dstSurfHeight;
			ULONG srcSurfHeight;
			ULONG lumaExtra, chromaExtra;	/* start-offset bias for reversed blts */
			ULONG bltFlags;

			dstSurfHeight = dstSurf->wHeight;
			srcSurfHeight = srcSurf->wHeight;

			bltFlags = 0;
			lumaExtra = chromaExtra = 0;
			if (dstSurf == srcSurf)
			{
				if (dstY > srcY)
				{
					/* Moving down: start from the last row of each plane. */
					lumaExtra   = (dstHeight - 1) * srcSurf->lPitch;
					chromaExtra = ((dstHeight >> 1) - 1) * (srcSurf->lPitch >> 1);
					bltFlags    = MGP_BM_NEG_YDIR;
				}
				else if (dstY == srcY && dstX > srcX)
				{
					/* Moving right on the same rows: start from the last byte. */
					lumaExtra   =  dstWidth - 1;
					chromaExtra = (dstWidth >> 1) - 1;
					bltFlags    = MGP_BM_NEG_XDIR;
				}
			}

			// 4:2:0 Video - Y, U and V are stored in separate buffers.
			// Layout used below: Y plane (wHeight rows of lPitch bytes),
			// then U plane, then V plane (each wHeight/2 rows of lPitch/2
			// bytes).  The planes are copied as 8-bit data (raster mode
			// 0xCC without the display bpp bits).
			//
			dstOffset  = (dstY * dstSurf->lPitch) + dstSurf->fpVidMem;
			dstOffset +=  dstX;
			srcOffset  = (srcY * srcSurf->lPitch) + srcSurf->fpVidMem;
			srcOffset +=  srcX;

			// Copy Y data
			//
			GU2_WAIT_BUSY;
			WRITE_GP32 (MGP_RASTER_MODE, 0xCC);
			WRITE_GP32 (MGP_STRIDE, ((srcSurf->lPitch << 16) | (dstSurf->lPitch & 0xFFFF)));
			WRITE_GP32 (MGP_WID_HEIGHT, (dstWidth << 16) | dstHeight);
			WRITE_GP32 (MGP_DST_OFFSET, dstOffset + lumaExtra);
			WRITE_GP32 (MGP_SRC_OFFSET, srcOffset + lumaExtra);
			WRITE_GP32 (MGP_BLT_MODE, MGP_BM_SRC_FB | bltFlags);

			// Copy U Data
			// The chroma planes hold half as many rows as the Y plane,
			// so the starting chroma row is the luma row divided by two
			// (the original used the luma row directly, which addresses
			// the wrong rows whenever the rectangle does not start at
			// row 0).
			//
			dstOffset  = dstSurf->fpVidMem + (dstSurfHeight * dstSurf->lPitch);
			srcOffset  = srcSurf->fpVidMem + (srcSurfHeight * srcSurf->lPitch);
			dstOffset += ((dstY >> 1) * (dstSurf->lPitch >> 1)) + (dstX >> 1);
			srcOffset += ((srcY >> 1) * (srcSurf->lPitch >> 1)) + (srcX >> 1);

			GU2_WAIT_PENDING;
			WRITE_GP32 (MGP_STRIDE, (((srcSurf->lPitch >> 1) << 16) | ((dstSurf->lPitch >> 1) & 0xFFFF)));
			WRITE_GP32 (MGP_WID_HEIGHT, ((dstWidth >> 1) << 16) | (dstHeight >> 1));
			WRITE_GP32 (MGP_DST_OFFSET, dstOffset + chromaExtra);
			WRITE_GP32 (MGP_SRC_OFFSET, srcOffset + chromaExtra);
			WRITE_GP32 (MGP_BLT_MODE, MGP_BM_SRC_FB | bltFlags);

			// Copy V Data
			// Stride and width/height are left as programmed for U.
			//
			dstOffset  = dstSurf->fpVidMem + (dstSurfHeight * dstSurf->lPitch) + ((dstSurfHeight >> 1) * (dstSurf->lPitch >> 1));
			srcOffset  = srcSurf->fpVidMem + (srcSurfHeight * srcSurf->lPitch) + ((srcSurfHeight >> 1) * (srcSurf->lPitch >> 1));
			dstOffset += ((dstY >> 1) * (dstSurf->lPitch >> 1)) + (dstX >> 1);
			srcOffset += ((srcY >> 1) * (srcSurf->lPitch >> 1)) + (srcX >> 1);

			GU2_WAIT_PENDING;
			WRITE_GP32 (MGP_DST_OFFSET, dstOffset + chromaExtra);
			WRITE_GP32 (MGP_SRC_OFFSET, srcOffset + chromaExtra);
			WRITE_GP32 (MGP_BLT_MODE, MGP_BM_SRC_FB | bltFlags);

			// Restore the display-depth raster mode once the engine is idle.
			//
			GU2_WAIT_BUSY;
			WRITE_GP32 (MGP_RASTER_MODE, gu2_bpp);

			lpBlt->ddRVal = DD_OK;

			DISPDBG((ppdev, 200, "DdBlt leaves\n"));

			return DDHAL_DRIVER_HANDLED;
		}

		// FOURCC COPY
		// Data is 16-bit instead of native display BPP

		dstOffset  = (dstY * dstSurf->lPitch) + dstSurf->fpVidMem;
		dstOffset += (dstX << 1);
		srcOffset  = (srcY * srcSurf->lPitch) + srcSurf->fpVidMem;
		srcOffset += (srcX << 1);

		// Re-express the width in display pixels, since the engine stays
		// in the display's pixel depth for this copy.

		if      (ppdev->cPelSize == 2) dstWidth >>= 1;
		else if (ppdev->cPelSize == 0) dstWidth <<= 1;
	}
	else
	{
		calculateOffset (dstOffset, dstX, dstY, dstSurf);
		calculateOffset (srcOffset, srcX, srcY, srcSurf);
	}

	GU2_WAIT_PENDING;

	if (dwFlags & DDBLT_KEYSRCOVERRIDE)
	{
		/* TRANSPARENT BLT */

		WRITE_GP32 (MGP_RASTER_MODE, (gu2_bpp | 0xCC | MGP_RM_SRC_TRANS));
		WRITE_GP32 (MGP_SRC_COLOR_FG, lpBlt->bltFX.ddckSrcColorkey.dwColorSpaceLowValue & 0xFFFFFF);
		WRITE_GP32 (MGP_SRC_COLOR_BG, 0xFFFFFF);
	}
	else
	{
		WRITE_GP32 (MGP_RASTER_MODE, (gu2_bpp | 0xCC));
	}
	WRITE_GP32 (MGP_WID_HEIGHT, (dstWidth << 16) | dstHeight);
	WRITE_GP32 (MGP_STRIDE, ((srcSurf->lPitch << 16) | (dstSurf->lPitch & 0xFFFF)));

	/* PICK THE COPY DIRECTION */
	/* Within a single surface the rectangles may overlap; run the blt */
	/* backwards in X and/or Y and start from the far edge as needed.  */

	bltMode = MGP_BM_SRC_FB;
	if (dstSurf == srcSurf)
	{
		if (dstX > srcX)
		{
			bltMode |= MGP_BM_NEG_XDIR;

			srcOffset += (dstWidth << mode_shift) - 1;
			dstOffset += (dstWidth << mode_shift) - 1;
		}
		if (dstY > srcY)
		{
			bltMode |= MGP_BM_NEG_YDIR;

			srcOffset += (dstHeight - 1) * srcSurf->lPitch;
			dstOffset += (dstHeight - 1) * dstSurf->lPitch;
		}
	}

	WRITE_GP32 (MGP_DST_OFFSET, dstOffset);
	WRITE_GP32 (MGP_SRC_OFFSET, srcOffset);
	WRITE_GP32 (MGP_BLT_MODE, bltMode);

	lpBlt->ddRVal = DD_OK;

	DISPDBG((ppdev, 200, "DdBlt leaves\n"));

	return(DDHAL_DRIVER_HANDLED);
}

/*----------------------------------------------------------------------
 * DWORD DdFlip
 *
 * DirectDraw Flip callback.  Makes lpFlip->lpSurfTarg the displayed
 * surface, for either the video overlay or the graphics display, and
 * records the flip so later calls can tell whether it has completed.
 *
 *   lpFlip  flip request; lpFlip->ddRVal receives DD_OK, or
 *           DDERR_WASSTILLDRAWING if the previous flip of the same kind
 *           has not been latched by the hardware yet.
 *
 * Always returns DDHAL_DRIVER_HANDLED.
 *
 * Note that lpSurfCurr may not necessarily be valid.
 * NOTE(review): despite that warning, lpSurfCurr is dereferenced
 * unconditionally below - confirm it can never be NULL here.
 *----------------------------------------------------------------------*/

DWORD DdFlip (PDD_FLIPDATA lpFlip)
{
	PDEV *ppdev;
	ULONG ulMemoryOffset;

	ppdev	 = (PDEV*) lpFlip->lpDD->dhpdev;

	DISPDBG((ppdev, 250, "DdFlip\n"));

	ulMemoryOffset = lpFlip->lpSurfTarg->lpGbl->fpVidMem;

	if (lpFlip->lpSurfCurr->ddsCaps.dwCaps & DDSCAPS_OVERLAY)
	{
		DISPDBG((ppdev, 200, "Overlay flip\n"));

		/* Only act when the surface being flipped away from is the one */
		/* recorded in VideoFlipRecord; otherwise the request completes */
		/* with DD_OK and the hardware is left untouched.               */

		if (lpFlip->lpSurfCurr->lpGbl->fpVidMem == ppdev->VideoFlipRecord.fpFlipFrom)
		{
			/* CHECK FOR PREVIOUS VIDEO FLIP COMPLETED */
			/* We are not checking for the drawing engine busy.  The assumption */
			/* is that the GP is not used to transfer video surfaces.           */
			/* NOTE(review): the original comment justified this by "we do not  */
			/* have the copy FOURCC bit set", but DrvGetDirectDrawInfo reports  */
			/* DDCAPS2_COPYFOURCC - re-check whether the assumption holds.      */

			if (ppdev->VideoFlipRecord.bFlipFlag)
			{
				if (!(MEM_READ_REG32 (gfx_virt_regptr, MDC_LINE_CNT_STATUS) & MDC_LNCNT_VFLIP))
				{
					lpFlip->ddRVal = DDERR_WASSTILLDRAWING;
					return(DDHAL_DRIVER_HANDLED);
				}
			}

			/* REMEMBER THE FLIP */
			/* For video, fpFlipFrom holds the surface now being shown. */

			ppdev->VideoFlipRecord.bFlipFlag  = TRUE;
			ppdev->VideoFlipRecord.fpFlipFrom = ulMemoryOffset;

			SetVideoOffset (ppdev, ulMemoryOffset);
		}
	}
	else
	{
		/* WAIT FOR PREVIOUS FLIP */

		if (ppdev->GraphicsFlipRecord.bFlipFlag)
		{
			if (!(MEM_READ_REG32 (gfx_virt_regptr, MDC_LINE_CNT_STATUS) & MDC_LNCNT_FLIP))
			{
				lpFlip->ddRVal = DDERR_WASSTILLDRAWING;
				return(DDHAL_DRIVER_HANDLED);
			}
		}

		/* DISABLE COMPRESSION */
		/* Compression should be disabled when running a full-screen DDraw app.  */
		/* Unfortunately, there is no good way to know how to detect the app, as */
		/* DirectDraw is enabled when the driver loads.  So, when we flip the    */
		/* active display, we realize the desktop is no more and turn it off.    */

		/* HARDCODED REGISTER READ */

		if (MEM_READ_REG32 (gfx_virt_regptr, MDC_GENERAL_CFG) & MDC_GCFG_CMPE)
		{
			ppdev->myfuncs.pfn_set_compression_enable (0);
		}

		DISPDBG((ppdev, 200, "Ddraw flip\n"));

		ppdev->myfuncs.pfn_set_display_offset (ulMemoryOffset);

		/* REMEMBER THE FLIP */
		/* For graphics, fpFlipFrom holds the surface just flipped away */
		/* from; ddrvalUpdateFlipStatus compares against it.            */

		ppdev->GraphicsFlipRecord.bFlipFlag  = TRUE;
		ppdev->GraphicsFlipRecord.fpFlipFrom = lpFlip->lpSurfCurr->lpGbl->fpVidMem;
	}

	lpFlip->ddRVal = DD_OK;

	DISPDBG((ppdev, 250, "DdFlip leaves\n"));

	return(DDHAL_DRIVER_HANDLED);
}

/*-----------------------------------------------------------------------------
 * DdLock
 *
 * DirectDraw Lock callback.  Makes sure the surface is safe for direct CPU
 * access: no flip away from it may be pending and the blit buffer must be
 * idle.
 *
 *   lpLock  lock request.
 *
 * Returns DDHAL_DRIVER_HANDLED with ddRVal = DDERR_WASSTILLDRAWING when the
 * surface cannot be locked yet; otherwise returns DDHAL_DRIVER_NOTHANDLED
 * without touching ddRVal.
 *-----------------------------------------------------------------------------*/

DWORD DdLock(PDD_LOCKDATA lpLock)
{
	PDEV	*ppdev = (PDEV*) lpLock->lpDD->dhpdev;
	HRESULT flipState;

	DISPDBG((ppdev, 200, "DdLock\n"));

	/* REFUSE THE LOCK WHILE A PHYSICAL FLIP OF THIS SURFACE IS PENDING */

	flipState = ddrvalUpdateFlipStatus(ppdev, lpLock->lpDDSurface->lpGbl->fpVidMem);
	if (flipState != DD_OK)
	{
		lpLock->ddRVal = DDERR_WASSTILLDRAWING;
		return(DDHAL_DRIVER_HANDLED);
	}

	/* ACCELERATOR SYNCHRONISATION                                              */
	/* Windows NT lets applications draw straight into the frame buffer while   */
	/* the accelerator is running and does not serialise on a Win16Lock, and    */
	/* several threads may hold different surface locks at once.  The driver    */
	/* therefore has to honour DDLOCK_WAIT itself by spinning until the         */
	/* accelerator is done; without the flag it reports "still drawing".        */
	/* (On Windows 95 this does not really matter.)                             */

	if (!(lpLock->dwFlags & DDLOCK_WAIT))
	{
		if (BLIT_BUFFER_BUSY)
		{
			lpLock->ddRVal = DDERR_WASSTILLDRAWING;
			return(DDHAL_DRIVER_HANDLED);
		}
	}
	else
	{
		WAIT_BLIT_BUFFER;
	}

	DISPDBG((ppdev, 200, "DdLock leaves\n"));

	return(DDHAL_DRIVER_NOTHANDLED);
}

/*------------------------------------------------------------------------
 * DdGetBltStatus
 *
 * DirectDraw GetBltStatus callback.  Apart from the pending-flip check in
 * the DDGBS_CANBLT case, the surface named in the request is ignored; the
 * answer reflects the state of the blit hardware as a whole.
 *
 *   lpGetBltStatus  status request.  dwFlags == DDGBS_CANBLT asks whether
 *                   a new blt can be queued; any other value is treated
 *                   as "has the last blt finished?".  ddRVal receives
 *                   DD_OK or DDERR_WASSTILLDRAWING.
 *
 * Always returns DDHAL_DRIVER_HANDLED.
 *------------------------------------------------------------------------*/

DWORD DdGetBltStatus (PDD_GETBLTSTATUSDATA lpGetBltStatus)
{
	PDEV	*ppdev = (PDEV*) lpGetBltStatus->lpDD->dhpdev;
	HRESULT status = DD_OK;

	DISPDBG((ppdev, 200, "DdGetBltStatus\n"));

	if (lpGetBltStatus->dwFlags != DDGBS_CANBLT)
	{
		/* DDGBS_ISBLTDONE -> IS A BLT IN PROGRESS? */

		if (BLIT_BUFFER_BUSY)
		{
			status = DDERR_WASSTILLDRAWING;
		}
	}
	else
	{
		/* DDGBS_CANBLT -> CAN WE ADD A BLT                      */
		/* A pending flip blocks it; otherwise the engine must   */
		/* be able to accept the command without waiting.        */

		status = ddrvalUpdateFlipStatus(ppdev,
						lpGetBltStatus->lpDDSurface->lpGbl->fpVidMem);

		if ((status == DD_OK) && (BLIT_ENGINE_BUSY))
		{
			status = DDERR_WASSTILLDRAWING;
		}
	}

	lpGetBltStatus->ddRVal = status;

	DISPDBG((ppdev, 200, "DdGetBltStatus leaves\n"));

	return(DDHAL_DRIVER_HANDLED);
}

/*------------------------------------------------------------------------
 * DdMapMemory
 *
 * This is a new DDI call specific to Windows NT that is used to map
 * or unmap all the application modifiable portions of the frame buffer
 * into the specified process's address space.
 *
 *   lpMapMemory  request.  bMap selects map (TRUE) or unmap (FALSE);
 *                hProcess identifies the target process; fpProcess
 *                receives the mapped address on map and supplies the
 *                address to release on unmap.  ddRVal receives DD_OK,
 *                or DDERR_GENERIC if the mapping IOCTL fails.  A failed
 *                unmap is only logged and still reports DD_OK.
 *
 * Always returns DDHAL_DRIVER_HANDLED.
 *------------------------------------------------------------------------*/

DWORD DdMapMemory(PDD_MAPMEMORYDATA lpMapMemory)
{
	PDEV*							ppdev;
	VIDEO_SHARE_MEMORY				ShareMemory;
	VIDEO_SHARE_MEMORY_INFORMATION	ShareMemoryInformation;
	DWORD							ReturnedDataLength;
	DWORD                           heap_size;

	ppdev = (PDEV*) lpMapMemory->lpDD->dhpdev;

	DISPDBG((ppdev, 200, "DdMapMemory\n"));
	DISPDBG((ppdev, 200, "hProc %x, fpProc %x, bMap %d\n", lpMapMemory->hProcess,
			lpMapMemory->fpProcess, lpMapMemory->bMap));

	/* BMAP == TRUE -> MAP THE MEMORY */

	if (lpMapMemory->bMap)
	{
		DISPDBG((ppdev, 200, "mapping memory\n"));

		ShareMemory.ProcessHandle = lpMapMemory->hProcess;

		/* REQUESTEDVIRTUALADDRESS NOT USED */

		ShareMemory.RequestedVirtualAddress = 0;

		/* MAP TO TOP OF THE FRAMEBUFFER */

		ShareMemory.ViewOffset = 0;

		/* MAP THE FRAME BUFFER                                              */
		/*                                                                   */
		/* We map down to the end of the frame buffer.                       */
		/*                                                                   */
		/* Note: There is a 64k granularity on the mapping (meaning that     */
		/*		 we have to round up to 64k).                                */
		/*                                                                   */
		/* Note: If there is any portion of the frame buffer that must       */
		/*		 not be modified by an application, that portion of memory   */
		/*		 MUST NOT be mapped in by this call.  This would include     */
		/*		 any data that, if modified by a malicious application,      */
		/*		 would cause the driver to crash.  This could include, for   */
		/*		 example, any DSP code that is kept in off-screen memory.    */

		/* ALLOCATE SPACE FOR 1600x1200 WITH A 4K PITCH             */
		/* Allocate less if the current framebuffer size is smaller */

		heap_size = MIN(gfx_fb_size, 0x580000);
		ShareMemory.ViewSize = ROUND_UP_TO_64K(heap_size);

		/* EngDeviceIoControl returns non-zero on failure. */

		if (EngDeviceIoControl(ppdev->hDriver,
							   IOCTL_VIDEO_SHARE_VIDEO_MEMORY,
							   &ShareMemory,
							   sizeof(VIDEO_SHARE_MEMORY),
							   &ShareMemoryInformation,
							   sizeof(VIDEO_SHARE_MEMORY_INFORMATION),
							   &ReturnedDataLength))
		{
			DISPDBG((ppdev, 300, "Failed IOCTL_VIDEO_SHARE_MEMORY"));

			lpMapMemory->ddRVal = DDERR_GENERIC;
			return(DDHAL_DRIVER_HANDLED);
		}

		DISPDBG((ppdev, 200, "Returned address %x\n",
			ShareMemoryInformation.VirtualAddress));

		/* fpProcess is a FLATPTR; convert the pointer to that type    */
		/* directly rather than through DWORD, which would discard the */
		/* upper half of the address where pointers exceed 32 bits.    */

		lpMapMemory->fpProcess = (FLATPTR) ShareMemoryInformation.VirtualAddress;
	}

	/* ELSE, UNMAP */
	else
	{
		DISPDBG((ppdev, 300, "unmap\n"));

		ShareMemory.ProcessHandle			= lpMapMemory->hProcess;
		ShareMemory.ViewOffset				= 0;
		ShareMemory.ViewSize				= 0;
		ShareMemory.RequestedVirtualAddress = (VOID*) lpMapMemory->fpProcess;

		/* Best effort: a failure here is logged but not reported. */

		if (EngDeviceIoControl(ppdev->hDriver,
							   IOCTL_VIDEO_UNSHARE_VIDEO_MEMORY,
							   &ShareMemory,
							   sizeof(VIDEO_SHARE_MEMORY),
							   NULL,
							   0,
							   &ReturnedDataLength))
		{
			DISPDBG ((ppdev, 5000, "Failed IOCTL_VIDEO_UNSHARE_MEMORY\n"));
		}
	}

	lpMapMemory->ddRVal = DD_OK;

	DISPDBG((ppdev, 200, "DdMapMemory leaves\n"));

	return(DDHAL_DRIVER_HANDLED);
}

/*-----------------------------------------------------------------------
 * DdGetFlipStatus
 *
 * DirectDraw GetFlipStatus callback.  Reports DD_OK once the hardware
 * has latched the most recent flip and DDERR_WASSTILLDRAWING while the
 * flipped page is still being "drawn".  For graphics surfaces queried
 * with DDGFS_CANFLIP, DDERR_WASSTILLDRAWING is also reported while the
 * bltter is busy.
 *
 * Overlay surfaces are tracked separately from the graphics display,
 * through VideoFlipRecord and the MDC_LNCNT_VFLIP status bit.
 *
 *   lpGetFlipStatus  status request; ddRVal receives the result.
 *
 * Always returns DDHAL_DRIVER_HANDLED.
 *-----------------------------------------------------------------------*/

DWORD DdGetFlipStatus(PDD_GETFLIPSTATUSDATA lpGetFlipStatus)
{
	PDEV	*ppdev = (PDEV*) lpGetFlipStatus->lpDD->dhpdev;
	HRESULT status;

	DISPDBG((ppdev, 200, "DdGetFlipStatus\n"));

	/* HANDLE OVERLAY FLIPPING SEPARATE FROM GRAPHICS FLIPPING */

	if (lpGetFlipStatus->lpDDSurface->ddsCaps.dwCaps & DDSCAPS_OVERLAY)
	{
		/* With no video flip pending the answer is trivially DD_OK; */
		/* the status register is only consulted when one is.        */

		status = DD_OK;

		if (ppdev->VideoFlipRecord.bFlipFlag)
		{
			if (MEM_READ_REG32 (gfx_virt_regptr, MDC_LINE_CNT_STATUS) & MDC_LNCNT_VFLIP)
			{
				/* FLIP LATCHED - CLEAR THE FLIP FLAG */

				ppdev->VideoFlipRecord.bFlipFlag = FALSE;
			}
			else
			{
				status = DDERR_WASSTILLDRAWING;
			}
		}

		lpGetFlipStatus->ddRVal = status;
		return(DDHAL_DRIVER_HANDLED);
	}

	/* CHECK LAST FLIP STATUS */

	status = ddrvalUpdateFlipStatus(ppdev, ppdev->GraphicsFlipRecord.fpFlipFrom);

	/* CHECK BLT BUSY */
	/* Only relevant when the caller wants to know whether it may flip. */

	if ((lpGetFlipStatus->dwFlags == DDGFS_CANFLIP) &&
		(status == DD_OK) &&
		(BLIT_BUFFER_BUSY))
	{
		status = DDERR_WASSTILLDRAWING;
	}

	lpGetFlipStatus->ddRVal = status;

	DISPDBG((ppdev, 200, "DdGetFlipStatus leaves\n"));

	return(DDHAL_DRIVER_HANDLED);
}


/*-----------------------------------------------------------------------
 * DdInVerticalBlank
 *
 * Returns TRUE if vertical blank is active, otherwise FALSE.  The result
 * is the logical inverse of the PDEV's test_vertical_active routine.
 *
 *   ppdev  driver PDEV supplying the hardware test routine.
 *-----------------------------------------------------------------------*/

BOOL DdInVerticalBlank(PDEV *ppdev)
{
	if (ppdev->myfuncs.pfn_test_vertical_active())
	{
		return FALSE;
	}

	return TRUE;
}

/*-----------------------------------------------------------------------
 * DdWaitForVerticalBlank
 *
 * DirectDraw WaitForVerticalBlank callback.
 *
 *   lpWaitForVerticalBlank  request; dwFlags selects the operation:
 *       DDWAITVB_I_TESTVB    store the current blanking state in bIsInVB.
 *       DDWAITVB_BLOCKBEGIN  spin until the next vertical blank starts.
 *       DDWAITVB_BLOCKEND    spin until a vertical blank has ended.
 *
 * Returns DDHAL_DRIVER_HANDLED with ddRVal = DD_OK for the three
 * operations above, DDHAL_DRIVER_NOTHANDLED for any other dwFlags value.
 * The blocking operations poll the hardware with no timeout.
 *-----------------------------------------------------------------------*/

DWORD DdWaitForVerticalBlank(PDD_WAITFORVERTICALBLANKDATA lpWaitForVerticalBlank)
{
	PDEV	*ppdev = (PDEV*) lpWaitForVerticalBlank->lpDD->dhpdev;
	DWORD	request;

	DISPDBG((ppdev, 200, "DdWaitForVerticalBlank\n"));

	request = lpWaitForVerticalBlank->dwFlags;

	if (request == DDWAITVB_I_TESTVB)
	{
		/* TESTVB -> CURRENT VBLANK STATUS */

		lpWaitForVerticalBlank->bIsInVB = DdInVerticalBlank(ppdev) ? TRUE : FALSE;
	}
	else if (request == DDWAITVB_BLOCKBEGIN)
	{
		/* BLOCKBEGIN -> LEAVE ANY CURRENT VBLANK, THEN WAIT FOR THE NEXT ONE */

		while (DdInVerticalBlank(ppdev))
		{
		}
		while (!DdInVerticalBlank(ppdev))
		{
		}
	}
	else if (request == DDWAITVB_BLOCKEND)
	{
		/* BLOCKEND -> WAIT FOR VBLANK TO START, THEN FOR IT TO END */

		while (!DdInVerticalBlank(ppdev))
		{
		}
		while (DdInVerticalBlank(ppdev))
		{
		}
	}
	else
	{
		/* UNRECOGNISED REQUEST */

		DISPDBG((ppdev, 200, "DdWaitForVerticalBlank leaves\n"));

		return(DDHAL_DRIVER_NOTHANDLED);
	}

	lpWaitForVerticalBlank->ddRVal = DD_OK;
	return(DDHAL_DRIVER_HANDLED);
}

/*-----------------------------------------------------------------------
 * DdCanCreateSurface
 *
 * Tells DirectDraw whether the driver can create the surface described
 * by 'lpDDSurfaceDesc'.
 *
 *   - Surfaces in the primary's pixel format are always accepted.
 *   - FOURCC surfaces are accepted for YUY2, YVYU, UYVY (16 bits per
 *     pixel) and YV12, I420, IYUV (12 bits per pixel); 'dwYUVBitCount'
 *     in the surface description is filled in accordingly.
 *   - A non-FOURCC surface in a different format is accepted only when
 *     it is an overlay, is RGB, is 16 bits per pixel and has bit 15 set
 *     in its red mask.
 *   - A different-format overlay is refused with DDERR_OUTOFCAPS while
 *     'ppdev->OverlayActive' is set.
 *
 * Every path writes 'ddRVal' (DD_OK, DDERR_OUTOFCAPS or
 * DDERR_INVALIDPIXELFORMAT) and returns DDHAL_DRIVER_HANDLED.
 *-----------------------------------------------------------------------*/

DWORD DdCanCreateSurface (PDD_CANCREATESURFACEDATA lpCanCreateSurface)
{
	PDEV           *ppdev;
	DWORD			dwRet;
	LPDDSURFACEDESC lpSurfaceDesc;

	ppdev = (PDEV*) lpCanCreateSurface->lpDD->dhpdev;

	DISPDBG((ppdev, 200, "DdCanCreateSurface\n"));

	lpSurfaceDesc = lpCanCreateSurface->lpDDSurfaceDesc;

	/* 'dwRet' stays NOTHANDLED until some check accepts the surface; */
	/* the common failure path below keys off that value.             */

	dwRet = DDHAL_DRIVER_NOTHANDLED;

	if (lpCanCreateSurface->bIsDifferentPixelFormat) 
	{
		DISPDBG((ppdev, 200, "Different pixel format\n"));
		
		if(lpSurfaceDesc->ddsCaps.dwCaps & DDSCAPS_OVERLAY) 
		{
			DISPDBG((ppdev, 200, "overlay surface\n"));

			if(ppdev->OverlayActive) 
			{
				/* REFUSE A NEW OVERLAY WHILE ONE IS ACTIVE                  */
				/* Fail explicitly, like every other rejection in this       */
				/* routine, instead of returning NOTHANDLED with 'ddRVal'    */
				/* left unwritten.                                           */

				DISPDBG((ppdev, 200, "Surface already active\n"));
				lpCanCreateSurface->ddRVal = DDERR_OUTOFCAPS;
				return(DDHAL_DRIVER_HANDLED);
			}
		}

		if (lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_FOURCC) 
		{
			/* CHECK FOR SUPPORTED FOURCC TYPE */

			DISPDBG((ppdev, 200, "FourCC format 0x%x\n",
				lpSurfaceDesc->ddpfPixelFormat.dwFourCC));

			switch(lpSurfaceDesc->ddpfPixelFormat.dwFourCC) 
			{
				case FOURCC_YUY2:
					lpSurfaceDesc->ddpfPixelFormat.dwYUVBitCount = 16;
					dwRet = DDHAL_DRIVER_HANDLED;
					DISPDBG((ppdev, 200, "Handled yuy2\n"));
					break;

				case FOURCC_YVYU:
					lpSurfaceDesc->ddpfPixelFormat.dwYUVBitCount = 16;
					dwRet = DDHAL_DRIVER_HANDLED;
					DISPDBG((ppdev, 200, "Handled yvyu\n"));
					break;

				case FOURCC_UYVY:
					lpSurfaceDesc->ddpfPixelFormat.dwYUVBitCount = 16;
					dwRet = DDHAL_DRIVER_HANDLED;
					DISPDBG((ppdev, 200, "Handled uyvy\n"));
					break;

				case FOURCC_YV12:
				case FOURCC_I420:
				case FOURCC_IYUV:
					lpSurfaceDesc->ddpfPixelFormat.dwYUVBitCount = 12;
					dwRet = DDHAL_DRIVER_HANDLED;
					DISPDBG((ppdev, 200, "Handled yv12\n"));
					break;

				default:
					DISPDBG((ppdev, 200, "Punted\n"));
					break;
			}
		} 
		else 
		{
			if(lpSurfaceDesc->ddsCaps.dwCaps & DDSCAPS_OVERLAY)
			{
				/* ONLY ALLOW RGB SURFACES DIFFERENT FROM THE PRIMARY FOR OVERLAYS */

				if(lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_RGB) 
				{
					/* 16BPP with bit 15 in the red mask, i.e. 5:6:5 and not 5:5:5 */

					if(lpSurfaceDesc->ddpfPixelFormat.dwRGBBitCount == 16 &&
					  (lpSurfaceDesc->ddpfPixelFormat.dwRBitMask & 0x8000))
					{
						DISPDBG((ppdev, 200, "Handled 5:6:5 RGB format\n"));
						dwRet = DDHAL_DRIVER_HANDLED;
					}
				}
			}
		}
	} 
	else 
	{
		DISPDBG((ppdev, 200, "trivial surface\n"));
		
		/* TRIVIAL SURFACE */
		/* It's trivially easy to create plain surfaces that are the same  */
		/* type as the primary surface:                                    */

		dwRet = DDHAL_DRIVER_HANDLED;
	}

	/* COMMON FAILURE PATH */
	/* Nothing above accepted the format: trace it and fail the request. */

	if (dwRet == DDHAL_DRIVER_NOTHANDLED)
	{
		DISPDBG((ppdev, 200, "non trivial surface\n"));

		if (lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_RGB)
		{
			DISPDBG((ppdev, 300, "Failed creation of %libpp RGB surface %lx %lx %lx\n",
				lpSurfaceDesc->ddpfPixelFormat.dwRGBBitCount,
				lpSurfaceDesc->ddpfPixelFormat.dwRBitMask,
				lpSurfaceDesc->ddpfPixelFormat.dwGBitMask,
				lpSurfaceDesc->ddpfPixelFormat.dwBBitMask));
		}
		else
		{
			DISPDBG((ppdev, 300, "Failed creation of type 0x%lx YUV 0x%lx surface\n",
				lpSurfaceDesc->ddpfPixelFormat.dwFlags,
				lpSurfaceDesc->ddpfPixelFormat.dwFourCC));
		}

		lpCanCreateSurface->ddRVal = DDERR_INVALIDPIXELFORMAT;
		return DDHAL_DRIVER_HANDLED;
	}

	lpCanCreateSurface->ddRVal = DD_OK;

	DISPDBG((ppdev, 200, "DdCanCreateSurface leaves\n"));
	return(dwRet);
}

/*-----------------------------------------------------------------------
 * DdCreateSurface
 *
 * Creates the single surface in 'lplpSList[0]'.  The routine works out
 * the pitch and byte size for the surface, reserves room for it with
 * AddDDrawBitmap and stores the resulting offset ('ddraw_offset') in
 * 'fpVidMem'.
 *
 *   - FOURCC surfaces: pitch is 16-byte aligned for the 12bpp planar
 *     formats (YV12/I420/IYUV) and dword aligned for the 16bpp packed
 *     formats.  An unrecognized FOURCC fails the call with
 *     DDERR_INVALIDPIXELFORMAT.
 *   - RGB overlay surfaces: 16bpp only.
 *   - Other RGB surfaces: must match the primary's bits per pixel.
 *
 * For overlay surfaces the geometry, format and U/V plane offsets are
 * also saved in g_wWidth, g_wHeight, g_lLinearPitch, Format, uextra and
 * vextra, which DdUpdateOverlay reads back (Format, uextra and vextra are
 * declared outside this function).
 *
 * Always returns DDHAL_DRIVER_HANDLED; 'ddRVal' carries DD_OK,
 * DDERR_OUTOFVIDEOMEMORY, DDERR_INVALIDPIXELFORMAT or DDERR_OUTOFCAPS.
 *-----------------------------------------------------------------------*/

DWORD DdCreateSurface (PDD_CREATESURFACEDATA lpCreateSurface)
{
	PDEV *ppdev;
	DD_SURFACE_LOCAL *lpSurfaceLocal;
	DD_SURFACE_GLOBAL *lpSurfaceGlobal;
	LPDDSURFACEDESC lpSurfaceDesc;
	DWORD dwByteCount;
	LONG lLinearPitch;
	ULONG surfaceSize;
	ULONG vidFormat  = 0;
	ULONG vid_uextra = 0;	/* only meaningful for 4:2:0; zero otherwise */
	ULONG vid_vextra = 0;	/* only meaningful for 4:2:0; zero otherwise */
	int ufirst = 0;			/* non-zero when the U plane precedes V      */

	ppdev = (PDEV*) lpCreateSurface->lpDD->dhpdev;

	DISPDBG((ppdev, 200, "DdCreateSurface\n"));

	lpCreateSurface->ddRVal = DD_OK;

	/* On Windows NT, dwSCnt will always be 1, so there will only ever  */
	/* be one entry in the 'lplpSList' array:                           */

	lpSurfaceLocal	= lpCreateSurface->lplpSList[0];
	lpSurfaceGlobal = lpSurfaceLocal->lpGbl;
	lpSurfaceDesc	= lpCreateSurface->lpDDSurfaceDesc;

	DISPDBG((ppdev, 200, "Surface local 0x%x, global 0x%x, desc 0x%x\n",
			lpSurfaceLocal, lpSurfaceGlobal, lpSurfaceDesc));

	/* DOUBLE CHECK */
	/* We repeat the same checks we did in 'DdCanCreateSurface' because      */
	/* it's possible that an application doesn't call 'DdCanCreateSurface'   */
	/* before calling 'DdCreateSurface'.                                     */

	ASSERTVDD(lpSurfaceGlobal->ddpfSurface.dwSize == sizeof(DDPIXELFORMAT),
		"NT is supposed to guarantee that ddpfSurface.dwSize is valid");

	DISPDBG((ppdev, 200, "Caps %x, flags %x\n",
			lpSurfaceLocal->ddsCaps.dwCaps,
			lpSurfaceGlobal->ddpfSurface.dwFlags));
	DISPDBG((ppdev, 200, "ddpfSurface.dwRBitMask %x, ppdev->flRed %x\n",
			lpSurfaceGlobal->ddpfSurface.dwRBitMask, ppdev->flRed));

	/* ALLOCATE A SURFACE */
	/* The DDraw heap grows from the end of memory backward.  If a surface needs  */
	/* more memory than is available, we try to free up memory by packing the GDI */
	/* heap or by relocating GDI bitmaps to system memory.  If we still don't     */
	/* have room, we fail the call.                                               */

	/* CHECK IF THERE IS A HEAP */

	if (!heap_start)
	{
		lpCreateSurface->ddRVal = DDERR_OUTOFVIDEOMEMORY;
		return(DDHAL_DRIVER_HANDLED);
	}

	/* CHECK FOURCC CODES */

	if (lpSurfaceGlobal->ddpfSurface.dwFlags & DDPF_FOURCC)
	{
		DISPDBG((ppdev, 200, "Deterining FourCC surface type\n"));

		switch(lpSurfaceGlobal->ddpfSurface.dwFourCC) 
		{
			case FOURCC_YUY2:
				vidFormat = VIDEO_FORMAT_YUYV;
				break;
			case FOURCC_YVYU:
				vidFormat = VIDEO_FORMAT_YVYU;
				break;
			case FOURCC_UYVY:
				vidFormat = VIDEO_FORMAT_UYVY;
				break;
			case FOURCC_YV12:
				vidFormat = VIDEO_FORMAT_Y0Y1Y2Y3;
				break;
			case FOURCC_IYUV:
			case FOURCC_I420:
				vidFormat = VIDEO_FORMAT_Y0Y1Y2Y3;
				ufirst = 1;
				break;
			default:
				/* UNSUPPORTED FOURCC                                        */
				/* Fail right here.  Carrying on would size and allocate a   */
				/* surface from an undefined format and then report DD_OK.   */

				DISPDBG((ppdev, 3000, "Unexpected FourCC surface %x\n",
					lpSurfaceGlobal->ddpfSurface.dwFourCC));
				lpCreateSurface->ddRVal = DDERR_INVALIDPIXELFORMAT;
				return(DDHAL_DRIVER_HANDLED);
		}

		DISPDBG((ppdev, 200, "Created YUV: %li x %li\n",
			lpSurfaceGlobal->wWidth, lpSurfaceGlobal->wHeight));

		/* CALCULATE THE PITCH AND TOTAL SIZE */
		/* We have to fill in the pitch for FOURCC surfaces.  GX2 hardware   */
		/* has some pitch hardware restrictions.  For 4:2:0, the u and v     */
		/* pitches must be qword aligned.  As applications infer that the uv */
		/* pitches are 1/2 the y pitch, the y pitch must be 16 byte aligned. */
			
		if (vidFormat == VIDEO_FORMAT_Y0Y1Y2Y3)
		{
			unsigned long offset1, offset2;

			lpSurfaceGlobal->ddpfSurface.dwYUVBitCount = 12;
			lLinearPitch =  (lpSurfaceGlobal->wWidth + 15) & ~0xF;

			/* Y plane plus two chroma planes: 1.5 * height * pitch bytes */

			surfaceSize  = ((lpSurfaceGlobal->wHeight) * lLinearPitch) +
					       ((lpSurfaceGlobal->wHeight) * (lLinearPitch >> 1));

			/* 'offset1' is the first chroma plane (right after Y) and    */
			/* 'offset2' the second; which one holds U depends on FOURCC. */

			offset1 = lpSurfaceGlobal->wHeight * lLinearPitch;
			offset2 = offset1 + (lpSurfaceGlobal->wHeight >> 1) * (lLinearPitch >> 1);

			if (ufirst)
			{
				vid_uextra = offset1;
				vid_vextra = offset2;
			}
			else
			{
				vid_uextra = offset2;
				vid_vextra = offset1;
			}
		}
		else
		{
			lpSurfaceGlobal->ddpfSurface.dwYUVBitCount = 16;
			lLinearPitch = (lpSurfaceGlobal->wWidth * 2 + 3) & ~3;
			surfaceSize  = (lpSurfaceGlobal->wHeight * lLinearPitch);
		}

		/* STORE THE SURFACE SPECIFIC INFORMATION */

		lpSurfaceGlobal->lPitch 	  = lLinearPitch;
		lpSurfaceGlobal->dwReserved1  = vidFormat | DD_RESERVED_DIFFERENTPIXELFORMAT;

		lpSurfaceDesc->lPitch	= lLinearPitch;
		lpSurfaceDesc->dwFlags |= DDSD_PITCH;

		/* UPDATE THE SURFACE SIZE TO FORCE 16-BYTE ALIGNMENT */
		/* GX2 has an alignment restriction for video offsets.  To meet this */
		/* restriction, we increase the bitmap size such that the resulting  */
		/* offset will be 16-byte aligned.                                   */

		surfaceSize += (ddraw_offset - surfaceSize) & 0xF;
	}
	else
	{
		/* RGB SURFACE                        */
		/* Could still be an overlay surface. */

		if (lpSurfaceLocal->ddsCaps.dwCaps & DDSCAPS_OVERLAY)
		{
			DISPDBG((ppdev, 200, "RGB surface type\n"));
			dwByteCount = lpSurfaceGlobal->ddpfSurface.dwRGBBitCount >> 3;

			DISPDBG((ppdev, 200, "Creating RGB %libpp: %li x %li Red: %lx\n",
				8 * dwByteCount, lpSurfaceGlobal->wWidth, lpSurfaceGlobal->wHeight,
				lpSurfaceGlobal->ddpfSurface.dwRBitMask));

			/* 16BPP ONLY */

			if (dwByteCount != 2) 
			{
				DISPDBG((ppdev, 300, "Invalid byte count\n"));
				lpCreateSurface->ddRVal = DDERR_INVALIDPIXELFORMAT;
				return(DDHAL_DRIVER_HANDLED);
			}

			vidFormat = VIDEO_FORMAT_RGB;
			lLinearPitch = (lpSurfaceGlobal->wWidth * 2 + 3) & ~3;
			surfaceSize  = (lpSurfaceGlobal->wHeight * lLinearPitch);

			lpSurfaceGlobal->lPitch 	  = lLinearPitch;
			lpSurfaceGlobal->dwReserved1  = vidFormat | DD_RESERVED_DIFFERENTPIXELFORMAT;

			lpSurfaceDesc->lPitch	= lLinearPitch;
			lpSurfaceDesc->dwFlags |= DDSD_PITCH;

			/* UPDATE THE SURFACE SIZE TO FORCE 16-BYTE ALIGNMENT */
			/* GX2 has an alignment restriction for video offsets.  To meet this */
			/* restriction, we increase the bitmap size such that the resulting  */
			/* offset will be 16-byte aligned.                                   */

			surfaceSize += (ddraw_offset - surfaceSize) & 0xF;
		}
		else
		{
			if (lpSurfaceGlobal->ddpfSurface.dwRGBBitCount != ppdev->cBitsPerPel)
			{
				DISPDBG ((ppdev, 3000, "Failing RGB Surface creation different from primary.\n"));
				lpCreateSurface->ddRVal = DDERR_INVALIDPIXELFORMAT;
				return(DDHAL_DRIVER_HANDLED);
			}

			lpSurfaceGlobal->dwReserved1 = 0;

			/* The primary uses the mode pitch; anything else gets a */
			/* dword-aligned pitch derived from its own width.       */

			if ((lpSurfaceDesc->dwFlags & DDSD_CAPS) && 
				(lpSurfaceDesc->ddsCaps.dwCaps & DDSCAPS_PRIMARYSURFACE))
			{
				lLinearPitch = mode_pitch;
			}
			else
			{
				lLinearPitch = ((lpSurfaceGlobal->wWidth << mode_shift) + 3) & ~3l;
			}
			surfaceSize  =   lLinearPitch * lpSurfaceGlobal->wHeight;
		}
	}

	DISPDBG ((ppdev, 30, "Surface Size and pitch are 0x%X, 0x%X\n", surfaceSize, lLinearPitch)); 

	/* CALCULATE THE MEMORY NEEDED */

	if (lpSurfaceLocal->ddsCaps.dwCaps & DDSCAPS_OVERLAY)
	{
		DISPDBG((ppdev, 200, "an overlay surface\n"));

		/* DO NOT ALLOCATE AN OVERLAY SURFACE WHEN AN OVERLAY IS VISIBLE        */
		/* This makes the assumption that all applications will create all      */
		/* overlay surfaces in one batch (one buffer with multiple backbuffers) */
		/* and then enable video.                                               */

		if (ppdev->VisibleOverlay)
		{
			DISPDBG ((ppdev, 3000, "Trying to create an overlay surface with overlay active\n"));
			lpCreateSurface->ddRVal = DDERR_OUTOFCAPS;
			return(DDHAL_DRIVER_HANDLED);
		}

		/* STORE THE BUFFER SIZE AND FORMAT FOR DdUpdateOverlay */

		g_wHeight      = lpSurfaceGlobal->wHeight;
		g_lLinearPitch = lLinearPitch;
		g_wWidth       = lpSurfaceGlobal->wWidth;

		Format = vidFormat;
		uextra = vid_uextra;
		vextra = vid_vextra;
	}

	/* CHECK FOR AVAILABLE ROOM */

	if (!AddDDrawBitmap (ppdev, surfaceSize, lLinearPitch))
	{
		DISPDBG ((ppdev, 3000, "Ran out of memory!\n"));
		lpCreateSurface->ddRVal = DDERR_OUTOFVIDEOMEMORY;
		return(DDHAL_DRIVER_HANDLED);
	}

	/* FILL IN FPVIDMEM POINTER */

	lpSurfaceGlobal->fpVidMem = ddraw_offset;
	lpSurfaceGlobal->lPitch   = lLinearPitch;
	
	lpSurfaceDesc->lPitch	= lLinearPitch;
	lpSurfaceDesc->dwFlags |= DDSD_PITCH;

	lpCreateSurface->ddRVal = DD_OK;
	return(DDHAL_DRIVER_HANDLED);
}

/*--------------------------------------------------------------------
 * DdDestroySurface
 *
 * Releases driver state for a surface created by DdCreateSurface:
 * clears 'OverlayActive' for overlay surfaces, zeroes the surface's
 * 'fpVidMem' and 'dwReserved1', and counts the surface as invalid.
 * Once every counted surface is invalid the DirectDraw heap is reset.
 *
 * Sets ddRVal to DD_OK and returns DDHAL_DRIVER_NOTHANDLED.
 * NOTE(review): NOTHANDLED alongside DD_OK looks unusual - confirm it
 * is intentional before changing it.
 *--------------------------------------------------------------------*/

DWORD DdDestroySurface (PDD_DESTROYSURFACEDATA  lpDestroySurface)
{
	PDEV *ppdev = (PDEV*) lpDestroySurface->lpDD->dhpdev;

	DISPDBG((ppdev, 200, "DdDestroySurface\n"));

	/* AN OVERLAY GOING AWAY MEANS NO OVERLAY IS ACTIVE ANY MORE */

	if (lpDestroySurface->lpDDSurface->ddsCaps.dwCaps & DDSCAPS_OVERLAY)
	{
		ppdev->OverlayActive = FALSE;
	}

	/* WIPE THE PER-SURFACE DRIVER DATA */

	lpDestroySurface->lpDDSurface->lpGbl->dwReserved1 = 0;
	lpDestroySurface->lpDDSurface->lpGbl->fpVidMem    = 0;

	/* ONE MORE DEAD SURFACE */
	/* The heap is only reclaimed when the last live surface has been  */
	/* destroyed; more destroys than creates indicates a bookkeeping   */
	/* error.                                                          */

	++ddraw_invalid;

	ASSERTVDD(ddraw_count >= ddraw_invalid, "Too many bitmaps deleted");

	if (ddraw_count == ddraw_invalid)
	{
		DISPDBG ((ppdev, 3000, "Resetting ddraw heap...\n"));
		ResetDDrawHeap();
	}

	lpDestroySurface->ddRVal = DD_OK;
	return(DDHAL_DRIVER_NOTHANDLED);
}

/*-------------------------------------------------------------------
 * DWORD DdUpdateOverlay
 *
 * Hides, shows or repositions the video overlay.
 *
 *   DDOVER_HIDE - disables video, unless a different overlay is the
 *                 visible one (DDERR_OUTOFCAPS in that case).
 *   otherwise   - optionally enables video (DDOVER_SHOW), then programs
 *                 source size, scale, filters, 4:2:0 pitch, window
 *                 position and color key, in that order.  The geometry
 *                 is also saved in the PDEV for later clipping.
 *
 * Only one of source / destination color keying may be requested; asking
 * for both fails with DDERR_OUTOFCAPS.  Note that this check runs after
 * the size, scale and position have already been programmed.
 *
 * Always returns DDHAL_DRIVER_HANDLED; the result is in 'ddRVal'.
 *--------------------------------------------------------------------*/

DWORD DdUpdateOverlay(
	PDD_UPDATEOVERLAYDATA lpUpdateOverlay)
{
	PDEV               *ppdev;
	DD_SURFACE_GLOBAL*	lpSource;
	DWORD               DstWidth;
	DWORD               DstHeight;
	DWORD               SrcWidth;
	DWORD               SrcHeight;
	DWORD               ColorKey = 0;
	BOOL                dstColorKey;
	BOOL                srcColorKey;
	int                 xFilter, yFilter;

	ppdev = (PDEV*) lpUpdateOverlay->lpDD->dhpdev;

	DISPDBG((ppdev, 200, "DdUpdateOverlay\n"));
	DISPDBG((ppdev, 200, "Visible Overlay 0x%x\n", ppdev->VisibleOverlay));

	/* READ POINTERS */
	/* 'Source' is the overlay surface. */

	lpSource = lpUpdateOverlay->lpDDSrcSurface->lpGbl;
	DISPDBG((ppdev, 200, "source video pointer 0x%x\n",
			lpSource->fpVidMem));

	if (lpUpdateOverlay->dwFlags & DDOVER_HIDE)
	{
		DISPDBG((ppdev, 200, "Turn off overlay\n"));
		
		/* CHECK IF WE ARE DISABLING THE CORRECT OVERLAY */
		/* No work if the overlays don't match.          */

		if (ppdev->video_offset != lpSource->fpVidMem && ppdev->VisibleOverlay)
		{
			DISPDBG ((ppdev, 3000, "Trying to hide invalid overlay\n"));
			lpUpdateOverlay->ddRVal = DDERR_OUTOFCAPS;
		}

		/* DISABLE OVERLAY */
		
		else
		{		
			ppdev->myfuncs.pfn_set_video_enable (0);
			ppdev->VisibleOverlay = 0;
			lpUpdateOverlay->ddRVal = DD_OK;
		}		
	} 
	else 
	{
		/* CHECK FOR OVERLAY MATCH */
		/* Only the surface currently on screen may be updated while an */
		/* overlay is visible.                                          */

		if (ppdev->video_offset != lpSource->fpVidMem && ppdev->VisibleOverlay)
		{
			DISPDBG ((ppdev, 3000, "Trying to update invalid overlay\n"));
			lpUpdateOverlay->ddRVal = DDERR_OUTOFCAPS;
			return(DDHAL_DRIVER_HANDLED);
		}

		DISPDBG((ppdev, 200, "Something else\n"));
	
		if (lpUpdateOverlay->dwFlags & DDOVER_SHOW)
		{
			/* ENABLE VIDEO USING THE VALUES SAVED AT SURFACE CREATION */

			ppdev->myfuncs.pfn_set_video_format(Format);
			ppdev->myfuncs.pfn_set_video_size((USHORT)g_wWidth, 
				(USHORT)g_wHeight);
			ppdev->myfuncs.pfn_set_video_enable(1);
			
			ppdev->OverlayActive  = TRUE;
			ppdev->VisibleOverlay = 1;
			ppdev->VideoFlipRecord.fpFlipFrom = lpSource->fpVidMem;
		}
	
		/* DURANGO CALLS SHOULD BE MADE IN ORDER                   */
		/* We do:- source--scale--offset--position--source. In that order. */
	
		DISPDBG((ppdev, 200, "src (%d,%d) (%d,%d) dest (%d,%d) (%d,%d)\n",
			lpUpdateOverlay->rSrc.left, lpUpdateOverlay->rSrc.top,
			lpUpdateOverlay->rSrc.right, lpUpdateOverlay->rSrc.bottom,
			lpUpdateOverlay->rDest.left, lpUpdateOverlay->rDest.top,
			lpUpdateOverlay->rDest.right, lpUpdateOverlay->rDest.bottom));
	
		/* SOURCE SIZE */
		/* The size is saved for top clipping later. */

		ppdev->video_src_height = lpSource->wHeight;
		
		ppdev->myfuncs.pfn_set_video_size ((USHORT)(lpSource->wWidth),
			(USHORT)lpSource->wHeight);

		/* LOAD SCALE INFORMATION */

		DstWidth  = lpUpdateOverlay->rDest.right  - lpUpdateOverlay->rDest.left;
		DstHeight = lpUpdateOverlay->rDest.bottom - lpUpdateOverlay->rDest.top;
		SrcWidth  = lpUpdateOverlay->rSrc.right   - lpUpdateOverlay->rSrc.left;
		SrcHeight = lpUpdateOverlay->rSrc.bottom  - lpUpdateOverlay->rSrc.top;
		DISPDBG((ppdev, 200, "destsize (%d,%d), sourcesize (%d,%d)\n",
			DstWidth, DstHeight, SrcWidth, SrcHeight));

		/* SAVE THE X SCALE INFORMATION             */
		/* This is used for left clipping later on. */

		ppdev->video_src_width  = SrcWidth;
		ppdev->video_dst_width  = DstWidth;
		
		ppdev->myfuncs.pfn_set_video_scale (
			(unsigned short)SrcWidth, (unsigned short)SrcHeight,
			(unsigned short)DstWidth, (unsigned short)DstHeight);

		/* SET VIDEO FILTERS */
		/* A filter is enabled on each axis whose source and destination */
		/* sizes differ.                                                 */

		xFilter = (DstWidth  != SrcWidth);
		yFilter = (DstHeight != SrcHeight);
		ppdev->myfuncs.pfn_set_video_filter (xFilter, yFilter);
		
		/* SET THE YUV PITCH FOR 4:2:0 */
		/* Chroma pitch is half the luma pitch saved at creation time. */

		if (Format == VIDEO_FORMAT_Y0Y1Y2Y3)
			ppdev->myfuncs.pfn_set_video_yuv_pitch (g_lLinearPitch, g_lLinearPitch >> 1);

		/* SAVE THE VIDEO OFFSET */
		/* The video offset is used when setting the video window position to */
		/* account for top clipping.                                          */
		
		ppdev->video_offset    = lpSource->fpVidMem;
		ppdev->clippedSrcLines = lpUpdateOverlay->rSrc.top;
		ppdev->xcrop           = lpUpdateOverlay->rSrc.left;

		/* SET VIDEO POSITION */
		/* We save the video position in our PDev structure for use when clipping later on. */

		ppdev->video_x = lpUpdateOverlay->rDest.left;
		ppdev->video_y = lpUpdateOverlay->rDest.top;
		ppdev->video_dst_height = DstHeight;
		SetVideoPosition (
			ppdev,
			lpUpdateOverlay->rDest.left,
			lpUpdateOverlay->rDest.top,
			DstWidth, DstHeight, TRUE);
					
		/* COLOR KEY? */
		/* The surface's own key (KEYDEST/KEYSRC) takes precedence over the */
		/* override value supplied in 'overlayFX'.                          */
		 
		dstColorKey = FALSE;
		srcColorKey = FALSE;

		if (lpUpdateOverlay->dwFlags & DDOVER_KEYDEST)
		{
			ColorKey  = lpUpdateOverlay->lpDDDestSurface->ddckCKDestOverlay.dwColorSpaceLowValue;
			dstColorKey = TRUE;
		}
		else if (lpUpdateOverlay->dwFlags & DDOVER_KEYDESTOVERRIDE)
		{
			ColorKey  = lpUpdateOverlay->overlayFX.dckDestColorkey.dwColorSpaceLowValue;
			dstColorKey = TRUE;
		}
		if (lpUpdateOverlay->dwFlags & DDOVER_KEYSRC)
		{
			ColorKey  = lpUpdateOverlay->lpDDSrcSurface->ddckCKSrcOverlay.dwColorSpaceLowValue;
			srcColorKey = TRUE;
		}
		else if (lpUpdateOverlay->dwFlags & DDOVER_KEYSRCOVERRIDE)
		{
			ColorKey  = lpUpdateOverlay->overlayFX.dckSrcColorkey.dwColorSpaceLowValue;
			srcColorKey = TRUE;
		}

		if (srcColorKey || dstColorKey) 
		{
			if (srcColorKey && dstColorKey)
			{
				/* ONLY ONE COLOR KEY IS SUPPORTED */
				
				lpUpdateOverlay->ddRVal = DDERR_OUTOFCAPS;
			
				return DDHAL_DRIVER_HANDLED;
			}		
			
			/* SOURCE COLOR KEY - CHROMA KEY OFF VIDEO DATA */
			/* The video data will always be 16BPP          */

			if (srcColorKey)
			{
				SetChromaKey (ppdev, ColorKey, 16, 0);
			}

			/* DESTINATION COLOR KEY - COLOR KEY OFF GRAPHICS DATA */
			/* The color key is in the current display mode.       */

			else
			{
				SetChromaKey (ppdev, ColorKey, ppdev->myfuncs.pfn_get_display_bpp(), 1);
			}
		} 
		else 
		{
			/* DISABLE CHROMA KEY          */
			/* Video will always be on top */

			SetChromaKey(ppdev, 0, 0, 1);
		}

		lpUpdateOverlay->ddRVal = DD_OK;
	}
	DISPDBG((ppdev, 200, "DdUpdateOverlay leaves\n"));
	return(DDHAL_DRIVER_HANDLED);
}

/*-------------------------------------------------------------------
 * DWORD DdSetOverlayPosition
 *
 * Moves the overlay to (lXPos, lYPos).  The new position is saved in
 * the PDEV ('video_x' / 'video_y') and handed to SetVideoPosition with
 * a zero width and height and a FALSE final argument.
 *
 * Always sets ddRVal to DD_OK and returns DDHAL_DRIVER_HANDLED.
 *-------------------------------------------------------------------*/

DWORD DdSetOverlayPosition(
	PDD_SETOVERLAYPOSITIONDATA lpSetOverlayPosition)
{
	PDEV	*ppdev;

	ppdev = (PDEV*) lpSetOverlayPosition->lpDD->dhpdev;

	DISPDBG((ppdev, 200, "SET OVERLAY POSITION\n" ));
	DISPDBG((ppdev, 200, "Overlay surface = %08lx\n", lpSetOverlayPosition->lpDDSrcSurface ));
	DISPDBG((ppdev, 200, "(%ld,%ld)\n", lpSetOverlayPosition->lXPos, lpSetOverlayPosition->lYPos ));

	/* REMEMBER THE POSITION, THEN PROGRAM IT */

	ppdev->video_x = lpSetOverlayPosition->lXPos;
	ppdev->video_y = lpSetOverlayPosition->lYPos;
	SetVideoPosition (ppdev, lpSetOverlayPosition->lXPos,
			lpSetOverlayPosition->lYPos, 0, 0, FALSE);

	lpSetOverlayPosition->ddRVal = DD_OK;
	return DDHAL_DRIVER_HANDLED;

}

