これが遅くなる場合があるコード
/*
 * Convert the 32-bit-per-pixel image in pSrcBuf into 16-bit pixels with
 * 4 bits per channel, written through pPx.
 *
 * Source layout (per the masks used): bits 31-24 = A, 23-16 = R,
 * 15-8 = G, 7-0 = B.
 * Destination layout: bits 15-12 = A, 11-8 = R, 7-4 = G, 3-0 = B.
 *
 * Source rows are consumed from row ImageHeight - 1 down to row 0 while the
 * destination pointer moves forward by Pitch after every row, so the output
 * is the source flipped vertically. pPx is advanced once per processed row
 * and is NOT restored afterwards.
 *
 * Each 8-bit channel c is reduced to 4 bits as (c * 15) / 255 using integer
 * arithmetic. This truncates exactly like the former float expression
 * `15 * (c / 255.f)` (IEEE-754 single precision), so only c == 255 maps
 * to 15.
 *
 * Compared with the earlier version of this loop:
 *   - each source pixel is read once instead of four times;
 *   - the row offset is computed once per row, not once per channel;
 *   - the destination is written once per pixel (the previous preliminary
 *     store of 0 was always overwritten);
 *   - no int <-> float conversions are performed.
 *
 * NOTE(review): `pPx += Pitch` advances in units of pPx's element type. If
 * Pitch is a byte count and pPx points to 16-bit elements, the row stride
 * would be twice what is intended -- confirm against the caller.
 */
WORD px;
int X = 0;
for (int y = ImageHeight - 1; y >= 0; --y) {
    /* First pixel of source row y. */
    const DWORD *srcRow = (const DWORD *)pSrcBuf + (y * ImageWidth);

    for (X = 0; X < ImageWidth; ++X) {
        const DWORD src = srcRow[X];

        /* Scale every channel from 0..255 down to 0..15 (truncating). */
        const DWORD alpha = (((src >> 24) & 0xffu) * 15u) / 255u;
        const DWORD red   = (((src >> 16) & 0xffu) * 15u) / 255u;
        const DWORD green = (((src >>  8) & 0xffu) * 15u) / 255u;
        const DWORD blue  = (( src        & 0xffu) * 15u) / 255u;

        px = (WORD)((alpha << 12) | (red << 8) | (green << 4) | blue);
        pPx[X] = px;
    }

    /* Move to the next destination row. */
    pPx += Pitch;
}