hls_color_texture.cpp

Go to the documentation of this file.
00001 
00005 /* Copyright, 2000-2002 Nevrax Ltd.
00006  *
00007  * This file is part of NEVRAX NEL.
00008  * NEVRAX NEL is free software; you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation; either version 2, or (at your option)
00011  * any later version.
00012 
00013  * NEVRAX NEL is distributed in the hope that it will be useful, but
00014  * WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00016  * General Public License for more details.
00017 
00018  * You should have received a copy of the GNU General Public License
00019  * along with NEVRAX NEL; see the file COPYING. If not, write to the
00020  * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
00021  * MA 02111-1307, USA.
00022  */
00023 
00024 #include "std3d.h"
00025 #include "nel/3d/hls_color_texture.h"
00026 #include "nel/misc/fast_floor.h"
00027 #include "nel/3d/fasthls_modifier.h"
00028 #include "nel/misc/stream.h"
00029 #include "nel/misc/bitmap.h"
00030 #include "nel/misc/system_info.h"
00031 #include "nel/misc/algo.h"
00032 
00033 
00034 using   namespace std;
00035 using   namespace NLMISC;
00036 
00037 namespace NL3D
00038 {
00039 
00040 
// Layout constants of one 4x4 DXTC5 block, shared by all the methods of this file.
// number of pixels of an uncompressed block (4*4)
#define BLOCK_NUM_PIXEL     16
// size in bytes of a compressed block: the texture pointers advance by this amount per block
#define BLOCK_DXTC_SIZE     16
// number of uint8 alpha values a mask stores for each of its mixt blocks
#define BLOCK_ALPHA_SIZE    16
00044 
00045 
00046 // ***************************************************************************
00047 void    CHLSColorDelta::serial(NLMISC::IStream &f)
00048 {
00049     f.serialVersion(0);
00050     f.serial(DHue, DLum, DSat);
00051 }
00052 
00053 
00054 // ***************************************************************************
00055 void            CHLSColorTexture::CMask::serial(NLMISC::IStream &f)
00056 {
00057     f.serialVersion(0);
00058 
00059     f.serial(FullBlockIndex);
00060     f.serial(MixtBlockIndex);
00061     f.serialCont(Data);
00062 }
00063 
00064 
00065 // ***************************************************************************
00066 void            CHLSColorTexture::CMask::setBit(uint bitId)
00067 {
00068     uint8   &b= Data[bitId/8];
00069     b|= 1<<(bitId&7);
00070 }
00071 
00072 
00073 // ***************************************************************************
00074 CHLSColorTexture::CHLSColorTexture()
00075 {
00076     reset();
00077 }
00078 
00079 // ***************************************************************************
00080 void            CHLSColorTexture::reset()
00081 {
00082     _Width= 0;
00083     _Height= 0;
00084     _NumMipMap= 0;
00085     contReset(_Texture);
00086     contReset(_Masks);
00087 }
00088 
00089 // ***************************************************************************
00090 void            CHLSColorTexture::setBitmap(const NLMISC::CBitmap &bmp)
00091 {
00092     nlassert(bmp.getPixelFormat()==CBitmap::DXTC5);
00093     uint    width= bmp.getWidth();
00094     uint    height= bmp.getHeight();
00095     uint    mmCount= bmp.getMipMapCount();
00096     nlassert(width>=1 && height>=1);
00097     nlassert(mmCount>1 || width*height==1);
00098 
00099     // restart
00100     reset();
00101 
00102     // resize.
00103     uint    m;
00104     uint    pixelSize= 0;
00105     uint    numTotalBlock= 0;
00106     for(m=0;m<mmCount;m++)
00107     {
00108         pixelSize+= bmp.getPixels(m).size();
00109         uint    mmWidth= bmp.getWidth(m);
00110         uint    mmHeight= bmp.getHeight(m);
00111         uint    wBlock= (mmWidth+3)/4;
00112         uint    hBlock= (mmHeight+3)/4;
00113         numTotalBlock+= wBlock*hBlock;
00114     }
00115     // add the info for the "Block to compress"
00116     uint    blockToCompressSize= 4*((numTotalBlock+31)/32);
00117     // allocate good size, and reset to 0 => no block to re-compress.
00118     _Texture.resize(pixelSize+blockToCompressSize, 0);
00119 
00120     // fill texture
00121     uint8   *ptr= &_Texture[0];
00122     for(m=0;m<mmCount;m++)
00123     {
00124         uint    mSize= bmp.getPixels(m).size();
00125         memcpy(ptr, &bmp.getPixels(m)[0], mSize);
00126         ptr+= mSize;
00127     }
00128 
00129     // header
00130     _BlockToCompressIndex= pixelSize;
00131     _Width= width;
00132     _Height= height;
00133     _NumMipMap= mmCount;
00134 }
00135 
00136 
00137 // ***************************************************************************
00138 #define MASK_BLOCK_EMPTY    0
00139 #define MASK_BLOCK_FULL     1
00140 #define MASK_BLOCK_MIXT     2
00141 struct  CMaskInfo
00142 {
00143     // list of block
00144     uint                WBlock, HBlock;
00145     uint                NumBlock;
00146     vector<uint8>       Blocks; // 0 empty, 1. Full. 2. Mixt.
00147 };
00148 
00149 // ***************************************************************************
00150 void            CHLSColorTexture::addMask(const NLMISC::CBitmap &bmpIn, uint threshold)
00151 {
00152     // copy the bitmap and set RGBA/mipmaps.
00153     CBitmap     bmp= bmpIn;
00154     bmp.convertToType(CBitmap::RGBA);
00155     bmp.buildMipMaps();
00156 
00157     // verify widht...
00158     nlassert(bmp.getWidth()== _Width);
00159     nlassert(bmp.getHeight()== _Height);
00160     nlassert(bmp.getMipMapCount()== _NumMipMap);
00161 
00162     // ***** build the information for all mipmaps
00163     vector<CMaskInfo>   masks;
00164     masks.resize(_NumMipMap);
00165     uint    m;
00166     uint    numMixtBlock= 0;
00167     uint    numTotalBlock= 0;
00168     for(m=0;m<_NumMipMap;m++)
00169     {
00170         CMaskInfo   &mask= masks[m];
00171         uint    mmWidth= bmp.getWidth(m);
00172         uint    mmHeight= bmp.getHeight(m);
00173         mask.WBlock= (mmWidth+3)/4;
00174         mask.HBlock= (mmHeight+3)/4;
00175         mask.NumBlock= mask.WBlock*mask.HBlock;
00176         mask.Blocks.resize(mask.NumBlock);
00177 
00178         numTotalBlock+= mask.NumBlock;
00179 
00180         CRGBA   *src= (CRGBA*)(&bmp.getPixels(m)[0]);
00181 
00182         for(uint yB=0;yB<mask.HBlock;yB++)
00183         {
00184             for(uint xB=0;xB<mask.WBlock;xB++)
00185             {
00186                 uint    accum= 0;
00187                 uint    w= min(mmWidth, 4U);
00188                 uint    h= min(mmHeight, 4U);
00189                 for(uint y= 0;y< h;y++)
00190                 {
00191                     for(uint x= 0;x< w;x++)
00192                     {
00193                         uint    yPix= yB*4+y;
00194                         uint    xPix= xB*4+x;
00195                         // read the color
00196                         uint8   alphaMask = src[yPix*mmWidth+xPix].R;
00197                         // remove some dummy precision.
00198                         if(alphaMask<threshold)
00199                             alphaMask= 0;
00200                         if(alphaMask>255-threshold)
00201                             alphaMask= 255;
00202                         // Add to the accum
00203                         accum+= alphaMask;
00204                     }
00205                 }
00206 
00207                 // full black?
00208                 if(accum==0)
00209                     mask.Blocks[yB*mask.WBlock+xB]= MASK_BLOCK_EMPTY;
00210                 else if(accum==w*h*255)
00211                     mask.Blocks[yB*mask.WBlock+xB]= MASK_BLOCK_FULL;
00212                 // if not full white or full black, mixt block
00213                 else
00214                 {
00215                     mask.Blocks[yB*mask.WBlock+xB]= MASK_BLOCK_MIXT;
00216                     numMixtBlock++;
00217                 }
00218             }
00219         }
00220     }
00221 
00222     // ***** compress into CMask
00223     CMask       newMask;
00224     uint        newMaskDataSize= 0;
00225 
00226     // add the mixt block data size (16*uint8 per block)
00227     newMaskDataSize+= numMixtBlock*BLOCK_ALPHA_SIZE;
00228     // compute the bit size. NB: use uint32 to blocks bits. => data is aligned.
00229     uint    bitDataSize= 4*((numTotalBlock+31)/32);
00230     // add fullBlock bits
00231     newMask.FullBlockIndex= newMaskDataSize;
00232     newMaskDataSize+= bitDataSize;
00233     // add mixtBlock bits
00234     newMask.MixtBlockIndex= newMaskDataSize;
00235     newMaskDataSize+= bitDataSize;
00236 
00237     // allocate. Fill with 0 to initialize bits per default EMPTY value
00238     newMask.Data.resize(newMaskDataSize, 0);
00239 
00240     // compress each mipMaps from bigger to smaller
00241     uint    bitId= 0;
00242     uint    mixtBlockId= 0;
00243     for(m=0;m<_NumMipMap;m++)
00244     {
00245         CMaskInfo   &mask= masks[m];
00246 
00247         // ---- build the mixtBlock alpha Mask
00248         for(uint yB=0;yB<mask.HBlock;yB++)
00249         {
00250             for(uint xB=0;xB<mask.WBlock;xB++)
00251             {
00252                 uint    id= yB*mask.WBlock+xB;
00253                 // if mixt block
00254                 if(mask.Blocks[id]==MASK_BLOCK_MIXT)
00255                 {
00256                     nlassert(mixtBlockId<numMixtBlock);
00257                     // Fill Alpha data.
00258                     uint8   *dst= &newMask.Data[mixtBlockId*BLOCK_ALPHA_SIZE];
00259                     uint    mmWidth= bmp.getWidth(m);
00260                     uint    mmHeight= bmp.getHeight(m);
00261                     // point to the src alpha color
00262                     CRGBA   *src= (CRGBA*)(&bmp.getPixels(m)[0]);
00263                     src= src + yB*4*mmWidth + xB*4;
00264 
00265                     // for the 4*4 pixels
00266                     uint    w= min(mmWidth, 4U);
00267                     uint    h= min(mmHeight, 4U);
00268                     for(uint y=0;y<h;y++)
00269                     {
00270                         for(uint x=0;x<w;x++)
00271                         {
00272                             dst[y*4+x]= src[y*mmWidth+x].R;
00273                         }
00274                     }
00275 
00276                     // inc
00277                     mixtBlockId++;
00278                 }
00279             }
00280         }
00281 
00282         // ---- build the fullBlock and mixtBlocks bits.
00283         for(uint i=0; i<mask.NumBlock; i++)
00284         {
00285             nlassert(bitId<numTotalBlock);
00286 
00287             // fill bits
00288             if(mask.Blocks[i]==MASK_BLOCK_FULL)
00289                 newMask.setBit(newMask.FullBlockIndex*8 + bitId);
00290             else if(mask.Blocks[i]==MASK_BLOCK_MIXT)
00291                 newMask.setBit(newMask.MixtBlockIndex*8 + bitId);
00292 
00293             // inc
00294             bitId++;
00295         }
00296     }
00297 
00298     // ***** Add the CMask
00299     _Masks.push_back(newMask);
00300 
00301     // Or the BlockToCompress info with the MixtBlocks bits.
00302     nlassert(bitDataSize==_Texture.size()-_BlockToCompressIndex);
00303     for(uint i=0;i<bitDataSize;i++)
00304     {
00305         _Texture[_BlockToCompressIndex+i]|= newMask.Data[newMask.MixtBlockIndex+i];
00306     }
00307 }
00308 
00309 
00310 // ***************************************************************************
00311 void            CHLSColorTexture::serial(NLMISC::IStream &f)
00312 {
00313     f.serialVersion(0);
00314 
00315     f.serial(_Width, _Height, _NumMipMap, _BlockToCompressIndex);
00316     f.serialCont(_Texture);
00317     f.serialCont(_Masks);
00318 }
00319 
00320 
00321 // ***************************************************************************
00322 static inline   void    getBitPack(uint32 *bitPtr, uint32 &bitMask)
00323 {
00324 #ifdef NL_LITTLE_ENDIAN
00325     bitMask= *bitPtr;
00326 #else
00327     bitMask = ((uint8*)bitPtr)[0];
00328     bitMask+= ((uint8*)bitPtr)[1]<<8;
00329     bitMask+= ((uint8*)bitPtr)[2]<<16;
00330     bitMask+= ((uint8*)bitPtr)[3]<<24;
00331 #endif
00332 }
00333 
00334 // ***************************************************************************
00335 void            CHLSColorTexture::buildColorVersion(const CHLSColorDelta *colDeltaList, NLMISC::CBitmap &out)
00336 {
00337     // static to avoid realloc
00338     static  vector<uint8>   dstTexture;
00339     static  vector<CRGBA>   dstUnCompTexture;
00340     uint32  *bitPtr;
00341     uint8   *srcPtr;
00342     uint8   *dstPtr;
00343     CRGBA   *dstUnCompPtr;
00344     uint32  bitMask;
00345 
00346     // **** prepare Data
00347 
00348     // count number of DXTC5 block in _Texture.
00349     uint    numBlocks= _BlockToCompressIndex/BLOCK_DXTC_SIZE;
00350 
00351     // create a tmp compressed block array, copy of Texture.
00352     dstTexture.resize(numBlocks*BLOCK_DXTC_SIZE);
00353     // copy from texture (to have non colored version already copied, and also ALPHA ok)
00354     memcpy(&dstTexture[0], &_Texture[0], dstTexture.size());
00355 
00356     // create a tmp uncompressed block array, which will receive coloring of mixt blocks
00357     dstUnCompTexture.resize(numBlocks*BLOCK_NUM_PIXEL);
00358 
00359     // For all blockToCompress, uncompress them in dstUnCompTexture, because they will blend with future mask coloring
00360     uint    n= numBlocks;
00361     bitPtr= (uint32*)(&_Texture[_BlockToCompressIndex]);
00362     dstUnCompPtr= &dstUnCompTexture[0];
00363     srcPtr= &_Texture[0];
00364     while(n>0)
00365     {
00366         uint    nBits= min(n, 32U);
00367         getBitPack(bitPtr, bitMask);
00368         n-= nBits;
00369         bitPtr++;
00370         for(;nBits>0;nBits--)
00371         {
00372             // need to compress/uncompress ??
00373             if(bitMask&1)
00374             {
00375                 // uncompress this block. ignore alpha
00376                 uncompressBlockRGB(srcPtr, dstUnCompPtr);
00377             }
00378             bitMask>>=1;
00379             dstUnCompPtr+= BLOCK_NUM_PIXEL;
00380             srcPtr+= BLOCK_DXTC_SIZE;
00381         }
00382     }
00383 
00384     // **** build the color version for all masks.
00385 
00386     for(uint maskId= 0; maskId<_Masks.size();maskId++)
00387     {
00388         CMask           &mask= _Masks[maskId];
00389         // unpack colDelta, and prepare for use with CFastHLSModifier.
00390         uint8           dHue= colDeltaList[maskId].DHue;
00391         uint            dLum= 0xFFFFFF00 + colDeltaList[maskId].DLum*2;
00392         uint            dSat= 0xFFFFFF00 + colDeltaList[maskId].DSat*2;
00393 
00394         // get a ptr on alpha of mixt block.
00395         uint8           *alphaMixtBlock= &mask.Data[0];
00396 
00397 
00398         // ---- for all Fullblock ot this mask, color and store in dstTexture
00399         // start at full Block bits desc
00400         bitPtr= (uint32*)(&mask.Data[mask.FullBlockIndex]);
00401         uint32  *bitCompPtr= (uint32*)(&_Texture[_BlockToCompressIndex]);
00402         srcPtr= &_Texture[0];
00403         dstPtr= &dstTexture[0];
00404         dstUnCompPtr= &dstUnCompTexture[0];
00405         n= numBlocks;
00406         // run all blocks.
00407         while(n>0)
00408         {
00409             uint    nBits= min(n, 32U);
00410             // get Full block mask.
00411             getBitPack(bitPtr, bitMask);
00412             n-= nBits;
00413             bitPtr++;
00414             // get Compress mask.
00415             uint32  bitCompMask;
00416             getBitPack(bitCompPtr, bitCompMask);
00417             bitCompPtr++;
00418             // for all bits
00419             for(;nBits>0;nBits--)
00420             {
00421                 // need to colorize??
00422                 if(bitMask&1)
00423                 {
00424                     // colorize this block. ignore alpha
00425                     colorizeDXTCBlockRGB(srcPtr, dstPtr, dHue, dLum, dSat);
00426                     // If this block is "a block to recompress", then must uncompress it in dstUnCompPtr
00427                     uncompressBlockRGB(dstPtr, dstUnCompPtr);
00428                 }
00429                 bitMask>>=1;
00430                 bitCompMask>>=1;
00431                 srcPtr+= BLOCK_DXTC_SIZE;
00432                 dstPtr+= BLOCK_DXTC_SIZE;
00433                 dstUnCompPtr+= BLOCK_NUM_PIXEL;
00434             }
00435         }
00436 
00437         // ---- for all mixtblock ot this mask, color, uncompress and blend in store in dstUnCompTexture
00438         static  uint8   tmpColoredBlockDXTC[BLOCK_NUM_PIXEL];
00439         static  CRGBA   tmpColoredBlockRGBA[BLOCK_NUM_PIXEL];
00440         // start at mixt Block bits desc
00441         bitPtr= (uint32*)(&mask.Data[mask.MixtBlockIndex]);
00442         srcPtr= &_Texture[0];
00443         dstUnCompPtr= &dstUnCompTexture[0];
00444         n= numBlocks;
00445         // run all blocks.
00446         while(n>0)
00447         {
00448             uint    nBits= min(n, 32U);
00449             getBitPack(bitPtr, bitMask);
00450             n-= nBits;
00451             bitPtr++;
00452             for(;nBits>0;nBits--)
00453             {
00454                 // need to colorize??
00455                 if(bitMask&1)
00456                 {
00457                     // colorize this block. store 2 colors in tmp
00458                     colorizeDXTCBlockRGB(srcPtr, tmpColoredBlockDXTC, dHue, dLum, dSat);
00459                     // copy RGB bits from src to tmp
00460                     ((uint32*)tmpColoredBlockDXTC)[3]= ((uint32*)srcPtr)[3];
00461 
00462                     // uncompress the block.
00463                     uncompressBlockRGB(tmpColoredBlockDXTC, tmpColoredBlockRGBA);
00464 
00465                     // blend tmpColoredBlockRGBA into dstUnCompPtr, according to alphaMixtBlock.
00466                     for(uint i=0;i<16;i++)
00467                     {
00468                         dstUnCompPtr[i].blendFromuiRGBOnly(dstUnCompPtr[i], tmpColoredBlockRGBA[i], *alphaMixtBlock);
00469                         // next pixel
00470                         alphaMixtBlock++;
00471                     }
00472                 }
00473                 bitMask>>=1;
00474                 srcPtr+= BLOCK_DXTC_SIZE;
00475                 dstUnCompPtr+= BLOCK_NUM_PIXEL;
00476             }
00477         }
00478 
00479     }
00480 
00481 
00482     // Since colorizeDXTCBlockRGB() use MMX, must end with emms.
00483 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
00484     if(CSystemInfo::hasMMX())
00485         _asm    emms;
00486 #endif
00487 
00488 
00489     // **** compress needed blocks
00490     n= numBlocks;
00491     bitPtr= (uint32*)(&_Texture[_BlockToCompressIndex]);
00492     dstUnCompPtr= &dstUnCompTexture[0];
00493     dstPtr= &dstTexture[0];
00494     while(n>0)
00495     {
00496         uint    nBits= min(n, 32U);
00497         getBitPack(bitPtr, bitMask);
00498         n-= nBits;
00499         bitPtr++;
00500         for(;nBits>0;nBits--)
00501         {
00502             // need to compress ??
00503             if(bitMask&1)
00504             {
00505                 // uncompress this block. ignore alpha
00506                 compressBlockRGB(dstUnCompPtr, dstPtr);
00507             }
00508             bitMask>>=1;
00509             dstUnCompPtr+= BLOCK_NUM_PIXEL;
00510             dstPtr+= BLOCK_DXTC_SIZE;
00511         }
00512     }
00513 
00514     // **** format bitmap out with dstTexture.
00515     out.reset(CBitmap::DXTC5);
00516     out.resize(_Width, _Height, CBitmap::DXTC5);
00517 
00518     // create and fill all the mipMaps
00519     uint    w= _Width, h=_Height;
00520     dstPtr= &dstTexture[0];
00521     for(uint m=0;m<_NumMipMap;m++)
00522     {
00523         // allocate.
00524         out.resizeMipMap(m, w, h);
00525         // get the size of this DXTC5 level.
00526         uint    size= out.getPixels(m).size();
00527         // fill
00528         memcpy(&out.getPixels(m)[0], dstPtr, size);
00529         // next mipmap
00530         dstPtr+= size;
00531         w= (w+1)/2;
00532         h= (h+1)/2;
00533     }
00534     // verify all filled
00535     nlassert( dstPtr== (&dstTexture[0] + dstTexture.size()) );
00536 
00537     // set the correct num of mipmap
00538     out.setMipMapCount(_NumMipMap);
00539 }
00540 
00541 
00542 // ***************************************************************************
00543 void            CHLSColorTexture::colorizeDXTCBlockRGB(const uint8 *srcPtr, uint8 *dstPtr, uint8 dHue, uint dLum, uint dSat)
00544 {
00545     // get modifier.
00546     CFastHLSModifier    &fastHLS= CFastHLSModifier::getInstance();
00547 
00548     // apply the color on the 2 DXTC colors
00549     *(uint16*)(dstPtr+8 )= fastHLS.applyHLSMod(*(uint16*)(srcPtr+8 ) , dHue, dLum, dSat);
00550     *(uint16*)(dstPtr+10)= fastHLS.applyHLSMod(*(uint16*)(srcPtr+10) , dHue, dLum, dSat);
00551 }
00552 
00553 
00554 // ***************************************************************************
00555 void            CHLSColorTexture::uncompressBlockRGB(const uint8* srcDXTC, CRGBA *dstRGBA)
00556 {
00557     CRGBA   c[4];
00558 
00559     uint16 color0;
00560     uint16 color1;
00561     uint32 bits;
00562     color0= *(uint16*)(srcDXTC+8);
00563     color1= *(uint16*)(srcDXTC+10);
00564     bits=   *(uint32*)(srcDXTC+12);
00565 
00566     c[0].set565(color0);
00567     c[1].set565(color1);
00568 
00569     // ignore color0>color1 for DXT3 and DXT5.
00570     c[2].blendFromui(c[0],c[1],85);
00571     c[3].blendFromui(c[0],c[1],171);
00572 
00573     // bits to color (ignore alpha result)
00574     for(uint n= 16;n>0;n--)
00575     {
00576         *dstRGBA= c[bits&3];
00577         bits>>=2;
00578         dstRGBA++;
00579     }
00580 }
00581 
00582 
00583 // ***************************************************************************
00584 void        CHLSColorTexture::computeMinMax(sint *diffBlock, CVectorInt &v, sint mean[3], sint rgb0[3], sint rgb1[3])
00585 {
00586     // compute the min and max distance along the axis v.
00587     sint    mind= INT_MAX;
00588     sint    maxd= INT_MIN;
00589     sint    *srcDiff= diffBlock;
00590     // for the 16 pixels
00591     for(uint n=16;n>0;n--,srcDiff+=3)
00592     {
00593         sint    R= srcDiff[0];
00594         sint    G= srcDiff[1];
00595         sint    B= srcDiff[2];
00596         sint    d= R*v.x + G*v.y + B*v.z;
00597         if(d<mind)
00598             mind= d;
00599         if(d>maxd)
00600             maxd= d;
00601     }
00602 
00603     // avoid overflow. here, Higher possible bit is 16+8+2 (add of 3 values=> *4) == 26
00604     // 26-12= 14. 14+16=30 => ok.
00605     mind>>= 12;
00606     maxd>>= 12;
00607 
00608     // compute the 2 colors: rgb0 on the min, and rgb1 on the max
00609     rgb0[0]= mean[0]+ (mind*v.x>>20);
00610     rgb0[1]= mean[1]+ (mind*v.y>>20);
00611     rgb0[2]= mean[2]+ (mind*v.z>>20);
00612     rgb1[0]= mean[0]+ (maxd*v.x>>20);
00613     rgb1[1]= mean[1]+ (maxd*v.y>>20);
00614     rgb1[2]= mean[2]+ (maxd*v.z>>20);
00615     // clamp to 0..255
00616     fastClamp8(rgb0[0]);
00617     fastClamp8(rgb0[1]);
00618     fastClamp8(rgb0[2]);
00619     fastClamp8(rgb1[0]);
00620     fastClamp8(rgb1[1]);
00621     fastClamp8(rgb1[2]);
00622 }
00623 
00624 
00625 // ***************************************************************************
      // Compress a 4x4 block of CRGBA into the color part of a compressed block.
      //  - srcRGBA: the 16 pixels to compress. NB: their alpha is overwritten with 0 by this method.
      //  - dstDXTC: the destination block. Only the 8 bytes starting at dstDXTC+8 are written: the 2 uint16
      //    565 colors, then 32 bits made of one 2-bit color index per pixel. The 8 first bytes are not touched.
      // Steps: compute the mean color, choose a main axis from a weighted covariance matrix, take the 2 extreme
      // colors along this axis (computeMinMax()), then for each pixel take the nearest of the 4 block colors.
00626 void            CHLSColorTexture::compressBlockRGB(CRGBA *srcRGBA, uint8* dstDXTC)
00627 {
00628     // skip alpha part (the 8 first bytes of the block).
00629     uint8   *dstBlock= dstDXTC+8;
00630 
00631 
00632     // **** compute RGB0 and RGB1.
00633     uint    i,j,n;
00634 
00635     // compute the mean color of 16 pixels
00636     sint    mean[3];
00637     mean[0]= 0;
00638     mean[1]= 0;
00639     mean[2]= 0;
00640     CRGBA   *src= srcRGBA;
00641     for(n=16;n>0;n--,src++)
00642     {
00643         mean[0]+= src->R;
00644         mean[1]+= src->G;
00645         mean[2]+= src->B;
00646         // at same time, setup alpha to 0. Important for "compute bits" part (see MMX)!!
00647         src->A= 0;
00648     }
      // sum of 16 values => divide by 16.
00649     mean[0]>>= 4;
00650     mean[1]>>= 4;
00651     mean[2]>>= 4;
00652 
00653     // compute col-mean: 3 sint (R,G,B) per pixel.
00654     sint    diffBlock[16*3];
00655     src= srcRGBA;
00656     sint    *srcDiff= diffBlock;
00657     for(n=16;n>0;n--,src++,srcDiff+=3)
00658     {
00659         srcDiff[0]= (sint)src->R - mean[0];
00660         srcDiff[1]= (sint)src->G - mean[1];
00661         srcDiff[2]= (sint)src->B - mean[2];
00662     }
00663 
00664 
00665     // compute the covariant matrix.
00666     sint    coMat[3][3];
00667     // Apply std RGB factor (0.3, 0.56, 0.14) to choose the best Axis. This gives far better results.
00668     sint    rgbFact[3]= {77, 143, 36};
00669     for(i=0;i<3;i++)
00670     {
00671         // OPTIMIZE SINCE SYMMETRIC MATRIX: only the upper triangle is computed.
00672         for(j=i;j<3;j++)
00673         {
00674             sint32  factor= 0;
00675             // divide / 16 to avoid overflow sint32
00676             uint    colFactor= (rgbFact[i]*rgbFact[j]) >> 4;
00677             // run all 16 pixels.
00678             sint    *srcDiff= diffBlock;
00679             for(n=16;n>0;n--,srcDiff+=3)
00680             {
00681                 factor+= srcDiff[i] * srcDiff[j] * colFactor;
00682             }
00683             coMat[i][j]= factor;
00684         }
00685     }
00686     // Fill symmetric matrix
00687     coMat[1][0]= coMat[0][1];
00688     coMat[2][0]= coMat[0][2];
00689     coMat[2][1]= coMat[1][2];
00690 
00691 
00692     // take the bigger vector: the row with the largest sum of absolute values (row 0 if all are 0).
00693     sint    maxSize= 0;
00694     uint    axis= 0;
00695     for(i=0;i<3;i++)
00696     {
00697         // Use abs since sqr fails because all sint32 range may be used.
00698         sint    size= abs(coMat[i][0]) + abs(coMat[i][1]) + abs(coMat[i][2]);
00699         if(size>maxSize)
00700         {
00701             maxSize= size;
00702             axis= i;
00703         }
00704     }
00705 
00706     // normalize this vector
00707     CVector v;
00708     // remove some rgb factor...
00709     v.x= (float)coMat[axis][0]/rgbFact[0];
00710     v.y= (float)coMat[axis][1]/rgbFact[1];
00711     v.z= (float)coMat[axis][2]/rgbFact[2];
00712     v.normalize();
00713     // set a Fixed 16:16.
00714     CVectorInt  vInt;
00715     // don't bother if OptFastFloorBegin() has been called. 16:16 precision is sufficient.
00716     vInt.x= OptFastFloor(v.x*65536);
00717     vInt.y= OptFastFloor(v.y*65536);
00718     vInt.z= OptFastFloor(v.z*65536);
00719 
00720 
00721     // For all pixels, choose the 2 colors along the axis
00722     sint    rgb0[3];
00723     sint    rgb1[3];
00724     computeMinMax(diffBlock, vInt, mean, rgb0, rgb1);
00725 
00726     // Average to 16 bits. NB: correctly encode 0..255 to 0..31 or 0..63.
00727     uint    R,G,B;
00728     R= ((rgb0[0]*7967+32768)>>16);
00729     G= ((rgb0[1]*16191+32768)>>16);
00730     B= ((rgb0[2]*7967+32768)>>16);
00731     uint16  rgb016= (R<<11) + (G<<5) + (B);
00732     R= ((rgb1[0]*7967+32768)>>16);
00733     G= ((rgb1[1]*16191+32768)>>16);
00734     B= ((rgb1[2]*7967+32768)>>16);
00735     uint16  rgb116= (R<<11) + (G<<5) + (B);
00736     // copy to block
00737     ((uint16*)dstBlock)[0]= rgb016;
00738     ((uint16*)dstBlock)[1]= rgb116;
00739 
00740 
00741     // **** compute bits
      // rebuild the 4 colors of the block exactly as uncompressBlockRGB() does.
00742     CRGBA   c[4];
00743     c[0].set565(rgb016);
00744     c[1].set565(rgb116);
00745     c[2].blendFromui(c[0],c[1],85);
00746     c[3].blendFromui(c[0],c[1],171);
00747     // it is important that c[] and src Alpha are set to 0, because of "pmaddwd" use in MMX code...
00748     c[0].A= 0;
00749     c[1].A= 0;
00750     c[2].A= 0;
00751     c[3].A= 0;
00752     CRGBA   *cPtr= c;
00753 
00754     // result: one 2-bit index per pixel, the first pixel in the lowest bits.
00755     uint32  bits= 0;
00756 
00757 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
00758     if(CSystemInfo::hasMMX())
00759     {
00760         // prepare mmx: mm7= 0, it is the second operand of the punpcklbw below.
00761         uint64  blank= 0;
00762         __asm
00763         {
00764             movq        mm7, blank
00765         }
00766 
00767         // for 16 pixels
00768         src= srcRGBA;
00769         for(n=16;n>0;n--,src++)
00770         {
00771             /* // C Version (+ little asm).
00772             uint    minDist= 0xFFFFFFFF;
00773             uint    id= 0;
00774             for(i=0;i<4;i++)
00775             {
00776                 // applying factors such *23, *80, *6 gives better results, but slower (in MMX).
00777                 uint    dist= sqr((sint)src->R-(sint)c[i].R);
00778                 dist+= sqr((sint)src->G-(sint)c[i].G);
00779                 dist+= sqr((sint)src->B-(sint)c[i].B);
00780                 if(dist<minDist)
00781                 {
00782                     minDist= dist;
00783                     id= i;
00784                 }
00785             }
00786             bits|=id;
00787             __asm
00788             {
00789                 mov eax, bits
00790                 ror eax, 2
00791                 mov bits, eax
00792             }*/
              // NB: the asm below references the locals src, cPtr and bits by name.
              // ecx counts from 4 down to 1 while edi walks c[0]..c[3], hence esi (the ecx of the
              // nearest color) is "inverted": the real color index is 4-esi.
00793             __asm
00794             {
00795                 mov         esi, src
00796                 mov         edi, cPtr
00797 
00798                 mov         ecx, 4
00799                 mov         edx, 0xFFFFFFFF // edx= minDist
00800 
00801                 movd        mm0, [esi]
00802                 punpcklbw   mm0, mm7
00803 
00804                 mov         esi, 4          // esi= id MinDist (inverted)
00805 
00806                 // compare 4 cases.
00807             myLoop:
00808                 movd        mm1, [edi]
00809                 punpcklbw   mm1, mm7
00810                 psubsw      mm1, mm0
00811                 pmaddwd     mm1, mm1
00812                 movd        eax, mm1
00813                 psrlq       mm1, 32
00814                 movd        ebx, mm1
00815                 add         eax, ebx
00816 
00817                 // take smaller of A and B. here: eax= A, edx= B
00818                 sub         eax, edx        // eax= A-B
00819                 sbb         ebx, ebx        // ebx= FF if A<B.
00820                 and         eax, ebx        // eax= A-B if A<B
00821                 add         edx, eax        // if A<B, edx= B+A-B= A, else, edx= B. => minimum
00822                 // setup the "smaller" id. here esi= iB, ecx= iA
00823                 not         ebx             // ebx= 0 if A<B, FF else
00824                 sub         esi, ecx        // esi= iB-iA
00825                 and         esi, ebx        // esi= 0 if A<B, iB-iA else
00826                 add         esi, ecx        // esi= 0+iA= iA if A<B, else esi= iB-iA+iA= iB
00827 
00828                 add         edi, 4
00829                 dec         ecx
00830                 jnz         myLoop
00831 
00832                 // reverse id
00833                 mov         edx, 4
00834                 mov         eax, bits
00835                 sub         edx, esi
00836                 // and store into bits
00837                 or          eax, edx
00838                 ror         eax, 2
00839                 mov         bits, eax
00840             }
00841         }
      // here bits has been rotated 16 times by 2 bits, ie a full turn of 32 bits.
00842 
00843 
00844         // end MMX block.
00845         __asm   emms;
00846     }
00847     else
00848 #endif  // NL_OS_WINDOWS
00849     {
00850         src= srcRGBA;
00851         for(n=16;n>0;n--,src++)
00852         {
00853             // C Version: take the nearest of the 4 colors (squared distance); the first one wins on equality.
00854             uint    minDist= 0xFFFFFFFF;
00855             uint    id= 0;
00856             for(i=0;i<4;i++)
00857             {
00858                 // applying factors such *23, *80, *6 gives better results, but slower (in MMX).
00859                 uint    dist= sqr((sint)src->R-(sint)c[i].R);
00860                 dist+= sqr((sint)src->G-(sint)c[i].G);
00861                 dist+= sqr((sint)src->B-(sint)c[i].B);
00862                 if(dist<minDist)
00863                 {
00864                     minDist= dist;
00865                     id= i;
00866                 }
00867             }
00868             // a ror is faster, but full C version: insert the id in the 2 highest bits...
00869             bits|= id<<30;
00870             // ...and shift down the previous ones. don't do it for the last.
00871             if(n>1)
00872                 bits>>=2;
00873         }
00874     }
00875 
00876     // copy the 32 bits of indices, just after the 2 colors.
00877     ((uint32*)dstBlock)[1]= bits;
00878 }
00879 
00880 
00881 } // NL3D

Generated on Thu Jan 7 08:26:26 2010 for NeL by  doxygen 1.6.1