lod_character_manager.cpp

Go to the documentation of this file.
00001 
00005 /* Copyright, 2000-2002 Nevrax Ltd.
00006  *
00007  * This file is part of NEVRAX NEL.
00008  * NEVRAX NEL is free software; you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation; either version 2, or (at your option)
00011  * any later version.
00012 
00013  * NEVRAX NEL is distributed in the hope that it will be useful, but
00014  * WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00016  * General Public License for more details.
00017 
00018  * You should have received a copy of the GNU General Public License
00019  * along with NEVRAX NEL; see the file COPYING. If not, write to the
00020  * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
00021  * MA 02111-1307, USA.
00022  */
00023 
00024 #include "std3d.h"
00025 
00026 #include "nel/misc/common.h"
00027 #include "nel/3d/lod_character_manager.h"
00028 #include "nel/3d/lod_character_shape.h"
00029 #include "nel/3d/lod_character_shape_bank.h"
00030 #include "nel/3d/lod_character_instance.h"
00031 #include "nel/misc/hierarchical_timer.h"
00032 #include "nel/misc/fast_floor.h"
00033 #include "nel/3d/lod_character_texture.h"
00034 #include "nel/3d/ray_mesh.h"
00035 #include "nel/misc/file.h"
00036 #include "nel/misc/algo.h"
00037 #include "nel/misc/fast_mem.h"
00038 #include "nel/misc/system_info.h"
00039 
00040 
00041 using   namespace std;
00042 using   namespace NLMISC;
00043 
00044 namespace NL3D
00045 {
00046 
00047 
// ***************************************************************************
// Layout of one vertex written into the lod vertex stream: position at
// offset 0, UV at NL3D_CLOD_UV_OFF, color at NL3D_CLOD_COLOR_OFF.
// Dest is without Normal because lighting is precomputed into the color.
#define NL3D_CLOD_VERTEX_FORMAT (CVertexBuffer::PositionFlag | CVertexBuffer::TexCoord0Flag | CVertexBuffer::PrimaryColorFlag)
#define NL3D_CLOD_VERTEX_SIZE   24
#define NL3D_CLOD_UV_OFF        12
#define NL3D_CLOD_COLOR_OFF     20

// size (in block) of the big texture.
#define NL3D_CLOD_TEXT_NLOD_WIDTH   16
#define NL3D_CLOD_TEXT_NLOD_HEIGHT  16
// NB: the products below are parenthesized so that they expand as a single
// value inside any expression (eg: "x / NL3D_CLOD_TEXT_NUM_IDS").
#define NL3D_CLOD_TEXT_NUM_IDS      (NL3D_CLOD_TEXT_NLOD_WIDTH*NL3D_CLOD_TEXT_NLOD_HEIGHT)
#define NL3D_CLOD_BIGTEXT_WIDTH     (NL3D_CLOD_TEXT_NLOD_WIDTH*NL3D_CLOD_TEXT_WIDTH)
#define NL3D_CLOD_BIGTEXT_HEIGHT    (NL3D_CLOD_TEXT_NLOD_HEIGHT*NL3D_CLOD_TEXT_HEIGHT)

// Default texture color. Alpha must be 255
#define NL3D_CLOD_DEFAULT_TEXCOLOR  CRGBA(255,255,255,255)
00064 
00065 
00066 // ***************************************************************************
00067 CLodCharacterManager::CLodCharacterManager()
00068 {
00069     _MaxNumVertices= 3000;
00070     _NumVBHard= 8;
00071     _Rendering= false;
00072     _LockDone= false;
00073 
00074     // setup the texture.
00075     _BigTexture= new CTextureBlank;
00076     // The texture always reside in memory... This take 1Mo of RAM. (16*32*16*32 * 4)
00077     // NB: this is simplier like that, and this is not a problem, since only 1 or 2 Mo are allocated :o)
00078     _BigTexture->setReleasable(false);
00079     // create the bitmap.
00080     _BigTexture->resize(NL3D_CLOD_BIGTEXT_WIDTH, NL3D_CLOD_BIGTEXT_HEIGHT, CBitmap::RGBA);
00081     // Format of texture, 16 bits and no mipmaps.
00082     _BigTexture->setUploadFormat(ITexture::RGB565);
00083     _BigTexture->setFilterMode(ITexture::Linear, ITexture::LinearMipMapOff);
00084     _BigTexture->setWrapS(ITexture::Clamp);
00085     _BigTexture->setWrapT(ITexture::Clamp);
00086 
00087     // Alloc free Ids
00088     _FreeIds.resize(NL3D_CLOD_TEXT_NUM_IDS);
00089     for(uint i=0;i<_FreeIds.size();i++)
00090     {
00091         _FreeIds[i]= i;
00092     }
00093 
00094     // setup the material
00095     _Material.initUnlit();
00096     _Material.setAlphaTest(true);
00097     _Material.setDoubleSided(true);
00098     _Material.setTexture(0, _BigTexture);
00099 
00100     // setup for lighting, Default for Ryzom setup
00101     _LightCorrectionMatrix.rotateZ((float)Pi/2);
00102     _LightCorrectionMatrix.invert();
00103     NL_SET_IB_NAME(_Triangles, "CLodCharacterManager::_Triangles");
00104 }
00105 
00106 
00107 // ***************************************************************************
00108 CLodCharacterManager::~CLodCharacterManager()
00109 {
00110     reset();
00111 }
00112 
00113 // ***************************************************************************
00114 void            CLodCharacterManager::reset()
00115 {
00116     nlassert(!isRendering());
00117 
00118     // delete shapeBanks.
00119     for(uint i=0;i<_ShapeBankArray.size();i++)
00120     {
00121         if(_ShapeBankArray[i])
00122             delete _ShapeBankArray[i];
00123     }
00124 
00125     // clears containers
00126     contReset(_ShapeBankArray);
00127     contReset(_ShapeMap);
00128 
00129     // reset render part.
00130     _VertexStream.release();
00131 }
00132 
00133 // ***************************************************************************
00134 uint32          CLodCharacterManager::createShapeBank()
00135 {
00136     // search a free entry
00137     for(uint i=0;i<_ShapeBankArray.size();i++)
00138     {
00139         // if ree, use it.
00140         if(_ShapeBankArray[i]==NULL)
00141         {
00142             _ShapeBankArray[i]= new CLodCharacterShapeBank;
00143             return i;
00144         }
00145     }
00146 
00147     // no free entrey, resize array.
00148     _ShapeBankArray.push_back(new CLodCharacterShapeBank);
00149     return _ShapeBankArray.size()-1;
00150 }
00151 
00152 // ***************************************************************************
00153 const CLodCharacterShapeBank    *CLodCharacterManager::getShapeBank(uint32 bankId) const
00154 {
00155     if(bankId>=_ShapeBankArray.size())
00156         return NULL;
00157     else
00158         return _ShapeBankArray[bankId];
00159 }
00160 
00161 // ***************************************************************************
00162 CLodCharacterShapeBank  *CLodCharacterManager::getShapeBank(uint32 bankId)
00163 {
00164     if(bankId>=_ShapeBankArray.size())
00165         return NULL;
00166     else
00167         return _ShapeBankArray[bankId];
00168 }
00169 
00170 // ***************************************************************************
00171 void            CLodCharacterManager::deleteShapeBank(uint32 bankId)
00172 {
00173     if(bankId>=_ShapeBankArray.size())
00174     {
00175         if(_ShapeBankArray[bankId])
00176         {
00177             delete _ShapeBankArray[bankId];
00178             _ShapeBankArray[bankId]= NULL;
00179         }
00180     }
00181 }
00182 
00183 // ***************************************************************************
00184 sint32          CLodCharacterManager::getShapeIdByName(const std::string &name) const
00185 {
00186     CstItStrIdMap   it= _ShapeMap.find(name);
00187     if(it==_ShapeMap.end())
00188         return -1;
00189     else
00190         return it->second;
00191 }
00192 
00193 // ***************************************************************************
00194 const CLodCharacterShape    *CLodCharacterManager::getShape(uint32 shapeId) const
00195 {
00196     // split the id
00197     uint    bankId= shapeId >> 16;
00198     uint    shapeInBankId= shapeId &0xFFFF;
00199 
00200     // if valid bankId
00201     const CLodCharacterShapeBank    *shapeBank= getShapeBank(bankId);
00202     if(shapeBank)
00203     {
00204         // return the shape from the bank
00205         return shapeBank->getShape(shapeInBankId);
00206     }
00207     else
00208         return NULL;
00209 }
00210 
00211 // ***************************************************************************
00212 bool            CLodCharacterManager::compile()
00213 {
00214     bool    error= false;
00215 
00216     // clear the map
00217     contReset(_ShapeMap);
00218 
00219     // build the map
00220     for(uint i=0; i<_ShapeBankArray.size(); i++)
00221     {
00222         if(_ShapeBankArray[i])
00223         {
00224             // Parse all Shapes
00225             for(uint j=0; j<_ShapeBankArray[i]->getNumShapes(); j++)
00226             {
00227                 // build the shape Id
00228                 uint    shapeId= (i<<16) + j;
00229 
00230                 // get the shape
00231                 const CLodCharacterShape    *shape= _ShapeBankArray[i]->getShape(j);
00232                 if(shape)
00233                 {
00234                     const string &name= shape->getName();
00235                     ItStrIdMap  it= _ShapeMap.find(name);
00236                     if(it == _ShapeMap.end())
00237                         // insert the id in the map
00238                         _ShapeMap.insert(make_pair(name, shapeId));
00239                     else
00240                     {
00241                         error= true;
00242                         nlwarning("Found a Character Lod with same name in the manager: %s", name.c_str());
00243                     }
00244                 }
00245             }
00246         }
00247     }
00248 
00249     return error;
00250 }
00251 
00252 // ***************************************************************************
00253 // ***************************************************************************
00254 // Render
00255 // ***************************************************************************
00256 // ***************************************************************************
00257 
00258 
00259 // ***************************************************************************
00260 void            CLodCharacterManager::setMaxVertex(uint32 maxVertex)
00261 {
00262     // we must not be between beginRender() and endRender()
00263     nlassert(!isRendering());
00264     _MaxNumVertices= maxVertex;
00265 }
00266 
00267 // ***************************************************************************
00268 void            CLodCharacterManager::setVertexStreamNumVBHard(uint32 numVBHard)
00269 {
00270     // we must not be between beginRender() and endRender()
00271     nlassert(!isRendering());
00272     _NumVBHard= numVBHard;
00273 }
00274 
00275 // ***************************************************************************
00276 void            CLodCharacterManager::beginRender(IDriver *driver, const CVector &managerPos)
00277 {
00278     H_AUTO( NL3D_CharacterLod_beginRender );
00279 
00280     // we must not be between beginRender() and endRender()
00281     nlassert(!isRendering());
00282 
00283     // Reset render
00284     //=================
00285     _CurrentVertexId=0;
00286     _CurrentTriId= 0;
00287 
00288     // update Driver.
00289     //=================
00290     nlassert(driver);
00291 
00292     // test change of vertexStream setup
00293     bool    mustChangeVertexStream= _VertexStream.getDriver() != driver;
00294     if(!mustChangeVertexStream)
00295     {
00296         mustChangeVertexStream= _MaxNumVertices != _VertexStream.getMaxVertices();
00297         mustChangeVertexStream= mustChangeVertexStream || _NumVBHard != _VertexStream.getNumVB();
00298     }
00299     // re-init?
00300     if( mustChangeVertexStream )
00301     {
00302         // chech offset
00303         CVertexBuffer   vb;
00304         vb.setVertexFormat(NL3D_CLOD_VERTEX_FORMAT);
00305         // NB: addRenderCharacterKey() loop hardCoded for Vertex+UV+Normal+Color only.
00306         nlassert( NL3D_CLOD_UV_OFF == vb.getTexCoordOff());
00307         nlassert( NL3D_CLOD_COLOR_OFF == vb.getColorOff());
00308 
00309         // Setup the vertex stream
00310         _VertexStream.release();
00311         _VertexStream.init(driver, NL3D_CLOD_VERTEX_FORMAT, _MaxNumVertices, _NumVBHard, "CLodManagerVB", false); // nb : don't use volatile lock as we keep the buffer locked
00312     }
00313 
00314     // prepare for render.
00315     //=================
00316 
00317     // Do not Lock Buffer now (will be done at the first instance added)
00318     nlassert(!_LockDone);
00319     _VertexSize= _VertexStream.getVertexSize();
00320     // NB: addRenderCharacterKey() loop hardCoded for Vertex+UV+Normal+Color only.
00321     nlassert( _VertexSize == NL3D_CLOD_VERTEX_SIZE );   // Vector + Normal + UV + RGBA
00322 
00323 
00324     // Alloc a minimum of primitives (2*vertices), to avoid as possible reallocation in addRenderCharacterKey
00325     if(_Triangles.getNumIndexes()<_MaxNumVertices * 2)
00326     {
00327         _Triangles.setFormat(NL_LOD_CHARACTER_INDEX_FORMAT);
00328         _Triangles.setNumIndexes(_MaxNumVertices * 2);
00329     }
00330 
00331     // Local manager matrix
00332     _ManagerMatrixPos= managerPos;
00333 
00334     // Ok, start rendering
00335     _Rendering= true;
00336 }
00337 
00338 
00339 // ***************************************************************************
00340 static inline void  computeLodLighting(CRGBA &lightRes, const CVector &lightObjectSpace, const CVector &normalPtr, CRGBA ambient, CRGBA diffuse)
00341 {
00342     float   f= lightObjectSpace * normalPtr;
00343     sint    f8= NLMISC::OptFastFloor(f);
00344     fastClamp8(f8);
00345     sint    r,g,b;
00346     r= (diffuse.R * f8)>>8;
00347     g= (diffuse.G * f8)>>8;
00348     b= (diffuse.B * f8)>>8;
00349     r+= ambient.R;
00350     g+= ambient.G;
00351     b+= ambient.B;
00352     fastClamp8(r);
00353     fastClamp8(g);
00354     fastClamp8(b);
00355     lightRes.R= r;
00356     lightRes.G= g;
00357     lightRes.B= b;
00358 }
00359 
00360 
00361 // ***************************************************************************
// Append one lod character to the batch being built between beginRender() and endRender().
//
// The current animation key of the shape of 'instance' is transformed by 'worldMatrix'
// (in a space relative to the manager position given to beginRender()), lit per vertex,
// and written into the vertex stream. The triangles of the shape are then appended to
// _Triangles, re-indexed by the current vertex offset.
//
// Preconditions (asserted): the vertex stream has a driver, a render pass is opened,
// and both clod->getNormals() and instance.getUVs() are non NULL.
//
// instance: gives ShapeId, AnimId, AnimTime, WrapMode, the per-vertex UVs (getUVs()) and
//      VertexAlphas. If VertexAlphas has not exactly one entry per vertex, alpha 255 is used.
// worldMatrix: world matrix of the instance.
// paramAmbient / paramDiffuse: light colors. Their alpha is ignored (reset to 0 on the
//      local copies): the alpha written per vertex comes from the alpha array.
// lightDir: light direction. It is multiplied by the transpose of worldMatrix, then by
//      _LightCorrectionMatrix, normalized and negated before being used.
// return false only if adding the vertices of the shape would exceed _MaxNumVertices
//      (nothing is written in this case). Return true otherwise, including when nothing
//      is added because the shape or the anim key is not found, or the shape has no vertex.
00362 bool            CLodCharacterManager::addRenderCharacterKey(CLodCharacterInstance &instance, const CMatrix &worldMatrix,
00363     CRGBA paramAmbient, CRGBA paramDiffuse, const CVector &lightDir)
00364 {
00365     H_AUTO ( NL3D_CharacterLod_AddRenderKey )
00366
00367     nlassert(_VertexStream.getDriver());
00368     // we must be between beginRender() and endRender()
00369     nlassert(isRendering());
00370
00371
00372     // regroup all variables that will be accessed in the ASM loop (minimize cache problems)
00373     uint            numVertices;
00374     const CLodCharacterShape::CVector3s     *vertPtr;
00375     const CVector   *normalPtr;
00376     const CUV       *uvPtr;
00377     const uint8     *alphaPtr;
00378     CVector         lightObjectSpace;
00379     CVector         matPos;
00380     float           a00, a01, a02;
00381     float           a10, a11, a12;
00382     float           a20, a21, a22;
00383     sint            f8;
      // all-zero 64 bits operand, used by the MMX unpack/pack instructions of the ASM loop
00384     uint64          blank= 0;
00385     CRGBA           ambient= paramAmbient;
00386     CRGBA           diffuse= paramDiffuse;
00387     // For ASM / MMX, must set 0 to alpha part, because replaced by *alphaPtr (with add)
00388     ambient.A= 0;
00389     diffuse.A= 0;
00390
00391
00392     // Get the Shape and current key.
00393     //=============
00394
00395     // get the shape
00396     const CLodCharacterShape    *clod= getShape(instance.ShapeId);
00397     // if not found quit, return true
00398     if(!clod)
00399         return true;
00400
00401     // get UV/Normal array. NULL => error
00402     normalPtr= clod->getNormals();
00403     // get UV of the instance
00404     uvPtr= instance.getUVs();
00405     // uvPtr is NULL means that initInstance() has not been called!!
00406     nlassert(normalPtr && uvPtr);
00407
00408     // get the anim key
00409     CVector     unPackScaleFactor;
00410     vertPtr= clod->getAnimKey(instance.AnimId, instance.AnimTime, instance.WrapMode, unPackScaleFactor);
00411     // if not found quit, return true
00412     if(!vertPtr)
00413         return true;
00414     // get num verts
00415     numVertices= clod->getNumVertices();
00416
00417     // empty shape??
00418     if(numVertices==0)
00419         return true;
00420
00421     // If too many vertices, quit, returning false.
00422     if(_CurrentVertexId+numVertices > _MaxNumVertices)
00423         return false;
00424
00425     // get alpha array
      // NB: static, hence shared by all the calls. It only grows, and is only filled with 255.
00426     static  vector<uint8>   defaultAlphaArray;
00427     // get the instance alpha if correctly setuped
00428     if(instance.VertexAlphas.size() == numVertices)
00429     {
00430         alphaPtr= &instance.VertexAlphas[0];
00431     }
00432     // if error, take 255 as alpha.
00433     else
00434     {
00435         // NB: still use an array. This case should never arise, but support it not at full optim.
00436         if(defaultAlphaArray.size()<numVertices)
00437             defaultAlphaArray.resize(numVertices, 255);
00438         alphaPtr= &defaultAlphaArray[0];
00439     }
00440
00441     // Lock Buffer if not done
00442     //=============
00443
00444     // Do this after code above because we are sure that we will fill something (numVertices>0)
00445     if(!_LockDone)
00446     {
00447         _VertexData= _VertexStream.lock();
00448         _LockDone= true;
00449     }
00450
00451     // After lock, For D3D, the VertexColor may be in BGRA format
00452     if(_VertexStream.isBRGA())
00453     {
00454         // then swap only the B and R (no cpu cycle added per vertex)
00455         ambient.swapBR();
00456         diffuse.swapBR();
00457     }
00458
00459
00460     // Prepare Transform
00461     //=============
00462
00463     // HTimerInfo: all this block takes 0.1%
00464
00465     // Get matrix pos.
00466     matPos= worldMatrix.getPos();
00467     // compute in manager space.
00468     matPos -= _ManagerMatrixPos;
00469     // Get rotation line vectors
00470     const float *worldM= worldMatrix.get();
00471     a00= worldM[0]; a01= worldM[4]; a02= worldM[8];
00472     a10= worldM[1]; a11= worldM[5]; a12= worldM[9];
00473     a20= worldM[2]; a21= worldM[6]; a22= worldM[10];
00474
00475     // get the light in object space.
00476     // Multiply light dir with transpose of worldMatrix. This may be not exact (not uniform scale) but sufficient.
00477     lightObjectSpace.x= a00 * lightDir.x + a10 * lightDir.y + a20 * lightDir.z;
00478     lightObjectSpace.y= a01 * lightDir.x + a11 * lightDir.y + a21 * lightDir.z;
00479     lightObjectSpace.z= a02 * lightDir.x + a12 * lightDir.y + a22 * lightDir.z;
00480     // animation User correction
00481     lightObjectSpace= _LightCorrectionMatrix.mulVector(lightObjectSpace);
00482     // normalize, and neg for Dot Product.
00483     lightObjectSpace.normalize();
00484     lightObjectSpace= -lightObjectSpace;
00485     // preMul by 255 for RGBA uint8
00486     lightObjectSpace*= 255;
00487
00488     // multiply matrix with scale factor for Pos.
      // NB: done after the light computation above, which uses the unscaled matrix.
00489     a00*= unPackScaleFactor.x; a01*= unPackScaleFactor.y; a02*= unPackScaleFactor.z;
00490     a10*= unPackScaleFactor.x; a11*= unPackScaleFactor.y; a12*= unPackScaleFactor.z;
00491     a20*= unPackScaleFactor.x; a21*= unPackScaleFactor.y; a22*= unPackScaleFactor.z;
00492
00493     // get dst Array.
00494     uint8   *dstPtr;
00495     dstPtr= _VertexData + _CurrentVertexId * _VertexSize;
00496
00497
00498     /* PreCaching Note: CFastMem::precache() has been tested (done on the 4 arrays) but not very interesting,
00499         maybe because the cache miss improve //ism a bit below.
00500     */
00501
00502     // Fill the VB
00503     //=============
      // Both versions below consume the 4 source arrays (vertPtr, normalPtr, uvPtr, alphaPtr),
      // advance dstPtr, and count numVertices down to 0.
00504 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
00505     // optimized version
00506     if(CSystemInfo::hasMMX())
00507     {
00508         H_AUTO( NL3D_CharacterLod_vertexFill );
00509
          // NB: always true here, since numVertices==0 has returned above.
00510         if(numVertices)
00511         {
00512             /* NB: order is important for AGP filling optimisation in dstPtr
00513
00514                 Pentium2+ optimisation notes:
00515
00516                 - "uop" comment formating:
00517                     A/B means "A micro-ops in port 0, and B micro-ops in port 2".  (port 1 is very rare for FPU)
00518                     A/B/C/D means "A micro-ops in port 0, B in port 2, C in port 3 and D in port 4".
00519                     The number in () is the delay (if any).
00520                 - the "compute lighting part" must done first, because of the "fistp f8" mem writes that must
00521                         be place far away from the "mov eax, f8" read in clamp lighting part
00522                         (else seems that it crashes all the //ism)
00523                 - No need to Interleave on Pentium2+. But prevents "write/read stall" by putting the write
00524                     far away from the next read. Else stall of 3 cycles + BIG BREAK OF //ism (I think).
00525                     This had save me 120 cycles / 240 !!!
00526
00527                 BenchResults:
00528                 - The "transform vertex part" and "all next part" cost 42 cycles, but is somewhat optimal:
00529                     63 uop (=> min 21 cycles), but 36 uop in the P0 port (=> this is the bottleneck)
00530                 - The lighting part adds 1 cycle only ?????  (44 cycles) But still relevant and optimal:
00531                     43 uop in port P0!!!!
00532                 - The UV part adds 4 cycles (47) (should not since 0 in Port P0), still acceptable.
00533                 - The clamp part adds 3 cycles (50), and add 11 cycles in "P0 or P1" (but heavy dependency)
00534                     If we assume all goes into P1, it should takes 0... still acceptable (optimal==43?)
00535                 - The alpha part adds 2 cycles (52, optimal=45). OK.
00536                 - The modulate part adds 15 cycles. OK
00537
00538                 TOTAL: 67 cycles in theory (write in RAM, no cache miss problem)
00539                 BENCH: ASM version: 91 cycles (Write in AGP, some cache miss problems, still good against 67)
00540                        C version: 316 cycles.
00541             */
00542             __asm
00543             {
00544                 mov     edi, dstPtr
00545             theLoop:
00546                 // **** compute lighting
00547                 mov     esi,normalPtr           // uop: 0/1
00548                 // dot3
00549                 fld     dword ptr [esi]         // uop: 0/1
00550                 fmul    lightObjectSpace.x      // uop: 1/1 (5)
00551                 fld     dword ptr [esi+4]       // uop: 0/1
00552                 fmul    lightObjectSpace.y      // uop: 1/1 (5)
00553                 faddp   st(1),st                // uop: 1/0 (3)
00554                 fld     dword ptr [esi+8]       // uop: 0/1
00555                 fmul    lightObjectSpace.z      // uop: 1/1 (5)
00556                 faddp   st(1),st                // uop: 1/0 (3)
00557                 fistp   f8                      // uop: 2/0/1/1 (5)
00558                 // next
00559                 add     esi, 12                 // uop: 1/0
00560                 mov     normalPtr, esi          // uop: 0/0/1/1
00561
00562
00563                 // **** transform vertex, and store
00564                 mov     esi, vertPtr            // uop: 0/1
00565                 fild    word ptr[esi]           // uop: 3/1 (5)
00566                 fild    word ptr[esi+2]         // uop: 3/1 (5)
00567                 fild    word ptr[esi+4]         // uop: 3/1 (5)
00568                 // x
00569                 fld     a00                     // uop: 0/1
00570                 fmul    st, st(3)               // uop: 1/0 (5)
00571                 fld     a01                     // uop: 0/1
00572                 fmul    st, st(3)               // uop: 1/0 (5)
00573                 faddp   st(1), st               // uop: 1/0 (3)
00574                 fld     a02                     // uop: 0/1
00575                 fmul    st, st(2)               // uop: 1/0 (5)
00576                 faddp   st(1), st               // uop: 1/0 (3)
00577                 fld     matPos.x                // uop: 0/1
00578                 faddp   st(1), st               // uop: 1/0 (3)
00579                 fstp    dword ptr[edi]          // uop: 0/0/1/1
00580                 // y
00581                 fld     a10
00582                 fmul    st, st(3)
00583                 fld     a11
00584                 fmul    st, st(3)
00585                 faddp   st(1), st
00586                 fld     a12
00587                 fmul    st, st(2)
00588                 faddp   st(1), st
00589                 fld     matPos.y
00590                 faddp   st(1), st
00591                 fstp    dword ptr[edi+4]
00592                 // z
00593                 fld     a20
00594                 fmul    st, st(3)
00595                 fld     a21
00596                 fmul    st, st(3)
00597                 faddp   st(1), st
00598                 fld     a22
00599                 fmul    st, st(2)
00600                 faddp   st(1), st
00601                 fld     matPos.z
00602                 faddp   st(1), st
00603                 fstp    dword ptr[edi+8]
00604                 // flush stack
00605                 fstp    st                      // uop: 1/0
00606                 fstp    st                      // uop: 1/0
00607                 fstp    st                      // uop: 1/0
00608                 // next
00609                 add     esi, 6                  // uop: 1/0
00610                 mov     vertPtr, esi            // uop: 0/0/1/1
00611
00612
00613                 // **** copy uv
00614                 mov     esi, uvPtr                          // uop: 0/1
00615                 mov     eax, [esi]                          // uop: 0/1
00616                 mov     [edi+NL3D_CLOD_UV_OFF], eax         // uop: 0/0/1/1
00617                 mov     ebx, [esi+4]                        // uop: 0/1
00618                 mov     [edi+NL3D_CLOD_UV_OFF+4], ebx       // uop: 0/0/1/1
00619                 // next
00620                 add     esi, 8                  // uop: 1/0
00621                 mov     uvPtr, esi              // uop: 0/0/1/1
00622
00623
00624                 // **** Clamp lighting
00625                 // clamp to 0 only. will be clamped to 255 by MMX
00626                 mov     eax, f8                 // uop: 0/1
00627                 cmp     eax, 0x80000000         // if>=0 => CF=1
00628                 sbb     ebx, ebx                // if>=0 => CF==1 => ebx=0xFFFFFFFF
00629                 and     eax, ebx                // if>=0 => eax unchanged, else eax=0 (clamped)
00630
00631
00632                 // **** Modulate lighting modulate with diffuse color, add ambient term, using MMX
00633                 movd            mm0, eax        // 0000000L     uop: 1/0
00634                 packuswb        mm0, mm0        // 000L000L     uop: 1/0 (p1)
00635                 packuswb        mm0, mm0        // 0L0L0L0L     uop: 1/0 (p1)
00636                 movd            mm1, diffuse    //              uop: 0/1
00637                 punpcklbw       mm1, blank      //              uop: 1/1 (p1)
00638                 pmullw          mm0, mm1        // diffuse*L    uop: 1/0 (3)
00639                 psrlw           mm0, 8          // 0A0B0G0R     uop: 1/0 (p1)
00640                 packuswb        mm0, blank      // 0000ABGR     uop: 1/1 (p1)
00641                 movd            mm2, ambient    //              uop: 0/1
00642                 paddusb         mm0, mm2        //              uop: 1/0
00643                 movd            ebx, mm0        // ebx= AABBGGRR    uop: 1/0
00644                 // NB: emms is not so bad on P2+: delay of 6, +11 (NB: far better than no MMX instructions)
00645                 emms                            // uop: 11/0 (6).  (?????)
00646
00647
00648                 // **** append alpha, and store
00649                 mov     esi, alphaPtr                       // uop: 0/1
00650                 movzx   eax, byte ptr[esi]                  // uop: 0/1
00651                 shl     eax, 24                             // uop: 1/0
00652                 add     ebx, eax                            // uop: 1/0
00653                 // now, ebx=  AABBGGRR
00654                 mov     [edi+NL3D_CLOD_COLOR_OFF], ebx      // uop: 0/0/1/1
00655                 // next
00656                 add     esi, 1                  // uop: 1/0
00657                 mov     alphaPtr, esi           // uop: 0/0/1/1
00658
00659
00660                 // **** next
00661                 add     edi, NL3D_CLOD_VERTEX_SIZE      // uop: 1/0
00662
00663                 mov     eax, numVertices        // uop: 0/1
00664                 dec     eax                     // uop: 1/0
00665                 mov     numVertices, eax        // uop: 0/0/1/1
00666
00667                 jnz     theLoop                 // uop: 1/1 (p1)
00668
00669                 // To have same behavior than c code
00670                 mov     dstPtr, edi
00671             }
00672         }
00673     }
00674     else
00675 #endif
00676     {
          // C version: same work as the ASM loop above, one vertex per iteration.
00677         H_AUTO( NL3D_CharacterLod_vertexFill );
00678
00679         CVector     fVect;
00680
00681         for(;numVertices>0;)
00682         {
00683             // NB: order is important for AGP filling optimisation
00684             // transform vertex, and store.
00685             CVector     *dstVector= (CVector*)dstPtr;
00686             fVect.x= vertPtr->x; fVect.y= vertPtr->y; fVect.z= vertPtr->z;
00687             ++vertPtr;
00688             dstVector->x= a00 * fVect.x + a01 * fVect.y + a02 * fVect.z + matPos.x;
00689             dstVector->y= a10 * fVect.x + a11 * fVect.y + a12 * fVect.z + matPos.y;
00690             dstVector->z= a20 * fVect.x + a21 * fVect.y + a22 * fVect.z + matPos.z;
00691             // Copy UV
00692             *(CUV*)(dstPtr + NL3D_CLOD_UV_OFF)= *uvPtr;
00693             ++uvPtr;
00694
00695             // Compute Lighting.
00696             CRGBA   lightRes;
00697             computeLodLighting(lightRes, lightObjectSpace, *normalPtr, ambient, diffuse);
00698             ++normalPtr;
              // computeLodLighting() only writes R, G and B: the alpha comes from the alpha array.
00699             lightRes.A= *alphaPtr;
00700             ++alphaPtr;
00701             // store.
00702             *((CRGBA*)(dstPtr + NL3D_CLOD_COLOR_OFF))= lightRes;
00703
00704             // next
00705             dstPtr+= NL3D_CLOD_VERTEX_SIZE;
00706             numVertices--;
00707         }
00708     }
00709
00710     // Add Primitives.
00711     //=============
00712
00713     {
00714         H_AUTO( NL3D_CharacterLod_primitiveFill )
00715
00716         // get number of tri indexes
00717         uint    numTriIdxs= clod->getNumTriangles() * 3;
00718
00719         // Yoyo: there is an assert with getPtr(). Not sure, but maybe arise if numTriIdxs==0
00720         if(numTriIdxs)
00721         {
00722             // realloc tris if needed.
00723             if(_CurrentTriId+numTriIdxs > _Triangles.getNumIndexes())
00724             {
00725                 _Triangles.setFormat(NL_LOD_CHARACTER_INDEX_FORMAT);
00726                 _Triangles.setNumIndexes(_CurrentTriId+numTriIdxs);
00727             }
00728
00729             // reindex and copy tris
00730             CIndexBufferReadWrite iba;
00731             _Triangles.lock(iba);
00732             const TLodCharacterIndexType    *srcIdx= clod->getTriangleArray();
00733             nlassert(sizeof(TLodCharacterIndexType) == _Triangles.getIndexNumBytes());
00734             TLodCharacterIndexType      *dstIdx= (TLodCharacterIndexType *) iba.getPtr()+_CurrentTriId;
              // the indices of the shape are local to the shape: offset them by the first vertex written for it.
00735             for(;numTriIdxs>0;numTriIdxs--, srcIdx++, dstIdx++)
00736             {
00737                 *dstIdx= *srcIdx + _CurrentVertexId;
00738             }
00739         }
00740     }
00741
00742     // Next
00743     //=============
00744
      // NB: numVertices and numTriIdxs have been counted down to 0 by the loops above,
      // hence the counts are read again from the shape.
00745     // Inc Vertex count.
00746     _CurrentVertexId+= clod->getNumVertices();
00747     // Inc Prim count.
00748     _CurrentTriId+= clod->getNumTriangles() * 3;
00749
00750
00751     // key added
00752     return true;
00753 }
00754 
00755 // ***************************************************************************
00756 void            CLodCharacterManager::endRender()
00757 {
00758     H_AUTO ( NL3D_CharacterLod_endRender );
00759 
00760     IDriver     *driver= _VertexStream.getDriver();
00761     nlassert(driver);
00762     // we must be between beginRender() and endRender()
00763     nlassert(isRendering());
00764 
00765     // if something rendered
00766     if(_LockDone)
00767     {
00768         // UnLock Buffer.
00769         _VertexStream.unlock(_CurrentVertexId);
00770         _LockDone= false;
00771 
00772         // Render the VBuffer and the primitives.
00773         if(_CurrentTriId>0)
00774         {
00775             // setup matrix.
00776             CMatrix     managerMatrix;
00777             managerMatrix.setPos(_ManagerMatrixPos);
00778             driver->setupModelMatrix(managerMatrix);
00779 
00780             // active VB
00781             _VertexStream.activate();
00782 
00783             // render triangles
00784             driver->activeIndexBuffer(_Triangles);
00785             driver->renderTriangles(_Material, 0, _CurrentTriId/3);
00786         }
00787 
00788         // swap Stream VBHard
00789         _VertexStream.swapVBHard();
00790     }
00791 
00792     // Ok, end rendering
00793     _Rendering= false;
00794 }
00795 
00796 // ***************************************************************************
00797 void            CLodCharacterManager::setupNormalCorrectionMatrix(const CMatrix &normalMatrix)
00798 {
00799     _LightCorrectionMatrix= normalMatrix;
00800     _LightCorrectionMatrix.setPos(CVector::Null);
00801     _LightCorrectionMatrix.invert();
00802 }
00803 
00804 
00805 // ***************************************************************************
00806 // ***************************************************************************
00807 // Texturing.
00808 // ***************************************************************************
00809 // ***************************************************************************
00810 
00811 
00812 // ***************************************************************************
00813 CLodCharacterTmpBitmap::CLodCharacterTmpBitmap()
00814 {
00815     reset();
00816 }
00817 
00818 // ***************************************************************************
00819 void            CLodCharacterTmpBitmap::reset()
00820 {
00821     // setup a 1*1 bitmap
00822     _Bitmap.resize(1);
00823     _Bitmap[0]= CRGBA::Black;
00824     _WidthPower=0;
00825     _UShift= 8;
00826     _VShift= 8;
00827 }
00828 
00829 // ***************************************************************************
00830 void            CLodCharacterTmpBitmap::build(const NLMISC::CBitmap &bmpIn)
00831 {
00832     uint    width= bmpIn.getWidth();
00833     uint    height= bmpIn.getHeight();
00834     nlassert(width>0 && width<=256);
00835     nlassert(height>0 && height<=256);
00836 
00837     // resize bitmap.
00838     _Bitmap.resize(width*height);
00839     _WidthPower= getPowerOf2(width);
00840     // compute shift
00841     _UShift= 8-getPowerOf2(width);
00842     _VShift= 8-getPowerOf2(height);
00843 
00844     // convert the bitmap.
00845     CBitmap     bmp= bmpIn;
00846     bmp.convertToType(CBitmap::RGBA);
00847     CRGBA   *src= (CRGBA*)&bmp.getPixels()[0];
00848     CRGBA   *dst= _Bitmap.getPtr();
00849     for(sint nPix= width*height;nPix>0;nPix--, src++, dst++)
00850     {
00851         *dst= *src;
00852     }
00853 }
00854 
00855 // ***************************************************************************
00856 void            CLodCharacterTmpBitmap::build(CRGBA col)
00857 {
00858     // setup a 1*1 bitmap and set it with col
00859     reset();
00860     _Bitmap[0]= col;
00861 }
00862 
00863 
00864 // ***************************************************************************
00865 void            CLodCharacterManager::initInstance(CLodCharacterInstance &instance)
00866 {
00867     // first release in (maybe) other manager.
00868     if(instance._Owner)
00869         instance._Owner->releaseInstance(instance);
00870 
00871     // get the shape
00872     const CLodCharacterShape    *clod= getShape(instance.ShapeId);
00873     // if not found quit
00874     if(!clod)
00875         return;
00876     // get Uvs.
00877     const CUV   *uvSrc= clod->getUVs();
00878     nlassert(uvSrc);
00879 
00880 
00881     // Ok, init header
00882     instance._Owner= this;
00883     instance._UVs.resize(clod->getNumVertices());
00884 
00885     // allocate an id. If cannot, then fill Uvs with 0 => filled with Black. (see endTextureCompute() why).
00886     if(_FreeIds.empty())
00887     {
00888         // set a "Not enough memory" id
00889         instance._TextureId= NL3D_CLOD_TEXT_NUM_IDS;
00890         CUV     uv(0,0);
00891         fill(instance._UVs.begin(), instance._UVs.end(), uv);
00892     }
00893     // else OK, can instanciate the Uvs.
00894     else
00895     {
00896         // get the id.
00897         instance._TextureId= _FreeIds.back();
00898         _FreeIds.pop_back();
00899         // get the x/y.
00900         uint    xId= instance._TextureId % NL3D_CLOD_TEXT_NLOD_WIDTH;
00901         uint    yId= instance._TextureId / NL3D_CLOD_TEXT_NLOD_WIDTH;
00902         // compute the scale/bias to apply to Uvs.
00903         float   scaleU= 1.0f / NL3D_CLOD_TEXT_NLOD_WIDTH;
00904         float   scaleV= 1.0f / NL3D_CLOD_TEXT_NLOD_HEIGHT;
00905         float   biasU= (float)xId / NL3D_CLOD_TEXT_NLOD_WIDTH;
00906         float   biasV= (float)yId / NL3D_CLOD_TEXT_NLOD_HEIGHT;
00907         // apply it to each UVs.
00908         CUV     *uvDst= &instance._UVs[0];
00909         for(uint i=0; i<instance._UVs.size();i++)
00910         {
00911             uvDst[i].U= biasU + uvSrc[i].U*scaleU;
00912             uvDst[i].V= biasV + uvSrc[i].V*scaleV;
00913         }
00914     }
00915 }
00916 
00917 // ***************************************************************************
00918 void            CLodCharacterManager::releaseInstance(CLodCharacterInstance &instance)
00919 {
00920     if(instance._Owner==NULL)
00921         return;
00922     nlassert(this==instance._Owner);
00923 
00924     // if the id is not a "Not enough memory" id, release it.
00925     if(instance._TextureId>=0 && instance._TextureId<NL3D_CLOD_TEXT_NUM_IDS)
00926         _FreeIds.push_back(instance._TextureId);
00927 
00928     // reset the instance
00929     instance._Owner= NULL;
00930     instance._TextureId= -1;
00931     contReset(instance._UVs);
00932 }
00933 
00934 
00935 // ***************************************************************************
00936 CRGBA           *CLodCharacterManager::getTextureInstance(CLodCharacterInstance &instance)
00937 {
00938     nlassert(instance._Owner==this);
00939     nlassert(instance._TextureId!=-1);
00940     // if the texture id is a "not enough memory", quit.
00941     if(instance._TextureId==NL3D_CLOD_TEXT_NUM_IDS)
00942         return NULL;
00943 
00944     // get the x/y.
00945     uint    xId= instance._TextureId % NL3D_CLOD_TEXT_NLOD_WIDTH;
00946     uint    yId= instance._TextureId / NL3D_CLOD_TEXT_NLOD_WIDTH;
00947 
00948     // get the ptr on the correct pixel.
00949     CRGBA   *pix= (CRGBA*)&_BigTexture->getPixels(0)[0];
00950     return pix + yId*NL3D_CLOD_TEXT_HEIGHT*NL3D_CLOD_BIGTEXT_WIDTH + xId*NL3D_CLOD_TEXT_WIDTH;
00951 }
00952 
00953 
00954 // ***************************************************************************
00955 bool            CLodCharacterManager::startTextureCompute(CLodCharacterInstance &instance)
00956 {
00957     CRGBA   *dst= getTextureInstance(instance);
00958     if(!dst)
00959         return false;
00960 
00961     // erase the texture with 0,0,0,255. Alpha is actually the min "Quality" part of the CTUVQ.
00962     CRGBA   col= NL3D_CLOD_DEFAULT_TEXCOLOR;
00963     for(uint y=0;y<NL3D_CLOD_TEXT_HEIGHT;y++)
00964     {
00965         // erase the line
00966         for(uint x=0;x<NL3D_CLOD_TEXT_WIDTH;x++)
00967             dst[x]= col;
00968         // Next line
00969         dst+= NL3D_CLOD_BIGTEXT_WIDTH;
00970     }
00971 
00972     return true;
00973 }
00974 
00975 // ***************************************************************************
00976 void            CLodCharacterManager::addTextureCompute(CLodCharacterInstance &instance, const CLodCharacterTexture &lodTexture)
00977 {
00978     CRGBA   *dst= getTextureInstance(instance);
00979     if(!dst)
00980         return;
00981 
00982     // get lookup ptr.
00983     nlassert(lodTexture.Texture.size()==NL3D_CLOD_TEXT_SIZE);
00984     const CLodCharacterTexture::CTUVQ       *lookUpPtr= &lodTexture.Texture[0];
00985 
00986     // apply the lodTexture, taking only better quality (ie nearer 0)
00987     for(uint y=0;y<NL3D_CLOD_TEXT_HEIGHT;y++)
00988     {
00989         // erase the line
00990         for(uint x=0;x<NL3D_CLOD_TEXT_WIDTH;x++)
00991         {
00992             CLodCharacterTexture::CTUVQ     lut= *lookUpPtr;
00993             // if this quality is better than the one stored
00994             if(lut.Q<dst[x].A)
00995             {
00996                 // get what texture to read, and read the pixel.
00997                 CRGBA   col= _TmpBitmaps[lut.T].getPixel(lut.U, lut.V);
00998                 // set quality.
00999                 col.A= lut.Q;
01000                 // set in dest
01001                 dst[x]= col;
01002             }
01003 
01004             // next lookup
01005             lookUpPtr++;
01006         }
01007         // Next line
01008         dst+= NL3D_CLOD_BIGTEXT_WIDTH;
01009     }
01010 }
01011 
01012 // ***************************************************************************
01013 void            CLodCharacterManager::endTextureCompute(CLodCharacterInstance &instance, uint numBmpToReset)
01014 {
01015     CRGBA   *dst= getTextureInstance(instance);
01016     if(!dst)
01017         return;
01018 
01019     // reset All Alpha values to 255 => no AlphaTest problems
01020     for(uint y=0;y<NL3D_CLOD_TEXT_HEIGHT;y++)
01021     {
01022         // erase the line
01023         for(uint x=0;x<NL3D_CLOD_TEXT_WIDTH;x++)
01024         {
01025             dst[x].A= 255;
01026         }
01027         // Next line
01028         dst+= NL3D_CLOD_BIGTEXT_WIDTH;
01029     }
01030 
01031     // If the id == 0 then must reset the 0,0 Pixel to black. for the "Not Enough memory" case in initInstance().
01032     if(instance._TextureId==0)
01033         *(CRGBA*)&_BigTexture->getPixels(0)[0]= NL3D_CLOD_DEFAULT_TEXCOLOR;
01034 
01035     // get the x/y.
01036     uint    xId= instance._TextureId % NL3D_CLOD_TEXT_NLOD_WIDTH;
01037     uint    yId= instance._TextureId / NL3D_CLOD_TEXT_NLOD_WIDTH;
01038     // touch the texture for Driver update.
01039     _BigTexture->touchRect(
01040         CRect(xId*NL3D_CLOD_TEXT_WIDTH, yId*NL3D_CLOD_TEXT_HEIGHT, NL3D_CLOD_TEXT_WIDTH, NL3D_CLOD_TEXT_HEIGHT) );
01041 
01042     // reset tmpBitmaps / free memory.
01043     for(uint i=0; i<numBmpToReset; i++)
01044     {
01045         _TmpBitmaps[i].reset();
01046     }
01047 
01048     // TestYoyo
01049     /*NLMISC::COFile    f("tam.tga");
01050     _BigTexture->writeTGA(f,32);*/
01051 }
01052 
01053 
01054 // ***************************************************************************
01055 bool    CLodCharacterManager::fastIntersect(const CLodCharacterInstance &instance, const NLMISC::CMatrix &toRaySpace, float &dist2D, float &distZ, bool computeDist2D)
01056 {
01057     H_AUTO ( NL3D_CharacterLod_fastIntersect )
01058 
01059     uint            numVertices;
01060     const CLodCharacterShape::CVector3s     *vertPtr;
01061     CVector         matPos;
01062     float           a00, a01, a02;
01063     float           a10, a11, a12;
01064     float           a20, a21, a22;
01065 
01066 
01067     // Get the Shape and current key.
01068     //=============
01069 
01070     // get the shape
01071     const CLodCharacterShape    *clod= getShape(instance.ShapeId);
01072     // if not found quit
01073     if(!clod)
01074         return false;
01075 
01076     // get the anim key
01077     CVector     unPackScaleFactor;
01078     vertPtr= clod->getAnimKey(instance.AnimId, instance.AnimTime, instance.WrapMode, unPackScaleFactor);
01079     // if not found quit
01080     if(!vertPtr)
01081         return false;
01082     // get num verts
01083     numVertices= clod->getNumVertices();
01084 
01085     // empty shape??
01086     if(numVertices==0)
01087         return false;
01088 
01089     // Prepare Transform
01090     //=============
01091 
01092     // Get matrix pos.
01093     matPos= toRaySpace.getPos();
01094     // Get rotation line vectors
01095     const float *rayM= toRaySpace.get();
01096     a00= rayM[0]; a01= rayM[4]; a02= rayM[8];
01097     a10= rayM[1]; a11= rayM[5]; a12= rayM[9];
01098     a20= rayM[2]; a21= rayM[6]; a22= rayM[10];
01099 
01100     // multiply matrix with scale factor for Pos.
01101     a00*= unPackScaleFactor.x; a01*= unPackScaleFactor.y; a02*= unPackScaleFactor.z;
01102     a10*= unPackScaleFactor.x; a11*= unPackScaleFactor.y; a12*= unPackScaleFactor.z;
01103     a20*= unPackScaleFactor.x; a21*= unPackScaleFactor.y; a22*= unPackScaleFactor.z;
01104 
01105     // get dst Array.
01106     // enlarge temp buffer
01107     static std::vector<CVector> lodInRaySpace;
01108     if(numVertices>lodInRaySpace.size())
01109         lodInRaySpace.resize(numVertices);
01110     CVector *dstPtr= &lodInRaySpace[0];
01111 
01112 
01113     // Fill the temp skin
01114     //=============
01115     {
01116         CVector     fVect;
01117 
01118         for(;numVertices>0;)
01119         {
01120             // transform vertex, and store.
01121             fVect.x= vertPtr->x; fVect.y= vertPtr->y; fVect.z= vertPtr->z;
01122             ++vertPtr;
01123             dstPtr->x= a00 * fVect.x + a01 * fVect.y + a02 * fVect.z + matPos.x;
01124             dstPtr->y= a10 * fVect.x + a11 * fVect.y + a12 * fVect.z + matPos.y;
01125             dstPtr->z= a20 * fVect.x + a21 * fVect.y + a22 * fVect.z + matPos.z;
01126 
01127             // next
01128             dstPtr++;
01129             numVertices--;
01130         }
01131     }
01132 
01133     // Test intersection
01134     //=============
01135 
01136     return CRayMesh::getRayIntersection(lodInRaySpace, clod->getTriangleIndices(), dist2D, distZ, computeDist2D);
01137 }
01138 
01139 
01140 } // NL3D

Generated on Thu Jan 7 08:26:27 2010 for NeL by  doxygen 1.6.1