diff_tool.h

Go to the documentation of this file.
00001 
00004 /* Copyright, 2000, 2001, 2002 Nevrax Ltd.
00005  *
00006  * This file is part of NEVRAX NEL.
00007  * NEVRAX NEL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2, or (at your option)
00010  * any later version.
00011 
00012  * NEVRAX NEL is distributed in the hope that it will be useful, but
00013  * WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00015  * General Public License for more details.
00016 
00017  * You should have received a copy of the GNU General Public License
00018  * along with NEVRAX NEL; see the file COPYING. If not, write to the
00019  * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
00020  * MA 02111-1307, USA.
00021  */
00022 
00023 #ifndef DIFF_TOOL_H
00024 #define DIFF_TOOL_H
00025 
#include "i18n.h"

// Standard headers used directly by this file (std::find / std::find_if / std::swap,
// std::string, std::vector). Kept after the NeL header so its configuration is seen first.
#include <algorithm>
#include <string>
#include <utility>
#include <vector>
00027 
00028 namespace STRING_MANAGER
00029 {
00030     const ucstring      nl("\r\n");
00031 
00032 
00033     struct TStringInfo
00034     {
00035         std::string         Identifier;
00036         ucstring            Text;
00037         ucstring            Text2;
00038         mutable ucstring    Comments;
00039         uint64              HashValue;
00040     };
00041 
00042     struct TStringDiffContext
00043     {
00044         typedef std::vector<TStringInfo>::iterator  iterator;
00045         const std::vector<TStringInfo>  &Addition;
00046         std::vector<TStringInfo>        &Reference;
00047         std::vector<TStringInfo>        &Diff;
00048 
00049         TStringDiffContext(const std::vector<TStringInfo> &addition, std::vector<TStringInfo> &reference, std::vector<TStringInfo> &diff)
00050             : Addition(addition),
00051             Reference(reference),
00052             Diff(diff)
00053         {
00054         }
00055     };
00056 
00057     struct TClause
00058     {
00059         std::string Identifier;
00060         ucstring    Conditions;
00061         ucstring    Text;
00062         ucstring    Comments;
00063         uint64      HashValue;
00064     };
00065 
00066     struct TPhrase
00067     {
00068         std::string             Identifier;
00069         ucstring                Parameters;
00070         mutable ucstring        Comments;
00071         std::vector<TClause>    Clauses;
00072         uint64                  HashValue;
00073     };
00074 
00075     struct TPhraseDiffContext
00076     {
00077         typedef std::vector<TPhrase>::iterator iterator;
00078         const std::vector<TPhrase>  &Addition;
00079         std::vector<TPhrase>            &Reference;
00080         std::vector<TPhrase>            &Diff;
00081 
00082         TPhraseDiffContext(const std::vector<TPhrase> &addition, std::vector<TPhrase> &reference, std::vector<TPhrase> &diff)
00083             : Addition(addition),
00084             Reference(reference),
00085             Diff(diff)
00086         {
00087         }
00088     };
00089 
00090     struct TWorksheet
00091     {
00092         typedef std::vector<ucstring>   TRow;
00093         typedef std::vector<TRow>       TData;
00094         TData   Data;
00095         uint    ColCount;
00096 
00097         TWorksheet()
00098             : ColCount(0)
00099         {
00100         }
00101 
00102         std::vector<TRow>::iterator     begin()
00103         {
00104             return Data.begin();
00105         }
00106 
00107         std::vector<TRow>::iterator     end()
00108         {
00109             return Data.end();
00110         }
00111 
00112         std::vector<TRow>::const_iterator       begin() const
00113         {
00114             return Data.begin();
00115         }
00116 
00117         std::vector<TRow>::const_iterator       end() const
00118         {
00119             return Data.end();
00120         }
00121 
00122         void push_back(const TRow &row)
00123         {
00124             Data.push_back(row);
00125         }
00126 
00127         std::vector<TRow>::iterator insert(std::vector<TRow>::iterator pos, const TRow &value)
00128         {
00129             return Data.insert(pos, value);
00130         }
00131 
00132         std::vector<TRow>::iterator erase(std::vector<TRow>::iterator it)
00133         {
00134             return Data.erase(it);
00135         }
00136 
00137         TRow &back()
00138         {
00139             return Data.back();
00140         }
00141 
00142         TRow &operator [] (uint index)
00143         {
00144             return Data[index];
00145         }
00146 
00147         const TRow &operator [] (uint index) const
00148         {
00149             return Data[index];
00150         }
00151 
00152         uint size() const
00153         {
00154             return Data.size();
00155         }
00156 
00157         void insertColumn(uint colIndex)
00158         {
00159             nlassert(colIndex <= ColCount);
00160 
00161             for (uint i=0; i<Data.size(); ++i)
00162             {
00163                 // insert a default value.
00164                 Data[i].insert(Data[i].begin()+colIndex, ucstring());
00165             }
00166             ColCount++;
00167         }
00168 
00169         void copyColumn(uint srcColIndex, uint dstColIndex)
00170         {
00171             nlassert(srcColIndex < ColCount);
00172             nlassert(dstColIndex < ColCount);
00173 
00174             for (uint i=0; i<Data.size(); ++i)
00175             {
00176                 Data[i][dstColIndex] = Data[i][srcColIndex];
00177             }
00178         }
00179 
00180         void eraseColumn(uint colIndex)
00181         {
00182             nlassertex(colIndex < ColCount, ("TWorksheet::eraseColumn : bad column index: colIndex(%u) is not less than ColCount(%u)", colIndex, ColCount));
00183 
00184             for (uint i=0; i<Data.size(); ++i)
00185             {
00186                 // insert a default value.
00187                 Data[i].erase(Data[i].begin()+colIndex);
00188             }
00189             ColCount--;
00190         }
00191 
00192         void moveColumn(uint oldColIndex, uint newColIndex)
00193         {
00194             nlassert(oldColIndex < ColCount);
00195             nlassert(newColIndex < ColCount);
00196 
00197             if (oldColIndex == newColIndex)
00198                 return;
00199 
00200             if (newColIndex > oldColIndex)
00201             {
00202                 // the dst is after the src, no problem with index
00203                 insertColumn(newColIndex);
00204                 copyColumn(oldColIndex, newColIndex);
00205                 eraseColumn(oldColIndex);
00206             }
00207             else
00208             {
00209                 // the dst is before the src, need to take the column insertion into account
00210                 insertColumn(newColIndex);
00211                 copyColumn(oldColIndex+1, newColIndex);
00212                 eraseColumn(oldColIndex+1);
00213             }
00214         }
00215 
00216         void setColCount(uint count)
00217         {
00218             if (count != ColCount)
00219             {
00220                 for (uint i=0; i<Data.size(); ++i)
00221                     Data[i].resize(count);
00222             }
00223             ColCount = count;
00224         }
00225 
00226         bool findId(uint& colIndex)
00227         {
00228             if (Data.empty())
00229                 return false;
00230 
00231             for (TWorksheet::TRow::iterator it=Data[0].begin(); it!=Data[0].end(); ++it)
00232             {
00233                 std::string columnTitle = (*it).toString();
00234                 if ( ! columnTitle.empty() )
00235                 {
00236                     // Return the first column for which the title does not begin with '*'
00237                     if ( columnTitle[0] != '*' )
00238                     {
00239                         colIndex = (it - Data[0].begin());
00240                         return true;
00241                     }
00242                 }
00243             }
00244             return false;
00245         }
00246 
00247         bool findCol(ucstring colName, uint &colIndex)
00248         {
00249             if (Data.empty())
00250                 return false;
00251             TWorksheet::TRow::iterator it = std::find(Data[0].begin(), Data[0].end(), ucstring(colName));
00252             if (it == Data[0].end())
00253                 return false;
00254 
00255             colIndex = it - Data[0].begin();
00256             return true;
00257         }
00258 
00259         void insertRow(uint rowIndex, const TRow &row)
00260         {
00261             nlassertex(rowIndex <= Data.size(), ("TWorksheet::insertRow: bad row index: rowIndex(%u) is out of range (max=%u)", rowIndex, Data.size()-1));
00262             nlassertex(row.size() == ColCount, ("TWorksheet::insertRow: bad column count : inserted row size(%u) is invalid (must be %u) at rowIndex(%u)", row.size(), ColCount, rowIndex));
00263 
00264             Data.insert(Data.begin()+rowIndex, row);
00265         }
00266 
00267         // resize the rows
00268         void resize(uint numRows)
00269         {
00270             uint    oldSize= Data.size();
00271             Data.resize(numRows);
00272             // alloc good Column count for new lines
00273             for(uint i= oldSize;i<Data.size();i++)
00274                 Data[i].resize(ColCount);
00275         }
00276 
00277         bool findRow(uint colIndex, const ucstring &colValue, uint &rowIndex)
00278         {
00279             nlassertex(colIndex < ColCount, ("TWorksheet::findRow: bad column index: colIndex(%u) is not less than ColCount(%u)", colIndex, ColCount));
00280 
00281             TData::iterator first(Data.begin()), last(Data.end());
00282 
00283             for (; first != last; ++first)
00284             {
00285                 if (first->operator[](colIndex) == colValue)
00286                 {
00287                     rowIndex = first - Data.begin();
00288                     return true;
00289                 }
00290 
00291             }
00292             return false;
00293         }
00294 
00295         void setData(uint rowIndex, uint colIndex, const ucstring &value)
00296         {
00297             nlassertex(rowIndex < Data.size(), ("TWorksheet::setData: bad row index: rowIndex(%u) is out of range (max=%u)", rowIndex, Data.size()));
00298             nlassertex(colIndex < ColCount, ("TWorksheet::setData: bad column index: colIndex(%u) is not less than ColCount(%u) ar rowIndex(%u)", colIndex, ColCount, rowIndex));
00299 
00300             Data[rowIndex][colIndex] = value;
00301         }
00302 
00303         const ucstring &getData(uint rowIndex, uint colIndex) const
00304         {
00305             nlassertex(rowIndex < Data.size(), ("TWorksheet::getData: bad row index: rowIndex(%u) is out of range (max=%u)", rowIndex, Data.size()));
00306             nlassertex(colIndex < ColCount, ("TWorksheet::getData: bad column index: colIndex(%u) is not less than ColCount(%u) at rowIndex(%u)", colIndex, ColCount, rowIndex));
00307 
00308             return Data[rowIndex][colIndex];
00309         }
00310 
00311         void setData(uint rowIndex, const ucstring &colName, const ucstring &value)
00312         {
00313             nlassertex(rowIndex > 0, ("TWorksheet::setData: rowIndex(%u) must be greater then 0 !", rowIndex));
00314             nlassertex(rowIndex < Data.size(), ("TWorksheet::setData: rowIndex(%u) is out of range (max=%u)", rowIndex, Data.size()));
00315             TWorksheet::TRow::iterator it = std::find(Data[0].begin(), Data[0].end(), ucstring(colName));
00316             nlassertex(it != Data[0].end(), ("TWorksheet::setData: invalid colName: can't find the column named '%s' at row %u", colName.toString().c_str(), rowIndex));
00317 
00318             Data[rowIndex][it - Data[0].begin()] = value;
00319         }
00320         const ucstring &getData(uint rowIndex, const ucstring &colName) const
00321         {
00322             nlassertex(rowIndex > 0, ("TWorksheet::getData: bad row index: rowIndex(%u) must be greater then 0 !", rowIndex));
00323             nlassertex(rowIndex < Data.size(), ("TWorksheet::getData: bad row index: rowIndex(%u) is out of range (max=%u)", rowIndex, Data.size()));
00324             TWorksheet::TRow::const_iterator it = std::find(Data[0].begin(), Data[0].end(), ucstring(colName));
00325             nlassertex(it != Data[0].end(), ("TWorksheet::getData: invalid colName: can't find the column named '%s' at row %u", colName.toString().c_str(), rowIndex));
00326 
00327             return Data[rowIndex][it - Data[0].begin()];
00328         }
00329     };
00330 
00331 
00332     struct TGetWorksheetIdentifier
00333     {
00334         std::string operator()(const TWorksheet &container, uint index) const
00335         {
00336             return container.getData(index, 1).toString();
00337         }
00338     };
00339 
00340     struct TGetWorksheetHashValue
00341     {
00342         uint64 operator()(const TWorksheet &container, uint index) const
00343         {
00344             return NLMISC::CI18N::stringToHash(container.getData(index, ucstring("*HASH_VALUE")).toString());
00345         }
00346     };
00347 
00348     struct TTestWorksheetItem : public std::unary_function<TWorksheet::TRow, bool>
00349     {
00350         ucstring    Identifier;
00351         TTestWorksheetItem(const std::string &identifier)
00352             : Identifier(identifier)
00353         {}
00354         bool operator () (const TWorksheet::TRow &row) const
00355         {
00356             return row[1] == Identifier;
00357         }
00358     };
00359 
00360 
00361     struct TWordsDiffContext
00362     {
00363         typedef TWorksheet::TData::iterator iterator;
00364         const TWorksheet    &Addition;
00365         TWorksheet          &Reference;
00366         TWorksheet          &Diff;
00367 
00368         TWordsDiffContext(const TWorksheet &addition, TWorksheet &reference, TWorksheet &diff)
00369             : Addition(addition),
00370             Reference(reference),
00371             Diff(diff)
00372         {
00373         }
00374     };
00375 
/**
 * Default GetIdentifier policy of CMakeDiff: returns the Identifier member of
 * the item stored at the given index of a vector (no bounds check).
 */
template<class ItemType>
struct TGetIdentifier
{
    std::string operator()(const std::vector<ItemType> &container, uint index) const
    {
        const ItemType &item = container[index];
        return item.Identifier;
    }
};
00384 
00385     template<class ItemType>
00386     struct TGetHashValue
00387     {
00388         uint64 operator()(const std::vector<ItemType> &container, uint index) const
00389         {
00390             return container[index].HashValue;
00391         }
00392     };
00393 
/**
 * Default TestItem policy of CMakeDiff: predicate that is true for the items
 * whose Identifier member equals the identifier given at construction.
 *
 * The adaptor typedefs are declared explicitly instead of inheriting from
 * std::unary_function, which was deprecated in C++11 and removed in C++17.
 */
template<class ItemType>
struct TTestItem
{
    typedef ItemType    argument_type;
    typedef bool        result_type;

    /// Identifier searched for.
    std::string Identifier;

    TTestItem(const std::string &identifier)
        : Identifier(identifier)
    {}

    bool operator () (const ItemType &item) const
    {
        return item.Identifier == Identifier;
    }
};
00406 
00413     template <class ItemType, class Context, class GetIdentifier = TGetIdentifier<ItemType>, class GetHashValue = TGetHashValue<ItemType>, class TestItem = TTestItem<ItemType> >
00414     class CMakeDiff
00415     {
00416     public:
00417         struct IDiffCallback
00418         {
00419             virtual void onEquivalent(uint addIndex, uint refIndex, Context &context) = 0;
00420             virtual void onAdd(uint addIndex, uint refIndex, Context &context) = 0;
00421             virtual void onRemove(uint addIndex, uint refIndex, Context &context) = 0;
00422             virtual void onChanged(uint addIndex, uint refIndex, Context &context) = 0;
00423             virtual void onSwap(uint newIndex, uint refIndex, Context &context) = 0;
00424 
00425         };
00426 
00427         void makeDiff(IDiffCallback *callback, Context &context, bool skipFirstRecord = false)
00428         {
00429 #ifdef NL_DEBUG
00430             // compile time checking
00431 //          Context::iterator testIt;
00432 #endif
00433             GetIdentifier   getIdentifier;
00434             GetHashValue    getHashValue;
00435             // compare the context.Reference an context.Addition file, remove any equivalent strings.
00436             uint addCount, refCount;
00437             if (skipFirstRecord)
00438             {
00439                 addCount = 1;
00440                 refCount = 1;
00441             }
00442             else
00443             {
00444                 addCount = 0;
00445                 refCount=0;
00446             }
00447 
00448             while (addCount < context.Addition.size() || refCount < context.Reference.size())
00449             {
00450                 bool equal = true;
00451                 if (addCount != context.Addition.size() && refCount != context.Reference.size())
00452                 {
00453                     equal = getHashValue(context.Addition, addCount) == getHashValue(context.Reference, refCount);
00454                 }
00455 
00456 //              vector<ItemType>::iterator it;
00457 
00458                 if (addCount == context.Addition.size()
00459                     ||
00460                         (
00461                             !equal
00462                         &&  find_if(context.Addition.begin(), context.Addition.end(), TestItem(getIdentifier(context.Reference, refCount))) == context.Addition.end()
00463                         )
00464                     )
00465                 {
00466                     // this can only be removal
00467                     callback->onRemove(addCount, refCount, context);
00468                     context.Reference.erase(context.Reference.begin()+refCount);
00469 //                  ++refCount;
00470                 }
00471                 else if (refCount == context.Reference.size()
00472                     ||
00473                         (
00474                             !equal
00475                         &&  find_if(context.Reference.begin(), context.Reference.end(), TestItem(getIdentifier(context.Addition, addCount))) == context.Reference.end()
00476                         )
00477                     )
00478                 {
00479                     // this can only be context.Addition
00480                     callback->onAdd(addCount, refCount, context);
00481                     context.Reference.insert(context.Reference.begin()+refCount, context.Addition[addCount]);
00482                     ++refCount;
00483                     ++addCount;
00484                 }
00485                 else if (getIdentifier(context.Addition, addCount) != getIdentifier(context.Reference, refCount))
00486                 {
00487                     // swap two element.
00488 //                  Context::iterator it = find_if(context.Reference.begin(), context.Reference.end(), TestItem(getIdentifier(context.Addition, addCount)));
00489 //                  if (it == context.Reference.end())
00490 
00491                     if (find_if(
00492                             context.Reference.begin(),
00493                             context.Reference.end(),
00494                             TestItem(getIdentifier(context.Addition, addCount)))
00495                             == context.Reference.end())
00496                     {
00497                         // context.Addition
00498                         callback->onAdd(addCount, refCount, context);
00499                         context.Reference.insert(context.Reference.begin()+refCount, context.Addition[addCount]);
00500                         ++refCount;
00501                         ++addCount;
00502                     }
00503                     else
00504                     {
00505 //                      nlassert(it != context.Reference.begin()+refCount);
00506                         uint index = find_if(context.Reference.begin(), context.Reference.end(), TestItem(getIdentifier(context.Addition, addCount))) - context.Reference.begin();
00507 
00508 //                      callback->onSwap(it - context.Reference.begin(), refCount, context);
00509                         callback->onSwap(index, refCount, context);
00510 //                      swap(*it, context.Reference[refCount]);
00511                         swap(context.Reference[index], context.Reference[refCount]);
00512                     }
00513                 }
00514                 else if (getHashValue(context.Addition, addCount) != getHashValue(context.Reference, refCount))
00515                 {
00516                     // changed element
00517                     callback->onChanged(addCount, refCount, context);
00518                     ++refCount;
00519                     ++addCount;
00520                 }
00521                 else
00522                 {
00523                     // same entry
00524                     callback->onEquivalent(addCount, refCount, context);
00525                     addCount++;
00526                     refCount++;
00527                 }
00528             }
00529         }
00530     };
00531 
00532     typedef CMakeDiff<TStringInfo, TStringDiffContext>      TStringDiff;
00533     typedef CMakeDiff<TPhrase, TPhraseDiffContext>          TPhraseDiff;
00534     typedef CMakeDiff<TWorksheet::TRow, TWordsDiffContext, TGetWorksheetIdentifier, TGetWorksheetHashValue, TTestWorksheetItem> TWorkSheetDiff;
00535 
00536 
00537     uint64      makePhraseHash(const TPhrase &phrase);
00538     bool        parseHashFromComment(const ucstring &comments, uint64 &hashValue);
00539 
00540     bool        loadStringFile(const std::string filename, std::vector<TStringInfo> &stringInfos, bool forceRehash, ucchar openMark = '[', ucchar closeMark = ']', bool specialCase = false);
00541     ucstring    prepareStringFile(const std::vector<TStringInfo> &strings, bool removeDiffComments, bool noDiffInfo = false);
00542 
00543     bool        readPhraseFile(const std::string &filename, std::vector<TPhrase> &phrases, bool forceRehash);
00544     bool        readPhraseFileFromString(ucstring const& doc, const std::string &filename, std::vector<TPhrase> &phrases, bool forceRehash);
00545     ucstring    tabLines(uint nbTab, const ucstring &str);
00546     ucstring    preparePhraseFile(const std::vector<TPhrase> &phrases, bool removeDiffComments);
00547 
00548     bool        loadExcelSheet(const std::string filename, TWorksheet &worksheet, bool checkUnique = true);
00549     bool        readExcelSheet(const ucstring &text, TWorksheet &worksheet, bool checkUnique = true);
00550     void        makeHashCode(TWorksheet &sheet, bool forceRehash);
00551     ucstring    prepareExcelSheet(const TWorksheet &worksheet);
00552 
00553 }   // namespace STRING_MANAGER
00554 
00555 #endif // DIFF_TOOL_H

Generated on Thu Jan 7 08:26:22 2010 for NeL by  doxygen 1.6.1