00001 /* -*- mode: c++ -*- 00002 */ 00003 /* 00004 00005 GIFT, a flexible content based image retrieval system. 00006 Copyright (C) 1998, 1999, 2000, 2001, 2002, CUI University of Geneva 00007 00008 Copyright (C) 2003, 2004 Bayreuth University 00009 2005 Bamberg University 00010 This program is free software; you can redistribute it and/or modify 00011 it under the terms of the GNU General Public License as published by 00012 the Free Software Foundation; either version 2 of the License, or 00013 (at your option) any later version. 00014 00015 This program is distributed in the hope that it will be useful, 00016 but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00018 GNU General Public License for more details. 00019 00020 You should have received a copy of the GNU General Public License 00021 along with this program; if not, write to the Free Software 00022 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00023 00024 */ 00025 // -*- mode: c++ -*- 00026 00027 00028 class CXMLElement; 00029 00049 #ifndef _CINVERTEDFILEACCESSOR 00050 #define _CINVERTEDFILEACCESSOR 00051 #include "libGIFTAcInvertedFile/include/uses-declarations.h" 00052 #include <string> 00053 #include "libMRML/include/TID.h" 00054 #include "libMRML/include/CSelfDestroyPointer.h" 00055 #include "libMRML/include/CArraySelfDestroyPointer.h" 00056 #include "libGIFTAcInvertedFile/include/CDocumentFrequencyList.h" 00057 #include "CCollectionFrequencyList.h" 00058 #include "libGIFTAcInvertedFile/include/CADIHash.h" 00059 #include "libGIFTAcURL2FTS/include/CAcURL2FTS.h" 00060 #include <iostream> 00061 #include <fstream> 00062 #include <map> 00063 #include <vector> 00064 #ifdef HAS_HASH_MAP 00065 #include <hash_map> 00066 #else 00067 #define hash_map map 00068 #endif 00069 #include <functional> 00070 #include <algorithm> 00071 00072 #include "libMRML/include/CMagic.h" 00073 00074 00075 typedef TID TFeatureID ; 00076 00083 class CAcInvertedFile:public CAcURL2FTS{ 00084 00085 protected: 00087 TID mMaximumFeatureID; 00090 CArraySelfDestroyPointer<char> mInvertedFileBuffer; 00092 mutable CSelfDestroyPointer<istream> mInvertedFile; 00093 00095 mutable ifstream mOffsetFile; 00096 00098 ifstream mFeatureDescriptionFile; 00099 00101 string mInvertedFileName; 00102 00104 string mOffsetFileName; 00105 00107 string mFeatureDescriptionFileName; 00108 00110 typedef hash_map<TID,unsigned int> CIDToOffset;//new hash 00112 CIDToOffset mIDToOffset; 00113 00115 mutable hash_map<TID,double> mFeatureToCollectionFrequency;//new hash 00116 00120 hash_map<TID,unsigned int> mFeatureDescription;//new hash_ 00121 00125 CADIHash mDocumentInformation; 00127 00130 void writeOffsetFileElement(TID inFeatureID, 00131 int inPosition, 00132 ostream& inOpenOffsetFile); 00134 CDocumentFrequencyList* getFeatureFile(string inFileName)const; 00135 public: 00137 bool operator()()const; 00138 00153 CAcInvertedFile(const CXMLElement& inCollectionElement); 00155 bool init(bool); 00156 00158 ~CAcInvertedFile(); 00159 00161 string IDToURL(TID inID)const; 00162 00164 TID URLToID(const string& inURL)const; 00165 00169 CDocumentFrequencyList* FeatureToList(TFeatureID)const; 00170 00172 CDocumentFrequencyList* URLToFeatureList(string inURL)const; 00173 00175 CDocumentFrequencyList* DIDToFeatureList(TID inDID)const; 00176 00178 00179 00183 double FeatureToCollectionFrequency(TFeatureID)const; 00184 00186 unsigned int getFeatureDescription(TID inFeatureID)const; 00188 00192 double DIDToMaxDocumentFrequency(TID)const; 00193 00195 double DIDToDFSquareSum(TID)const; 00196 00198 double DIDToSquareDFLogICFSum(TID)const; 00200 00201 /*@name Inverted File Generation and Consistency Checking*/ 00203 00211 bool generateInvertedFile(); 00212 00220 bool newGenerateInvertedFile(); 00221 00224 bool checkConsistency(); 00225 00229 bool findWithinStream(TID inFeatureID, 00230 TID inDocumentID, 00231 double inDocumentFrequency)const; 00232 00234 00236 TID getMaximumFeatureID()const; 00244 list<TID>* getAllFeatureIDs()const; 00245 }; 00246 00247 #endif