// Include guard (CHARTOKENIZER_H) followed by the declaration of the virtual,
// parameterless incrementToken(), which returns bool. Listing lines 7, 8 and 37
// are collapsed onto this one line. NOTE(review): presumably returns true while
// another token was produced -- implementation not visible here; confirm.
7 #ifndef CHARTOKENIZER_H 8 #define CHARTOKENIZER_H 37 virtual bool incrementToken();
// Virtual member taking the reader to use as `input`, passed by const
// reference (ReaderPtr is a boost::shared_ptr<Reader>). Returns nothing.
// NOTE(review): presumably re-targets the tokenizer at `input` and clears any
// buffered state -- the implementation is not visible here; confirm.
39 virtual void reset(
const ReaderPtr& input);
// Pure virtual predicate (= 0): every concrete subclass must implement it.
// Takes a single wide character `c` and returns a bool.
// NOTE(review): presumably true means `c` belongs inside a token and false
// means it separates tokens -- confirm against the implementation.
45 virtual bool isTokenChar(
wchar_t c) = 0;
// Virtual hook that maps one wide character `c` to a wide character result.
// Not pure virtual, so a default implementation exists elsewhere.
// NOTE(review): presumably applied to each token character before it is
// stored, with subclasses overriding it to transform characters -- confirm.
49 virtual wchar_t normalize(
wchar_t c);
int32_t bufferIndex
Definition: CharTokenizer.h:26
OffsetAttributePtr offsetAtt
Definition: CharTokenizer.h:34
boost::shared_ptr< Reader > ReaderPtr
Definition: LuceneTypes.h:547
CharArray ioBuffer
Definition: CharTokenizer.h:32
boost::shared_ptr< AttributeFactory > AttributeFactoryPtr
Definition: LuceneTypes.h:519
boost::shared_ptr< AttributeSource > AttributeSourcePtr
Definition: LuceneTypes.h:520
boost::shared_ptr< TermAttribute > TermAttributePtr
Definition: LuceneTypes.h:58
static const int32_t IO_BUFFER_SIZE
Definition: CharTokenizer.h:30
TermAttributePtr termAtt
Definition: CharTokenizer.h:33
int32_t dataLen
Definition: CharTokenizer.h:27
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Definition: AbstractAllTermDocs.h:12
boost::shared_ptr< OffsetAttribute > OffsetAttributePtr
Definition: LuceneTypes.h:40
An abstract base class for simple, character-oriented tokenizers.
Definition: CharTokenizer.h:15
static const int32_t MAX_WORD_LEN
Definition: CharTokenizer.h:29
A Tokenizer is a TokenStream whose input is a Reader.
Definition: Tokenizer.h:20