Lucene++ - a full-featured, C++ search engine
API Documentation


StandardAnalyzer.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef STANDARDANALYZER_H
8 #define STANDARDANALYZER_H
9 
10 #include "Analyzer.h"
11 
12 namespace Lucene {
13 
// Analyzer that filters StandardTokenizer with StandardFilter, LowerCaseFilter
// and StopFilter.
// NOTE(review): this listing skips several numbered lines (e.g. 28, 33, 48,
// 56-62, 68); any declarations on those lines are not visible here.
23 class LPPAPI StandardAnalyzer : public Analyzer {
24 public:
28 
33 
    // Builds an analyzer with the stop words from the given file.
38  StandardAnalyzer(LuceneVersion::Version matchVersion, const String& stopwords);
39 
    // Builds an analyzer with the stop words from the given reader.
44  StandardAnalyzer(LuceneVersion::Version matchVersion, const ReaderPtr& stopwords);
45 
46  virtual ~StandardAnalyzer();
47 
49 
50 public:
    // Default maximum allowed token length.
52  static const int32_t DEFAULT_MAX_TOKEN_LENGTH;
53 
54 protected:
56 
60 
62 
    // Maximum allowed token length; presumably the value managed by
    // setMaxTokenLength()/getMaxTokenLength() (definitions not shown here).
63  int32_t maxTokenLength;
64 
65 protected:
68 
69 public:
    // Constructs a StandardTokenizer filtered by a StandardFilter, a
    // LowerCaseFilter and a StopFilter.
72  virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader);
73 
    // Set maximum allowed token length. If a token is seen that exceeds this
    // length then it is discarded.
76  void setMaxTokenLength(int32_t length);
77 
    // Presumably returns the current maximum allowed token length
    // (implementation not visible in this header).
79  int32_t getMaxTokenLength();
80 
    // Creates a TokenStream that is allowed to be re-used from a previous call
    // made by the same thread.
81  virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader);
82 };
83 
84 }
85 
86 #endif
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
An Analyzer builds TokenStreams, which analyze text. It thus represents a policy for extracting index terms from text.
Definition: Analyzer.h:19
Version
Definition: Constants.h:40
Filters StandardTokenizer with StandardFilter, LowerCaseFilter and StopFilter, using a list of English stop words.
Definition: StandardAnalyzer.h:23
StandardAnalyzer(LuceneVersion::Version matchVersion)
Builds an analyzer with the default stop words (STOP_WORDS_SET).
void setMaxTokenLength(int32_t length)
Set maximum allowed token length. If a token is seen that exceeds this length then it is discarded....
LuceneVersion::Version matchVersion
Definition: StandardAnalyzer.h:61
virtual TokenStreamPtr tokenStream(const String &fieldName, const ReaderPtr &reader)
Constructs a StandardTokenizer filtered by a StandardFilter, a LowerCaseFilter and a StopFilter.
StandardAnalyzer(LuceneVersion::Version matchVersion, HashSet< String > stopWords)
Builds an analyzer with the given stop words.
bool replaceInvalidAcronym
Specifies whether deprecated acronyms should be replaced with HOST type.
Definition: StandardAnalyzer.h:58
HashSet< String > stopSet
Definition: StandardAnalyzer.h:55
int32_t maxTokenLength
Definition: StandardAnalyzer.h:63
bool enableStopPositionIncrements
Definition: StandardAnalyzer.h:59
virtual TokenStreamPtr reusableTokenStream(const String &fieldName, const ReaderPtr &reader)
Creates a TokenStream that is allowed to be re-used from the previous time that the same thread called this method.
StandardAnalyzer(LuceneVersion::Version matchVersion, const ReaderPtr &stopwords)
Builds an analyzer with the stop words from the given reader.
void ConstructAnalyser(LuceneVersion::Version matchVersion, HashSet< String > stopWords)
Construct an analyzer with the given stop words.
StandardAnalyzer(LuceneVersion::Version matchVersion, const String &stopwords)
Builds an analyzer with the stop words from the given file.
static const int32_t DEFAULT_MAX_TOKEN_LENGTH
Default maximum allowed token length.
Definition: StandardAnalyzer.h:48
Definition: AbstractAllTermDocs.h:12
boost::shared_ptr< TokenStream > TokenStreamPtr
Definition: LuceneTypes.h:63
boost::shared_ptr< Reader > ReaderPtr
Definition: LuceneTypes.h:547

clucene.sourceforge.net