|
||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
java.util.Observable
org.exist.storage.TextSearchEngine
org.exist.storage.NativeTextEngine
This class is responsible for fulltext indexing. Text nodes are handed over to this class to be fulltext-indexed. Method storeText() is called by RelationalBroker whenever it finds a TextNode. Method getNodesContaining() is used by the XPath engine to process queries where a fulltext operator is involved. The class keeps two database tables: table words stores the words found together with their unique ids; table inv_idx contains the word occurrences for every word-id per document.
Field Summary | |
protected BFile |
dbWords
|
protected org.apache.oro.text.regex.PatternCompiler |
globCompiler
|
protected org.exist.storage.NativeTextEngine.InvertedIndex |
invIdx
|
protected org.apache.oro.text.regex.PatternMatcher |
matcher
|
protected org.apache.oro.text.regex.PatternCompiler |
regexCompiler
|
protected boolean |
useCompression
|
Fields inherited from class org.exist.storage.TextSearchEngine |
broker, config, indexNumbers, stem, stemmer, stoplist, tokenizer |
Constructor Summary | |
NativeTextEngine(DBBroker broker,
Configuration config)
|
Method Summary | |
void |
close()
|
protected void |
collect(java.util.HashSet words,
java.util.Iterator domIterator)
Collect all words in a document to be removed |
static boolean |
containsWildcards(java.lang.String str)
Check if string contains non-letters (maybe it's a regular expression?). |
void |
flush()
|
NodeSet[] |
getNodesContaining(DocumentSet docs,
java.lang.String[] expr)
Find all the nodes containing the search terms given by the array expr from the fulltext-index. |
NodeSet[] |
getNodesContaining(DocumentSet docs,
java.lang.String[] expr,
int type)
Get all the nodes containing the search terms given by the array expr using the fulltext-index. |
NodeSet[] |
getNodesExact(DocumentSet docs,
java.lang.String[] expr)
Get all nodes whose content exactly matches the terms passed in expr. |
void |
reindex(DocumentImpl oldDoc,
NodeImpl node)
Reindex a document or node. |
void |
remove()
|
void |
removeCollection(Collection collection)
Remove indexed words for entire collection |
void |
removeDocument(DocumentImpl doc)
Remove all index entries for the specified document |
Occurrences[] |
scanIndexTerms(User user,
Collection collection,
java.lang.String start,
java.lang.String end,
boolean inclusive)
Scan the fulltext index and return an Occurrences object for each of the index keys. |
static boolean |
startsWithWildcard(java.lang.String str)
|
void |
storeAttribute(IndexPaths idx,
AttrImpl attr)
Index an attribute value |
void |
storeText(IndexPaths idx,
TextImpl text)
Index a text node |
void |
sync()
|
Methods inherited from class org.exist.storage.TextSearchEngine |
getTokenizer |
Methods inherited from class java.util.Observable |
addObserver, clearChanged, countObservers, deleteObserver, deleteObservers, hasChanged, notifyObservers, notifyObservers, setChanged |
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
protected BFile dbWords
protected org.exist.storage.NativeTextEngine.InvertedIndex invIdx
protected boolean useCompression
protected org.apache.oro.text.regex.PatternCompiler regexCompiler
protected org.apache.oro.text.regex.PatternCompiler globCompiler
protected org.apache.oro.text.regex.PatternMatcher matcher
Constructor Detail |
public NativeTextEngine(DBBroker broker, Configuration config)
Method Detail |
public static final boolean containsWildcards(java.lang.String str)
str
- Description of the Parameter
public static final boolean startsWithWildcard(java.lang.String str)
public void close()
close
in class TextSearchEngine
protected void collect(java.util.HashSet words, java.util.Iterator domIterator)
words
- Description of the Parameter
domIterator
- Description of the Parameter
public void flush()
flush
in class TextSearchEngine
public void reindex(DocumentImpl oldDoc, NodeImpl node)
TextSearchEngine
reindex
in class TextSearchEngine
oldDoc
-
node
-
public void remove()
public NodeSet[] getNodesContaining(DocumentSet docs, java.lang.String[] expr)
getNodesContaining
in class TextSearchEngine
docs
-
expr
-
public NodeSet[] getNodesContaining(DocumentSet docs, java.lang.String[] expr, int type)
getNodesContaining
in class TextSearchEngine
docs
- the input document set
expr
- array of search terms
type
- either MATCH_EXACT or MATCH_REGEX
public NodeSet[] getNodesExact(DocumentSet docs, java.lang.String[] expr)
public Occurrences[] scanIndexTerms(User user, Collection collection, java.lang.String start, java.lang.String end, boolean inclusive) throws PermissionDeniedException
TextSearchEngine
scanIndexTerms
in class TextSearchEngine
user
-
collection
-
start
-
end
-
inclusive
-
PermissionDeniedException
public void removeCollection(Collection collection)
removeCollection
in class TextSearchEngine
collection
- Description of the Parameter
public void removeDocument(DocumentImpl doc)
removeDocument
in class TextSearchEngine
doc
- The document
public void storeAttribute(IndexPaths idx, AttrImpl attr)
storeAttribute
in class TextSearchEngine
attr
- the attribute to be indexed
idx
-
public void storeText(IndexPaths idx, TextImpl text)
storeText
in class TextSearchEngine
idx
- IndexPaths object passed in by the broker
text
- the text node to be indexed
public void sync()
|
||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |