1*e8e4245dSRobert Muir<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 2*e8e4245dSRobert Muir<html> <head> 3*e8e4245dSRobert Muir<meta http-equiv='CONTENT-TYPE' content='text/html; charset=UTF-8' /> 4*e8e4245dSRobert Muir<link rel='StyleSheet' href='DTDDocStyle.css' type='text/css' media='screen' /> 5*e8e4245dSRobert Muir<title>Contrib Lucene</title> 6*e8e4245dSRobert Muir</head><body> 7*e8e4245dSRobert Muir<p class='DTDSource'><b><code>LuceneContribQuery.dtd</code></b>: <a href='LuceneContribQuery.dtd.html'>Elements</a> - <a href='LuceneContribQuery.dtd.entities.html'>Entities</a> - <a href='LuceneContribQuery.dtd.org.html'>Source</a> | <a href='intro.html'>Intro</a> - <a href='elementsIndex.html'>Index</a><br /><a href='index.html' target='_top'>FRAMES</a> / <a href='LuceneContribQuery.dtd.org.html' target='_top'>NO FRAMES</a></p><pre id='dtd_source'><span class="dtd_comment"><!-- </span> 8*e8e4245dSRobert Muir<span class="dtd_comment"> This DTD builds on the <a href="LuceneCoreQuery.dtd.html">core Lucene XML syntax</a> and adds support for features found in the "contrib" section of the Lucene project.</span> 9*e8e4245dSRobert Muir<span class="dtd_comment"> </span> 10*e8e4245dSRobert Muir<span class="dtd_comment"> CorePlusExtensionsParser.java is the Java class that encapsulates this parser behaviour.</span> 11*e8e4245dSRobert Muir 12*e8e4245dSRobert Muir<span class="dtd_comment"> </span> 13*e8e4245dSRobert Muir<span class="dtd_comment"> The features added are:</span> 14*e8e4245dSRobert Muir<span class="dtd_comment"> <ul></span> 15*e8e4245dSRobert Muir<span class="dtd_comment"> <li><a href="#LikeThisQuery">LikeThisQuery</a></li></span> 16*e8e4245dSRobert Muir<span class="dtd_comment"> Support for querying using large amounts of example text indicative of the users' general area of interest</span> 17*e8e4245dSRobert Muir<span class="dtd_comment"> <li><a 
href="#FuzzyLikeThisQuery">FuzzyLikeThisQuery</a></li></span> 18*e8e4245dSRobert Muir<span class="dtd_comment"> A style of fuzzy query which automatically looks for fuzzy variations on only the "interesting" terms </span> 19*e8e4245dSRobert Muir<span class="dtd_comment"> <li><a href="#BooleanFilter">BooleanFilter</a></li></span> 20*e8e4245dSRobert Muir<span class="dtd_comment"> Is to Filters what core Lucene's BooleanQuery is to Queries - allows mixing of clauses using Boolean logic</span> 21*e8e4245dSRobert Muir<span class="dtd_comment"> <li><a href="#TermsFilter">TermsFilter</a></li></span> 22*e8e4245dSRobert Muir<span class="dtd_comment"> Constructs a filter from an arbitrary set of terms (unlike <a href="#RangeFilter">RangeFilter</a> which requires a contiguous range of terms)</span> 23*e8e4245dSRobert Muir<span class="dtd_comment"> <li><a href="#DuplicateFilter">DuplicateFilter</a></li></span> 24*e8e4245dSRobert Muir<span class="dtd_comment"> Removes duplicated documents from results where "duplicate" means documents share a value for a particular field (e.g. 
a primary key)</span> 25*e8e4245dSRobert Muir<span class="dtd_comment"> <li><a href="#BoostingQuery">BoostingQuery</a></li></span> 26*e8e4245dSRobert Muir<span class="dtd_comment"> Influence score of a query's matches in a subtle way which can't be achieved using BooleanQuery</span> 27*e8e4245dSRobert Muir<span class="dtd_comment"> </ul></span> 28*e8e4245dSRobert Muir<span class="dtd_comment"> </span><span class="dtd_dtddoc_tag">@title</span><span class="dtd_comment"> Contrib Lucene</span> 29*e8e4245dSRobert Muir<span class="dtd_comment">--></span> 30*e8e4245dSRobert Muir<span class="dtd_comment"><!-- </span><span class="dtd_dtddoc_tag">@hidden</span><span class="dtd_comment"> include the core DTD --></span> 31*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ENTITY</span><span class="dtd_plain"> % </span><span class="dtd_attribute_name">coreParserDTD</span><span class="dtd_plain"> </span><span class="dtd_keyword">SYSTEM</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">"LuceneCoreQuery.dtd"</span><span class="dtd_plain"> </span><span class="dtd_tag_symbols">></span> 32*e8e4245dSRobert Muir 33*e8e4245dSRobert Muir 34*e8e4245dSRobert Muir<span class="dtd_comment"><!-- </span><span class="dtd_dtddoc_tag">@hidden</span><span class="dtd_comment"> Allow for extensions --></span> 35*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ENTITY</span><span class="dtd_plain"> % </span><span class="dtd_attribute_name">extendedSpanQueries2</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">" "</span><span class="dtd_plain"> </span><span class="dtd_tag_symbols">></span> 36*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ENTITY</span><span class="dtd_plain"> % </span><span class="dtd_attribute_name">extendedQueries2</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">" "</span><span class="dtd_plain"> 
</span><span class="dtd_tag_symbols">></span> 37*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ENTITY</span><span class="dtd_plain"> % </span><span class="dtd_attribute_name">extendedFilters2</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">" "</span><span class="dtd_plain"> </span><span class="dtd_tag_symbols">></span> 38*e8e4245dSRobert Muir 39*e8e4245dSRobert Muir 40*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ENTITY</span><span class="dtd_plain"> % </span><span class="dtd_attribute_name">extendedQueries1</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">"|LikeThisQuery|BoostingQuery|FuzzyLikeThisQuery%extendedQueries2;%extendedSpanQueries2;"</span><span class="dtd_plain"> </span><span class="dtd_tag_symbols">></span> 41*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ENTITY</span><span class="dtd_plain"> % </span><span class="dtd_attribute_name">extendedFilters1</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">"|TermsFilter|BooleanFilter|DuplicateFilter%extendedFilters2;"</span><span class="dtd_plain"> </span><span class="dtd_tag_symbols">></span> 42*e8e4245dSRobert Muir 43*e8e4245dSRobert Muir 44*e8e4245dSRobert Muir<span class="dtd_plain">%coreParserDTD;</span> 45*e8e4245dSRobert Muir 46*e8e4245dSRobert Muir<span class="dtd_comment"><!--</span> 47*e8e4245dSRobert Muir<span class="dtd_comment">Performs fuzzy matching on "significant" terms in fields. Improves on "LikeThisQuery" by allowing for fuzzy variations of supplied fields.</span> 48*e8e4245dSRobert Muir<span class="dtd_comment">Improves on FuzzyQuery by rewarding all fuzzy variants of a term with the same IDF rather than default fuzzy behaviour which ranks rarer</span> 49*e8e4245dSRobert Muir<span class="dtd_comment"> variants (typically misspellings) more highly. 
This can be a useful default search mode for processing user input where the end user</span> 50*e8e4245dSRobert Muir<span class="dtd_comment"> is not expected to know about the standard query operators for fuzzy, boolean or phrase logic found in UserQuery</span> 51*e8e4245dSRobert Muir<span class="dtd_comment"> </span><span class="dtd_dtddoc_tag">@example</span><span class="dtd_comment"> </span> 52*e8e4245dSRobert Muir<span class="dtd_comment"> <em>Search for information about the Sumitomo bank, where the end user has mis-spelt the name</em></span> 53*e8e4245dSRobert Muir<span class="dtd_comment"> % </span> 54*e8e4245dSRobert Muir<span class="dtd_comment"> <FuzzyLikeThisQuery></span> 55*e8e4245dSRobert Muir<span class="dtd_comment"> <Field fieldName="contents"></span> 56*e8e4245dSRobert Muir<span class="dtd_comment"> Sumitimo bank</span> 57*e8e4245dSRobert Muir<span class="dtd_comment"> </Field></span> 58*e8e4245dSRobert Muir<span class="dtd_comment"> </FuzzyLikeThisQuery></span> 59*e8e4245dSRobert Muir<span class="dtd_comment"> % </span> 60*e8e4245dSRobert Muir<span class="dtd_comment">--></span> 61*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">FuzzyLikeThisQuery</span><span class="dtd_plain"> (</span><span class="dtd_attribute_name">Field</span><span class="dtd_plain">)*</span><span class="dtd_tag_symbols">></span> 62*e8e4245dSRobert Muir<span class="dtd_comment"><!-- Optional boost for matches on this query. 
Values > 1 --></span> 63*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">FuzzyLikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">boost</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">"1.0"</span><span class="dtd_tag_symbols">></span> 64*e8e4245dSRobert Muir<span class="dtd_comment"><!-- Limits the total number of terms selected from the provided text plus the selected "fuzzy" variants --></span> 65*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">FuzzyLikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">maxNumTerms</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">"50"</span><span class="dtd_tag_symbols">></span> 66*e8e4245dSRobert Muir<span class="dtd_comment"><!-- Ignore "Term Frequency" - a boost factor which rewards multiple occurrences of the same term in a document --></span> 67*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">FuzzyLikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">ignoreTF</span><span class="dtd_plain"> (</span><span class="dtd_attribute_name">true</span><span class="dtd_plain">|</span><span class="dtd_attribute_name">false</span><span class="dtd_plain">) </span><span class="dtd_attribute_value">"false"</span><span class="dtd_tag_symbols">></span> 68*e8e4245dSRobert Muir<span class="dtd_comment"><!-- A field used in a FuzzyLikeThisQuery --></span> 69*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span
class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">Field</span><span class="dtd_plain"> (</span><span class="dtd_keyword">#PCDATA</span><span class="dtd_plain">)</span><span class="dtd_tag_symbols">></span> 70*e8e4245dSRobert Muir<span class="dtd_comment"><!-- Controls the level of similarity required for fuzzy variants where 1 is identical and 0.5 is that the variant contains </span> 71*e8e4245dSRobert Muir<span class="dtd_comment"> half of the original's characters in the same order. Lower values produce more results but may take longer to execute due to</span> 72*e8e4245dSRobert Muir<span class="dtd_comment"> additional IO required to read matching document ids--></span> 73*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">Field</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">minSimilarity</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">"0.5"</span><span class="dtd_tag_symbols">></span> 74*e8e4245dSRobert Muir<span class="dtd_comment"><!-- Controls the minimum number of characters at the start of fuzzy variant words that must exactly match the original.</span> 75*e8e4245dSRobert Muir<span class="dtd_comment"> A value of zero will require no minimum and the search software will effectively scan ALL terms from a to z looking for variations.</span> 76*e8e4245dSRobert Muir<span class="dtd_comment"> This can incur high CPU overhead and a prefix length of just "1" will reduce this overhead to 1/26th of the original cost (assuming</span> 77*e8e4245dSRobert Muir<span class="dtd_comment"> an even distribution of letters used from the alphabet).</span> 78*e8e4245dSRobert Muir<span class="dtd_comment"> --></span> 79*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span 
class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">Field</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">prefixLength</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">"1"</span><span class="dtd_tag_symbols">></span> 80*e8e4245dSRobert Muir<span class="dtd_comment"><!-- fieldName must be defined here or is taken from the most immediate parent XML element that defines a "fieldName" attribute --></span><span class="dtd_plain"> </span> 81*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">Field</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">fieldName</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_keyword">#IMPLIED</span><span class="dtd_tag_symbols">></span> 82*e8e4245dSRobert Muir 83*e8e4245dSRobert Muir 84*e8e4245dSRobert Muir 85*e8e4245dSRobert Muir<span class="dtd_comment"><!--</span> 86*e8e4245dSRobert Muir<span class="dtd_comment"> Cherry-picks "significant" terms from the example child text and queries using these words. 
By only using significant (read: rare) terms the</span> 87*e8e4245dSRobert Muir<span class="dtd_comment"> performance cost of the query is substantially reduced and large bodies of text can be used as example content.</span> 88*e8e4245dSRobert Muir<span class="dtd_comment"> </span><span class="dtd_dtddoc_tag">@example</span><span class="dtd_comment"> </span> 89*e8e4245dSRobert Muir<span class="dtd_comment"> <em>Use a block of text as an example of the type of content to be found, ignoring the "Reuters" word which</span> 90*e8e4245dSRobert Muir<span class="dtd_comment"> appears commonly in the index.</em></span> 91*e8e4245dSRobert Muir<span class="dtd_comment"> %</span> 92*e8e4245dSRobert Muir<span class="dtd_comment"> <LikeThisQuery percentTermsToMatch="5" stopWords="Reuters"></span> 93*e8e4245dSRobert Muir<span class="dtd_comment"> IRAQI TROOPS REPORTED PUSHING BACK IRANIANS Iraq said today its troops were pushing Iranian forces out of </span> 94*e8e4245dSRobert Muir<span class="dtd_comment"> positions they had initially occupied when they launched a new offensive near the southern port of </span> 95*e8e4245dSRobert Muir<span class="dtd_comment"> Basra early yesterday. A High Command communique said Iraqi troops had won a significant victory </span> 96*e8e4245dSRobert Muir<span class="dtd_comment"> and were continuing to advance. Iraq said it had foiled a three-pronged thrust some 10 km </span> 97*e8e4245dSRobert Muir<span class="dtd_comment"> (six miles) from Basra, but admitted the Iranians had occupied ground held by the Mohammed al-Qassem </span> 98*e8e4245dSRobert Muir<span class="dtd_comment"> unit, one of three divisions attacked. The communique said Iranian Revolutionary Guards were under </span> 99*e8e4245dSRobert Muir<span class="dtd_comment"> assault from warplanes, helicopter gunships, heavy artillery and tanks. 
"Our forces are continuing </span> 100*e8e4245dSRobert Muir<span class="dtd_comment"> their advance until they purge the last foothold" occupied by the Iranians, it said. </span> 101*e8e4245dSRobert Muir<span class="dtd_comment"> (Iran said its troops had killed or wounded more than 4,000 Iraqis and were stabilising their new positions.) </span> 102*e8e4245dSRobert Muir<span class="dtd_comment"> The Baghdad communique said Iraqi planes also destroyed oil installations at Iran's southwestern Ahvaz field </span> 103*e8e4245dSRobert Muir<span class="dtd_comment"> during a raid today. It denied an Iranian report that an Iraqi jet was shot down. </span> 104*e8e4245dSRobert Muir<span class="dtd_comment"> Iraq also reported a naval battle at the northern tip of the Gulf. Iraqi naval units and forces defending an </span> 105*e8e4245dSRobert Muir<span class="dtd_comment"> offshore terminal sank six Iranian out of 28 Iranian boats attempting to attack an offshore terminal, </span> 106*e8e4245dSRobert Muir<span class="dtd_comment"> the communique said. Reuters 3;</span> 107*e8e4245dSRobert Muir<span class="dtd_comment"> </LikeThisQuery> </span> 108*e8e4245dSRobert Muir<span class="dtd_comment"> % </span> 109*e8e4245dSRobert Muir<span class="dtd_comment"> --></span> 110*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">LikeThisQuery</span><span class="dtd_plain"> (</span><span class="dtd_keyword">#PCDATA</span><span class="dtd_plain">)</span><span class="dtd_tag_symbols">></span> 111*e8e4245dSRobert Muir<span class="dtd_comment"><!-- Optional boost for matches on this query. 
Values > 1 --></span> 112*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">LikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">boost</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">"1.0"</span><span class="dtd_tag_symbols">></span> 113*e8e4245dSRobert Muir<span class="dtd_comment"><!-- Comma delimited list of field names --></span> 114*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">LikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">fieldNames</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_keyword">#IMPLIED</span><span class="dtd_tag_symbols">></span> 115*e8e4245dSRobert Muir<span class="dtd_comment"><!-- a list of stop words - analyzed to produce stop terms --></span> 116*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">LikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">stopWords</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_keyword">#IMPLIED</span><span class="dtd_tag_symbols">></span> 117*e8e4245dSRobert Muir<span class="dtd_comment"><!-- controls the maximum number of words shortlisted for the query. 
The higher the number the slower the response due to more disk reads required --></span> 118*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">LikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">maxQueryTerms</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">"20"</span><span class="dtd_tag_symbols">></span> 119*e8e4245dSRobert Muir<span class="dtd_comment"><!-- Controls how many times a term must appear in the example text before it is shortlisted for use in the query --></span> 120*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">LikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">minTermFrequency</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">"1"</span><span class="dtd_tag_symbols">></span> 121*e8e4245dSRobert Muir<span class="dtd_comment"><!-- A quality control that can be used to limit the number of results to those documents matching a certain percentage of the shortlisted query terms.</span> 122*e8e4245dSRobert Muir<span class="dtd_comment"> Values must be between 1 and 100--></span> 123*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">LikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">percentTermsToMatch</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">"30"</span><span class="dtd_tag_symbols">></span> 124*e8e4245dSRobert Muir 125*e8e4245dSRobert 
Muir<span class="dtd_comment"><!--</span> 126*e8e4245dSRobert Muir<span class="dtd_comment"> Requires matches on the "Query" element and optionally boosts by any matches on the "BoostQuery".</span> 127*e8e4245dSRobert Muir<span class="dtd_comment"> Unlike a regular BooleanQuery the boost can be less than 1 to produce a subtractive rather than additive result</span> 128*e8e4245dSRobert Muir<span class="dtd_comment"> on the match score. </span> 129*e8e4245dSRobert Muir<span class="dtd_comment"> </span><span class="dtd_dtddoc_tag">@example</span><span class="dtd_comment"> <em>Find documents about banks, preferably related to mergers, and preferably not about "World bank"</em></span> 130*e8e4245dSRobert Muir<span class="dtd_comment"> %</span> 131*e8e4245dSRobert Muir<span class="dtd_comment"> <BoostingQuery></span> 132*e8e4245dSRobert Muir<span class="dtd_comment"> <Query></span> 133*e8e4245dSRobert Muir<span class="dtd_comment"> <BooleanQuery fieldName="contents"></span> 134*e8e4245dSRobert Muir<span class="dtd_comment"> <Clause occurs="should"></span> 135*e8e4245dSRobert Muir<span class="dtd_comment"> <TermQuery>merger</TermQuery></span> 136*e8e4245dSRobert Muir<span class="dtd_comment"> </Clause></span> 137*e8e4245dSRobert Muir<span class="dtd_comment"> <Clause occurs="must"></span> 138*e8e4245dSRobert Muir<span class="dtd_comment"> <TermQuery>bank</TermQuery></span> 139*e8e4245dSRobert Muir<span class="dtd_comment"> </Clause></span> 140*e8e4245dSRobert Muir<span class="dtd_comment"> </BooleanQuery> </span> 141*e8e4245dSRobert Muir<span class="dtd_comment"> </Query></span> 142*e8e4245dSRobert Muir<span class="dtd_comment"> <BoostQuery boost="0.01"></span> 143*e8e4245dSRobert Muir<span class="dtd_comment"> <UserQuery>"world bank"</UserQuery></span> 144*e8e4245dSRobert Muir<span class="dtd_comment"> </BoostQuery></span> 145*e8e4245dSRobert Muir<span class="dtd_comment"> </BoostingQuery></span> 146*e8e4245dSRobert Muir<span class="dtd_comment"> %</span> 
147*e8e4245dSRobert Muir<span class="dtd_comment"> </span> 148*e8e4245dSRobert Muir<span class="dtd_comment">--></span><span class="dtd_plain"> </span> 149*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">BoostingQuery</span><span class="dtd_plain"> (</span><span class="dtd_attribute_name">Query</span><span class="dtd_plain">,</span><span class="dtd_attribute_name">BoostQuery</span><span class="dtd_plain">)</span><span class="dtd_tag_symbols">></span> 150*e8e4245dSRobert Muir<span class="dtd_comment"><!-- Optional boost for matches on this query. Values > 1 --></span> 151*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">BoostingQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">boost</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">"1.0"</span><span class="dtd_tag_symbols">></span> 152*e8e4245dSRobert Muir 153*e8e4245dSRobert Muir<span class="dtd_comment"><!--</span> 154*e8e4245dSRobert Muir<span class="dtd_comment"> Child element of BoostingQuery used to contain the choice of Query which is used for boosting purposes</span> 155*e8e4245dSRobert Muir<span class="dtd_comment">--></span><span class="dtd_plain"> </span> 156*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">BoostQuery</span><span class="dtd_plain"> (%</span><span class="dtd_attribute_name">queries</span><span class="dtd_plain">;)</span><span class="dtd_tag_symbols">></span> 157*e8e4245dSRobert Muir<span class="dtd_comment"><!-- Optional boost for matches on this query. 
A boost of >0 but <1 </span> 158*e8e4245dSRobert Muir<span class="dtd_comment"> effectively demotes results from Query that match this BoostQuery. </span> 159*e8e4245dSRobert Muir<span class="dtd_comment"> --></span> 160*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">BoostQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">boost</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">"1.0"</span><span class="dtd_tag_symbols">></span> 161*e8e4245dSRobert Muir 162*e8e4245dSRobert Muir 163*e8e4245dSRobert Muir 164*e8e4245dSRobert Muir<span class="dtd_comment"><!-- Removes duplicated documents from results where "duplicate" means documents share a value for a particular field such as a primary key</span> 165*e8e4245dSRobert Muir<span class="dtd_comment"> </span><span class="dtd_dtddoc_tag">@example</span><span class="dtd_comment"> <em>Find the latest version of each web page that mentions "Lucene"</em></span> 166*e8e4245dSRobert Muir<span class="dtd_comment"> %</span> 167*e8e4245dSRobert Muir<span class="dtd_comment"> <FilteredQuery></span> 168*e8e4245dSRobert Muir<span class="dtd_comment"> <Query></span> 169*e8e4245dSRobert Muir<span class="dtd_comment"> <TermQuery fieldName="text">lucene</TermQuery></span> 170*e8e4245dSRobert Muir<span class="dtd_comment"> </Query></span> 171*e8e4245dSRobert Muir<span class="dtd_comment"> <Filter></span> 172*e8e4245dSRobert Muir<span class="dtd_comment"> <DuplicateFilter fieldName="url" keepMode="last"/></span> 173*e8e4245dSRobert Muir<span class="dtd_comment"> </Filter> </span> 174*e8e4245dSRobert Muir<span class="dtd_comment"> </FilteredQuery> </span> 175*e8e4245dSRobert Muir<span class="dtd_comment"> % </span> 176*e8e4245dSRobert Muir<span class="dtd_comment"> --></span> 177*e8e4245dSRobert Muir<span 
class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">DuplicateFilter</span><span class="dtd_plain"> </span><span class="dtd_keyword">EMPTY</span><span class="dtd_tag_symbols">></span> 178*e8e4245dSRobert Muir<span class="dtd_comment"><!-- fieldName must be defined here or is taken from the most immediate parent XML element that defines a "fieldName" attribute --></span><span class="dtd_plain"> </span> 179*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">DuplicateFilter</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">fieldName</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_keyword">#IMPLIED</span><span class="dtd_tag_symbols">></span> 180*e8e4245dSRobert Muir<span class="dtd_comment"><!-- Determines if the first or last document occurrence is the one to return when presented with duplicated field values --></span><span class="dtd_plain"> </span> 181*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">DuplicateFilter</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">keepMode</span><span class="dtd_plain"> (</span><span class="dtd_attribute_name">first</span><span class="dtd_plain"> | </span><span class="dtd_attribute_name">last</span><span class="dtd_plain">) </span><span class="dtd_attribute_value">"first"</span><span class="dtd_tag_symbols">></span> 182*e8e4245dSRobert Muir<span class="dtd_comment"><!-- Controls the choice of process used to produce the filter - "full" mode identifies only non-duplicate documents with the chosen field </span> 183*e8e4245dSRobert Muir<span class="dtd_comment"> while "fast" mode may perform faster but will
also mark documents <em>without</em> the field as valid. The former approach starts by </span> 184*e8e4245dSRobert Muir<span class="dtd_comment"> assuming every document is a duplicate then finds the "master" documents to keep while the latter approach assumes all documents are </span> 185*e8e4245dSRobert Muir<span class="dtd_comment"> unique and unmarks those documents that are a copy. </span> 186*e8e4245dSRobert Muir<span class="dtd_comment"> --></span><span class="dtd_plain"> </span> 187*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">DuplicateFilter</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">processingMode</span><span class="dtd_plain"> (</span><span class="dtd_attribute_name">full</span><span class="dtd_plain"> | </span><span class="dtd_attribute_name">fast</span><span class="dtd_plain">) </span><span class="dtd_attribute_value">"full"</span><span class="dtd_tag_symbols">></span> 188*e8e4245dSRobert Muir 189*e8e4245dSRobert Muir 190*e8e4245dSRobert Muir 191*e8e4245dSRobert Muir 192*e8e4245dSRobert Muir<span class="dtd_comment"><!-- Processes child text using a field-specific choice of Analyzer to produce a set of terms that are then used as a filter.</span> 193*e8e4245dSRobert Muir<span class="dtd_comment"> </span><span class="dtd_dtddoc_tag">@example</span><span class="dtd_comment"> <em>Find documents talking about Lucene written on a Monday or a Friday</em></span> 194*e8e4245dSRobert Muir<span class="dtd_comment"> %</span> 195*e8e4245dSRobert Muir<span class="dtd_comment"> <FilteredQuery></span> 196*e8e4245dSRobert Muir<span class="dtd_comment"> <Query></span> 197*e8e4245dSRobert Muir<span class="dtd_comment"> <TermQuery fieldName="text">lucene</TermQuery></span> 198*e8e4245dSRobert Muir<span class="dtd_comment"> </Query></span> 199*e8e4245dSRobert Muir<span class="dtd_comment"> <Filter></span> 200*e8e4245dSRobert 
Muir<span class="dtd_comment"> <TermsFilter fieldName="dayOfWeek">monday friday</TermsFilter> </span> 201*e8e4245dSRobert Muir<span class="dtd_comment"> </Filter> </span> 202*e8e4245dSRobert Muir<span class="dtd_comment"> </FilteredQuery> </span> 203*e8e4245dSRobert Muir<span class="dtd_comment"> %</span> 204*e8e4245dSRobert Muir<span class="dtd_comment"> </span> 205*e8e4245dSRobert Muir<span class="dtd_comment"> --></span> 206*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">TermsFilter</span><span class="dtd_plain"> (</span><span class="dtd_keyword">#PCDATA</span><span class="dtd_plain">)</span><span class="dtd_tag_symbols">></span> 207*e8e4245dSRobert Muir<span class="dtd_comment"><!-- fieldName must be defined here or is taken from the most immediate parent XML element that defines a "fieldName" attribute --></span><span class="dtd_plain"> </span> 208*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">TermsFilter</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">fieldName</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_keyword">#IMPLIED</span><span class="dtd_tag_symbols">></span> 209*e8e4245dSRobert Muir<span class="dtd_comment"><!--</span> 210*e8e4245dSRobert Muir<span class="dtd_comment"> A Filter equivalent to BooleanQuery that applies Boolean logic to Clauses containing Filters.</span> 211*e8e4245dSRobert Muir<span class="dtd_comment"> Unlike BooleanQuery a BooleanFilter can contain a single "mustNot" clause.</span> 212*e8e4245dSRobert Muir<span class="dtd_comment"> </span><span class="dtd_dtddoc_tag">@example</span><span class="dtd_comment"> <em>Find documents from the first quarter of this year or last year that are not in "draft" 
status</em></span> 213*e8e4245dSRobert Muir<span class="dtd_comment"> %</span> 214*e8e4245dSRobert Muir<span class="dtd_comment"> <FilteredQuery></span> 215*e8e4245dSRobert Muir<span class="dtd_comment"> <Query></span> 216*e8e4245dSRobert Muir<span class="dtd_comment"> <MatchAllDocsQuery/></span> 217*e8e4245dSRobert Muir<span class="dtd_comment"> </Query></span> 218*e8e4245dSRobert Muir<span class="dtd_comment"> <Filter></span> 219*e8e4245dSRobert Muir<span class="dtd_comment"> <BooleanFilter></span> 220*e8e4245dSRobert Muir<span class="dtd_comment"> <Clause occurs="should"></span> 221*e8e4245dSRobert Muir<span class="dtd_comment"> <RangeFilter fieldName="date" lowerTerm="20070101" upperTerm="20070401"/></span> 222*e8e4245dSRobert Muir<span class="dtd_comment"> </Clause></span> 223*e8e4245dSRobert Muir<span class="dtd_comment"> <Clause occurs="should"></span> 224*e8e4245dSRobert Muir<span class="dtd_comment"> <RangeFilter fieldName="date" lowerTerm="20060101" upperTerm="20060401"/></span> 225*e8e4245dSRobert Muir<span class="dtd_comment"> </Clause></span> 226*e8e4245dSRobert Muir<span class="dtd_comment"> <Clause occurs="mustNot"></span> 227*e8e4245dSRobert Muir<span class="dtd_comment"> <TermsFilter fieldName="status">draft</TermsFilter> </span> 228*e8e4245dSRobert Muir<span class="dtd_comment"> </Clause></span> 229*e8e4245dSRobert Muir<span class="dtd_comment"> </BooleanFilter></span> 230*e8e4245dSRobert Muir<span class="dtd_comment"> </Filter></span> 231*e8e4245dSRobert Muir<span class="dtd_comment"> </FilteredQuery></span> 232*e8e4245dSRobert Muir<span class="dtd_comment"> %</span> 233*e8e4245dSRobert Muir<span class="dtd_comment"> --></span> 234*e8e4245dSRobert Muir<span class="dtd_tag_symbols"><!</span><span class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">BooleanFilter</span><span class="dtd_plain"> (</span><span class="dtd_attribute_name">Clause</span><span class="dtd_plain">)+</span><span 
class="dtd_tag_symbols">></span> 235*e8e4245dSRobert Muir 236*e8e4245dSRobert Muir</pre> 237*e8e4245dSRobert Muir</body></html> 238