xref: /Lucene/lucene/queryparser/docs/xml/LuceneContribQuery.dtd.org.html (revision e8e4245d9b36123446546ff15967ac95429ea2b0)
1*e8e4245dSRobert Muir<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2*e8e4245dSRobert Muir<html> <head>
3*e8e4245dSRobert Muir<meta http-equiv='CONTENT-TYPE' content='text/html; charset=UTF-8' />
4*e8e4245dSRobert Muir<link rel='StyleSheet' href='DTDDocStyle.css' type='text/css' media='screen' />
5*e8e4245dSRobert Muir<title>Contrib Lucene</title>
6*e8e4245dSRobert Muir</head><body>
7*e8e4245dSRobert Muir<p class='DTDSource'><b><code>LuceneContribQuery.dtd</code></b>: <a href='LuceneContribQuery.dtd.html'>Elements</a> - <a href='LuceneContribQuery.dtd.entities.html'>Entities</a> - <a href='LuceneContribQuery.dtd.org.html'>Source</a> | <a href='intro.html'>Intro</a> - <a href='elementsIndex.html'>Index</a><br /><a href='index.html' target='_top'>FRAMES</a>&nbsp;/&nbsp;<a href='LuceneContribQuery.dtd.org.html' target='_top'>NO FRAMES</a></p><pre id='dtd_source'><span class="dtd_comment">&lt;!--    </span>
8*e8e4245dSRobert Muir<span class="dtd_comment">    This DTD builds on the &lt;a href=&quot;LuceneCoreQuery.dtd.html&quot;&gt;core Lucene XML syntax&lt;/a&gt; and adds support for features found in the &quot;contrib&quot; section of the Lucene project.</span>
9*e8e4245dSRobert Muir<span class="dtd_comment">    </span>
10*e8e4245dSRobert Muir<span class="dtd_comment">    CorePlusExtensionsParser.java is the Java class that encapsulates this parser behaviour.</span>
11*e8e4245dSRobert Muir
12*e8e4245dSRobert Muir<span class="dtd_comment">    </span>
13*e8e4245dSRobert Muir<span class="dtd_comment">    The features added are:</span>
14*e8e4245dSRobert Muir<span class="dtd_comment">    &lt;ul&gt;</span>
15*e8e4245dSRobert Muir<span class="dtd_comment">    &lt;li&gt;&lt;a href=&quot;#LikeThisQuery&quot;&gt;LikeThisQuery&lt;/a&gt;&lt;/li&gt;</span>
16*e8e4245dSRobert Muir<span class="dtd_comment">       Support for querying using large amounts of example text indicative of the users' general area of interest</span>
17*e8e4245dSRobert Muir<span class="dtd_comment">    &lt;li&gt;&lt;a href=&quot;#FuzzyLikeThisQuery&quot;&gt;FuzzyLikeThisQuery&lt;/a&gt;&lt;/li&gt;</span>
18*e8e4245dSRobert Muir<span class="dtd_comment">       A style of fuzzy query which automatically looks for fuzzy variations on only the &quot;interesting&quot; terms </span>
19*e8e4245dSRobert Muir<span class="dtd_comment">    &lt;li&gt;&lt;a href=&quot;#BooleanFilter&quot;&gt;BooleanFilter&lt;/a&gt;&lt;/li&gt;</span>
20*e8e4245dSRobert Muir<span class="dtd_comment">       Is to Filters what core Lucene's BooleanQuery is to Queries - allows mixing of clauses using Boolean logic</span>
21*e8e4245dSRobert Muir<span class="dtd_comment">    &lt;li&gt;&lt;a href=&quot;#TermsFilter&quot;&gt;TermsFilter&lt;/a&gt;&lt;/li&gt;</span>
22*e8e4245dSRobert Muir<span class="dtd_comment">       Constructs a filter from an arbitrary set of terms (unlike &lt;a href=&quot;#RangeFilter&quot;&gt;RangeFilter&lt;/a&gt; which requires a contiguous range of terms)</span>
23*e8e4245dSRobert Muir<span class="dtd_comment">    &lt;li&gt;&lt;a href=&quot;#DuplicateFilter&quot;&gt;DuplicateFilter&lt;/a&gt;&lt;/li&gt;</span>
24*e8e4245dSRobert Muir<span class="dtd_comment">       Removes duplicated documents from results where &quot;duplicate&quot; means documents share a value for a particular field (e.g. a primary key)</span>
25*e8e4245dSRobert Muir<span class="dtd_comment">    &lt;li&gt;&lt;a href=&quot;#BoostingQuery&quot;&gt;BoostingQuery&lt;/a&gt;&lt;/li&gt;</span>
26*e8e4245dSRobert Muir<span class="dtd_comment">       Influence score of a query's matches in a subtle way which can't be achieved using BooleanQuery</span>
27*e8e4245dSRobert Muir<span class="dtd_comment">    &lt;/ul&gt;</span>
28*e8e4245dSRobert Muir<span class="dtd_comment">    </span><span class="dtd_dtddoc_tag">@title</span><span class="dtd_comment"> Contrib Lucene</span>
29*e8e4245dSRobert Muir<span class="dtd_comment">--&gt;</span>
30*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- </span><span class="dtd_dtddoc_tag">@hidden</span><span class="dtd_comment"> include the core DTD --&gt;</span>
31*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ENTITY</span><span class="dtd_plain"> % </span><span class="dtd_attribute_name">coreParserDTD</span><span class="dtd_plain"> </span><span class="dtd_keyword">SYSTEM</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot;LuceneCoreQuery.dtd&quot;</span><span class="dtd_plain"> </span><span class="dtd_tag_symbols">&gt;</span>
32*e8e4245dSRobert Muir
33*e8e4245dSRobert Muir
34*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- </span><span class="dtd_dtddoc_tag">@hidden</span><span class="dtd_comment"> Allow for extensions --&gt;</span>
35*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ENTITY</span><span class="dtd_plain"> % </span><span class="dtd_attribute_name">extendedSpanQueries2</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot; &quot;</span><span class="dtd_plain"> </span><span class="dtd_tag_symbols">&gt;</span>
36*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ENTITY</span><span class="dtd_plain"> % </span><span class="dtd_attribute_name">extendedQueries2</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot; &quot;</span><span class="dtd_plain"> </span><span class="dtd_tag_symbols">&gt;</span>
37*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ENTITY</span><span class="dtd_plain"> % </span><span class="dtd_attribute_name">extendedFilters2</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot; &quot;</span><span class="dtd_plain"> </span><span class="dtd_tag_symbols">&gt;</span>
38*e8e4245dSRobert Muir
39*e8e4245dSRobert Muir
40*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ENTITY</span><span class="dtd_plain"> % </span><span class="dtd_attribute_name">extendedQueries1</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot;|LikeThisQuery|BoostingQuery|FuzzyLikeThisQuery%extendedQueries2;%extendedSpanQueries2;&quot;</span><span class="dtd_plain"> </span><span class="dtd_tag_symbols">&gt;</span>
41*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ENTITY</span><span class="dtd_plain"> % </span><span class="dtd_attribute_name">extendedFilters1</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot;|TermsFilter|BooleanFilter|DuplicateFilter%extendedFilters2;&quot;</span><span class="dtd_plain"> </span><span class="dtd_tag_symbols">&gt;</span>
42*e8e4245dSRobert Muir
43*e8e4245dSRobert Muir
44*e8e4245dSRobert Muir<span class="dtd_plain">%coreParserDTD;</span>
45*e8e4245dSRobert Muir
46*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!--</span>
47*e8e4245dSRobert Muir<span class="dtd_comment">Performs fuzzy matching on &quot;significant&quot; terms in fields. Improves on &quot;LikeThisQuery&quot; by allowing for fuzzy variations of supplied fields.</span>
48*e8e4245dSRobert Muir<span class="dtd_comment">Improves on FuzzyQuery by rewarding all fuzzy variants of a term with the same IDF rather than default fuzzy behaviour which ranks rarer</span>
49*e8e4245dSRobert Muir<span class="dtd_comment">    variants (typically misspellings) more highly. This can be a useful default search mode for processing user input where the end user</span>
50*e8e4245dSRobert Muir<span class="dtd_comment">    is not expected to know about the standard query operators for fuzzy, boolean or phrase logic found in UserQuery</span>
51*e8e4245dSRobert Muir<span class="dtd_comment">    </span><span class="dtd_dtddoc_tag">@example</span><span class="dtd_comment"> </span>
52*e8e4245dSRobert Muir<span class="dtd_comment">            &lt;em&gt;Search for information about the Sumitomo bank, where the end user has mis-spelt the name&lt;/em&gt;</span>
53*e8e4245dSRobert Muir<span class="dtd_comment">            %             </span>
54*e8e4245dSRobert Muir<span class="dtd_comment">            &lt;FuzzyLikeThisQuery&gt;</span>
55*e8e4245dSRobert Muir<span class="dtd_comment">                &lt;Field fieldName=&quot;contents&quot;&gt;</span>
56*e8e4245dSRobert Muir<span class="dtd_comment">                     Sumitimo bank</span>
57*e8e4245dSRobert Muir<span class="dtd_comment">                &lt;/Field&gt;</span>
58*e8e4245dSRobert Muir<span class="dtd_comment">            &lt;/FuzzyLikeThisQuery&gt;</span>
59*e8e4245dSRobert Muir<span class="dtd_comment">             %  </span>
60*e8e4245dSRobert Muir<span class="dtd_comment">--&gt;</span>
61*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">FuzzyLikeThisQuery</span><span class="dtd_plain"> (</span><span class="dtd_attribute_name">Field</span><span class="dtd_plain">)*</span><span class="dtd_tag_symbols">&gt;</span>
62*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- Optional boost for matches on this query. Values &gt; 1 increase the score contribution of matches --&gt;</span>
63*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">FuzzyLikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">boost</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot;1.0&quot;</span><span class="dtd_tag_symbols">&gt;</span>
64*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- Limits the total number of terms selected from the provided text plus the selected &quot;fuzzy&quot; variants --&gt;</span>
65*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">FuzzyLikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">maxNumTerms</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot;50&quot;</span><span class="dtd_tag_symbols">&gt;</span>
66*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- Ignore &quot;Term Frequency&quot; - a boost factor which rewards multiple occurrences of the same term in a document --&gt;</span>
67*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">FuzzyLikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">ignoreTF</span><span class="dtd_plain"> (</span><span class="dtd_attribute_name">true</span><span class="dtd_plain">|</span><span class="dtd_attribute_name">false</span><span class="dtd_plain">) </span><span class="dtd_attribute_value">&quot;false&quot;</span><span class="dtd_tag_symbols">&gt;</span>
68*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- A field used in a FuzzyLikeThisQuery --&gt;</span>
69*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">Field</span><span class="dtd_plain"> (</span><span class="dtd_keyword">#PCDATA</span><span class="dtd_plain">)</span><span class="dtd_tag_symbols">&gt;</span>
70*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- Controls the level of similarity required for fuzzy variants where 1 is identical and 0.5 is that the variant contains </span>
71*e8e4245dSRobert Muir<span class="dtd_comment">    half of the original's characters in the same order. Lower values produce more results but may take longer to execute due to</span>
72*e8e4245dSRobert Muir<span class="dtd_comment">    additional IO required to read matching document ids--&gt;</span>
73*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">Field</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">minSimilarity</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot;0.5&quot;</span><span class="dtd_tag_symbols">&gt;</span>
74*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- Controls the minimum number of characters at the start of fuzzy variant words that must exactly match the original.</span>
75*e8e4245dSRobert Muir<span class="dtd_comment">    A value of zero will require no minimum and the search software will effectively scan ALL terms from a to z looking for variations.</span>
76*e8e4245dSRobert Muir<span class="dtd_comment">    This can incur high CPU overhead and a prefix length of just &quot;1&quot; will reduce this overhead to 1/26th of the original cost (assuming</span>
77*e8e4245dSRobert Muir<span class="dtd_comment">    an even distribution of letters used from the alphabet).</span>
78*e8e4245dSRobert Muir<span class="dtd_comment"> --&gt;</span>
79*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">Field</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">prefixLength</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot;1&quot;</span><span class="dtd_tag_symbols">&gt;</span>
80*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- fieldName must be defined here or is taken from the most immediate parent XML element that defines a &quot;fieldName&quot; attribute --&gt;</span><span class="dtd_plain"> </span>
81*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">Field</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">fieldName</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_keyword">#IMPLIED</span><span class="dtd_tag_symbols">&gt;</span>
82*e8e4245dSRobert Muir
83*e8e4245dSRobert Muir
84*e8e4245dSRobert Muir
85*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!--</span>
86*e8e4245dSRobert Muir<span class="dtd_comment">    Cherry-picks &quot;significant&quot; terms from the example child text and queries using these words. By only using significant (read: rare) terms the</span>
87*e8e4245dSRobert Muir<span class="dtd_comment">    performance cost of the query is substantially reduced and large bodies of text can be used as example content.</span>
88*e8e4245dSRobert Muir<span class="dtd_comment">    </span><span class="dtd_dtddoc_tag">@example</span><span class="dtd_comment"> </span>
89*e8e4245dSRobert Muir<span class="dtd_comment">            &lt;em&gt;Use a block of text as an example of the type of content to be found, ignoring the &quot;Reuters&quot; word which</span>
90*e8e4245dSRobert Muir<span class="dtd_comment">           appears commonly in the index.&lt;/em&gt;</span>
91*e8e4245dSRobert Muir<span class="dtd_comment">            %</span>
92*e8e4245dSRobert Muir<span class="dtd_comment">            &lt;LikeThisQuery percentTermsToMatch=&quot;5&quot; stopWords=&quot;Reuters&quot;&gt;</span>
93*e8e4245dSRobert Muir<span class="dtd_comment">                IRAQI TROOPS REPORTED PUSHING BACK IRANIANS Iraq said today its troops were pushing Iranian forces out of </span>
94*e8e4245dSRobert Muir<span class="dtd_comment">                positions they had initially occupied when they launched a new offensive near the southern port of </span>
95*e8e4245dSRobert Muir<span class="dtd_comment">                Basra early yesterday.     A High Command communique said Iraqi troops had won a significant victory </span>
96*e8e4245dSRobert Muir<span class="dtd_comment">                and were continuing to advance.     Iraq said it had foiled a three-pronged thrust some 10 km </span>
97*e8e4245dSRobert Muir<span class="dtd_comment">                (six miles) from Basra, but admitted the Iranians had occupied ground held by the Mohammed al-Qassem </span>
98*e8e4245dSRobert Muir<span class="dtd_comment">                unit, one of three divisions attacked.     The communique said Iranian Revolutionary Guards were under </span>
99*e8e4245dSRobert Muir<span class="dtd_comment">                assault from warplanes, helicopter gunships, heavy artillery and tanks.     &quot;Our forces are continuing </span>
100*e8e4245dSRobert Muir<span class="dtd_comment">                their advance until they purge the last foothold&quot; occupied by the Iranians, it said.     </span>
101*e8e4245dSRobert Muir<span class="dtd_comment">                (Iran said its troops had killed or wounded more than 4,000 Iraqis and were stabilising their new positions.)     </span>
102*e8e4245dSRobert Muir<span class="dtd_comment">                The Baghdad communique said Iraqi planes also destroyed oil installations at Iran's southwestern Ahvaz field </span>
103*e8e4245dSRobert Muir<span class="dtd_comment">                during a raid today. It denied an Iranian report that an Iraqi jet was shot down.     </span>
104*e8e4245dSRobert Muir<span class="dtd_comment">                Iraq also reported a naval battle at the northern tip of the Gulf. Iraqi naval units and forces defending an </span>
105*e8e4245dSRobert Muir<span class="dtd_comment">                offshore terminal sank six Iranian out of 28 Iranian boats attempting to attack an offshore terminal, </span>
106*e8e4245dSRobert Muir<span class="dtd_comment">                the communique said.      Reuters 3;</span>
107*e8e4245dSRobert Muir<span class="dtd_comment">            &lt;/LikeThisQuery&gt;             </span>
108*e8e4245dSRobert Muir<span class="dtd_comment">            %   </span>
109*e8e4245dSRobert Muir<span class="dtd_comment">    --&gt;</span>
110*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">LikeThisQuery</span><span class="dtd_plain"> (</span><span class="dtd_keyword">#PCDATA</span><span class="dtd_plain">)</span><span class="dtd_tag_symbols">&gt;</span>
111*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- Optional boost for matches on this query. Values &gt; 1 increase the score contribution of matches --&gt;</span>
112*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">LikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">boost</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot;1.0&quot;</span><span class="dtd_tag_symbols">&gt;</span>
113*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- Comma delimited list of field names --&gt;</span>
114*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">LikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">fieldNames</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_keyword">#IMPLIED</span><span class="dtd_tag_symbols">&gt;</span>
115*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- a list of stop words - analyzed to produce stop terms --&gt;</span>
116*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">LikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">stopWords</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_keyword">#IMPLIED</span><span class="dtd_tag_symbols">&gt;</span>
117*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- controls the maximum number of words shortlisted for the query. The higher the number the slower the response due to more disk reads required --&gt;</span>
118*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">LikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">maxQueryTerms</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot;20&quot;</span><span class="dtd_tag_symbols">&gt;</span>
119*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- Controls how many times a term must appear in the example text before it is shortlisted for use in the query --&gt;</span>
120*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">LikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">minTermFrequency</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot;1&quot;</span><span class="dtd_tag_symbols">&gt;</span>
121*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- A quality control that can be used to limit the number of results to those documents matching a certain percentage of the shortlisted query terms.</span>
122*e8e4245dSRobert Muir<span class="dtd_comment">    Values must be between 1 and 100--&gt;</span>
123*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">LikeThisQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">percentTermsToMatch</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot;30&quot;</span><span class="dtd_tag_symbols">&gt;</span>
124*e8e4245dSRobert Muir
125*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!--</span>
126*e8e4245dSRobert Muir<span class="dtd_comment">    Requires matches on the &quot;Query&quot; element and optionally boosts by any matches on the &quot;BoostQuery&quot;.</span>
127*e8e4245dSRobert Muir<span class="dtd_comment">    Unlike a regular BooleanQuery the boost can be less than 1 to produce a subtractive rather than additive result</span>
128*e8e4245dSRobert Muir<span class="dtd_comment">    on the match score. </span>
129*e8e4245dSRobert Muir<span class="dtd_comment">    </span><span class="dtd_dtddoc_tag">@example</span><span class="dtd_comment"> &lt;em&gt;Find documents about banks, preferably related to mergers, and preferably not about &quot;World bank&quot;&lt;/em&gt;</span>
130*e8e4245dSRobert Muir<span class="dtd_comment">    %</span>
131*e8e4245dSRobert Muir<span class="dtd_comment">    &lt;BoostingQuery&gt;</span>
132*e8e4245dSRobert Muir<span class="dtd_comment">      &lt;Query&gt;</span>
133*e8e4245dSRobert Muir<span class="dtd_comment">         &lt;BooleanQuery fieldName=&quot;contents&quot;&gt;</span>
134*e8e4245dSRobert Muir<span class="dtd_comment">           &lt;Clause occurs=&quot;should&quot;&gt;</span>
135*e8e4245dSRobert Muir<span class="dtd_comment">              &lt;TermQuery&gt;merger&lt;/TermQuery&gt;</span>
136*e8e4245dSRobert Muir<span class="dtd_comment">           &lt;/Clause&gt;</span>
137*e8e4245dSRobert Muir<span class="dtd_comment">           &lt;Clause occurs=&quot;must&quot;&gt;</span>
138*e8e4245dSRobert Muir<span class="dtd_comment">              &lt;TermQuery&gt;bank&lt;/TermQuery&gt;</span>
139*e8e4245dSRobert Muir<span class="dtd_comment">           &lt;/Clause&gt;</span>
140*e8e4245dSRobert Muir<span class="dtd_comment">         &lt;/BooleanQuery&gt;    </span>
141*e8e4245dSRobert Muir<span class="dtd_comment">      &lt;/Query&gt;</span>
142*e8e4245dSRobert Muir<span class="dtd_comment">      &lt;BoostQuery boost=&quot;0.01&quot;&gt;</span>
143*e8e4245dSRobert Muir<span class="dtd_comment">         &lt;UserQuery&gt;&quot;world bank&quot;&lt;/UserQuery&gt;</span>
144*e8e4245dSRobert Muir<span class="dtd_comment">      &lt;/BoostQuery&gt;</span>
145*e8e4245dSRobert Muir<span class="dtd_comment">    &lt;/BoostingQuery&gt;</span>
146*e8e4245dSRobert Muir<span class="dtd_comment">    %</span>
147*e8e4245dSRobert Muir<span class="dtd_comment">    </span>
148*e8e4245dSRobert Muir<span class="dtd_comment">--&gt;</span><span class="dtd_plain"> </span>
149*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">BoostingQuery</span><span class="dtd_plain"> (</span><span class="dtd_attribute_name">Query</span><span class="dtd_plain">,</span><span class="dtd_attribute_name">BoostQuery</span><span class="dtd_plain">)</span><span class="dtd_tag_symbols">&gt;</span>
150*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- Optional boost for matches on this query. Values &gt; 1 increase the score contribution of matches --&gt;</span>
151*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">BoostingQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">boost</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot;1.0&quot;</span><span class="dtd_tag_symbols">&gt;</span>
152*e8e4245dSRobert Muir
153*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!--</span>
154*e8e4245dSRobert Muir<span class="dtd_comment">    Child element of BoostingQuery used to contain the choice of Query which is used for boosting purposes</span>
155*e8e4245dSRobert Muir<span class="dtd_comment">--&gt;</span><span class="dtd_plain"> </span>
156*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">BoostQuery</span><span class="dtd_plain"> (%</span><span class="dtd_attribute_name">queries</span><span class="dtd_plain">;)</span><span class="dtd_tag_symbols">&gt;</span>
157*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- Optional boost for matches on this query. A boost of &gt;0 but &lt;1 </span>
158*e8e4245dSRobert Muir<span class="dtd_comment">    effectively demotes results from Query that match this BoostQuery.      </span>
159*e8e4245dSRobert Muir<span class="dtd_comment">    --&gt;</span>
160*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">BoostQuery</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">boost</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_attribute_value">&quot;1.0&quot;</span><span class="dtd_tag_symbols">&gt;</span>
161*e8e4245dSRobert Muir
162*e8e4245dSRobert Muir
163*e8e4245dSRobert Muir
164*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- Removes duplicated documents from results where &quot;duplicate&quot; means documents share a value for a particular field such as a primary key</span>
165*e8e4245dSRobert Muir<span class="dtd_comment">    </span><span class="dtd_dtddoc_tag">@example</span><span class="dtd_comment"> &lt;em&gt;Find the latest version of each web page that mentions &quot;Lucene&quot;&lt;/em&gt;</span>
166*e8e4245dSRobert Muir<span class="dtd_comment">    %</span>
167*e8e4245dSRobert Muir<span class="dtd_comment">    &lt;FilteredQuery&gt;</span>
168*e8e4245dSRobert Muir<span class="dtd_comment">      &lt;Query&gt;</span>
169*e8e4245dSRobert Muir<span class="dtd_comment">         &lt;TermQuery fieldName=&quot;text&quot;&gt;lucene&lt;/TermQuery&gt;</span>
170*e8e4245dSRobert Muir<span class="dtd_comment">      &lt;/Query&gt;</span>
171*e8e4245dSRobert Muir<span class="dtd_comment">      &lt;Filter&gt;</span>
172*e8e4245dSRobert Muir<span class="dtd_comment">        &lt;DuplicateFilter fieldName=&quot;url&quot; keepMode=&quot;last&quot;/&gt;</span>
173*e8e4245dSRobert Muir<span class="dtd_comment">      &lt;/Filter&gt; </span>
174*e8e4245dSRobert Muir<span class="dtd_comment">    &lt;/FilteredQuery&gt;    </span>
175*e8e4245dSRobert Muir<span class="dtd_comment">    %   </span>
176*e8e4245dSRobert Muir<span class="dtd_comment">    --&gt;</span>
177*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">DuplicateFilter</span><span class="dtd_plain"> </span><span class="dtd_keyword">EMPTY</span><span class="dtd_tag_symbols">&gt;</span>
178*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- fieldName must be defined here or is taken from the most immediate parent XML element that defines a &quot;fieldName&quot; attribute --&gt;</span><span class="dtd_plain"> </span>
179*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">DuplicateFilter</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">fieldName</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_keyword">#IMPLIED</span><span class="dtd_tag_symbols">&gt;</span>
180*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- Determines if the first or last document occurrence is the one to return when presented with duplicated field values --&gt;</span><span class="dtd_plain">    </span>
181*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">DuplicateFilter</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">keepMode</span><span class="dtd_plain"> (</span><span class="dtd_attribute_name">first</span><span class="dtd_plain"> | </span><span class="dtd_attribute_name">last</span><span class="dtd_plain">) </span><span class="dtd_attribute_value">&quot;first&quot;</span><span class="dtd_tag_symbols">&gt;</span>
182*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- Controls the choice of process used to produce the filter - &quot;full&quot; mode identifies only non-duplicate documents with the chosen field </span>
183*e8e4245dSRobert Muir<span class="dtd_comment">    while &quot;fast&quot; mode may perform faster but will also mark documents &lt;em&gt;without&lt;/em&gt; the field as valid. The former approach starts by </span>
184*e8e4245dSRobert Muir<span class="dtd_comment">    assuming every document is a duplicate then finds the &quot;master&quot; documents to keep while the latter approach assumes all documents are </span>
185*e8e4245dSRobert Muir<span class="dtd_comment">    unique and  unmarks those documents that are a copy. </span>
186*e8e4245dSRobert Muir<span class="dtd_comment">    --&gt;</span><span class="dtd_plain"> </span>
187*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">DuplicateFilter</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">processingMode</span><span class="dtd_plain"> (</span><span class="dtd_attribute_name">full</span><span class="dtd_plain"> | </span><span class="dtd_attribute_name">fast</span><span class="dtd_plain">) </span><span class="dtd_attribute_value">&quot;full&quot;</span><span class="dtd_tag_symbols">&gt;</span>
188*e8e4245dSRobert Muir
189*e8e4245dSRobert Muir
190*e8e4245dSRobert Muir
191*e8e4245dSRobert Muir
192*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- Processes child text using a field-specific choice of Analyzer to produce a set of terms that are then used as a filter.</span>
193*e8e4245dSRobert Muir<span class="dtd_comment">    </span><span class="dtd_dtddoc_tag">@example</span><span class="dtd_comment"> &lt;em&gt;Find documents talking about Lucene written on a Monday or a Friday&lt;/em&gt;</span>
194*e8e4245dSRobert Muir<span class="dtd_comment">    %</span>
195*e8e4245dSRobert Muir<span class="dtd_comment">    &lt;FilteredQuery&gt;</span>
196*e8e4245dSRobert Muir<span class="dtd_comment">      &lt;Query&gt;</span>
197*e8e4245dSRobert Muir<span class="dtd_comment">         &lt;TermQuery fieldName=&quot;text&quot;&gt;lucene&lt;/TermQuery&gt;</span>
198*e8e4245dSRobert Muir<span class="dtd_comment">      &lt;/Query&gt;</span>
199*e8e4245dSRobert Muir<span class="dtd_comment">      &lt;Filter&gt;</span>
200*e8e4245dSRobert Muir<span class="dtd_comment">         &lt;TermsFilter fieldName=&quot;dayOfWeek&quot;&gt;monday friday&lt;/TermsFilter&gt; </span>
201*e8e4245dSRobert Muir<span class="dtd_comment">      &lt;/Filter&gt;   </span>
202*e8e4245dSRobert Muir<span class="dtd_comment">    &lt;/FilteredQuery&gt;    </span>
203*e8e4245dSRobert Muir<span class="dtd_comment">    %</span>
204*e8e4245dSRobert Muir<span class="dtd_comment">    </span>
205*e8e4245dSRobert Muir<span class="dtd_comment">    --&gt;</span>
206*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">TermsFilter</span><span class="dtd_plain"> (</span><span class="dtd_keyword">#PCDATA</span><span class="dtd_plain">)</span><span class="dtd_tag_symbols">&gt;</span>
207*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!-- fieldName must be defined here or is taken from the most immediate parent XML element that defines a &quot;fieldName&quot; attribute --&gt;</span><span class="dtd_plain"> </span>
208*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ATTLIST</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">TermsFilter</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">fieldName</span><span class="dtd_plain"> </span><span class="dtd_keyword">CDATA</span><span class="dtd_plain"> </span><span class="dtd_keyword">#IMPLIED</span><span class="dtd_tag_symbols">&gt;</span>
209*e8e4245dSRobert Muir<span class="dtd_comment">&lt;!--</span>
210*e8e4245dSRobert Muir<span class="dtd_comment">    A Filter equivalent to BooleanQuery that applies Boolean logic to Clauses containing Filters.</span>
211*e8e4245dSRobert Muir<span class="dtd_comment">    Unlike BooleanQuery, a BooleanFilter can contain a single &quot;mustNot&quot; clause.</span>
212*e8e4245dSRobert Muir<span class="dtd_comment">    </span><span class="dtd_dtddoc_tag">@example</span><span class="dtd_comment"> &lt;em&gt;Find documents from the first quarter of this year or last year that are not in &quot;draft&quot; status&lt;/em&gt;</span>
213*e8e4245dSRobert Muir<span class="dtd_comment">    %</span>
214*e8e4245dSRobert Muir<span class="dtd_comment">     &lt;FilteredQuery&gt;</span>
215*e8e4245dSRobert Muir<span class="dtd_comment">       &lt;Query&gt;</span>
216*e8e4245dSRobert Muir<span class="dtd_comment">           &lt;MatchAllDocsQuery/&gt;</span>
217*e8e4245dSRobert Muir<span class="dtd_comment">       &lt;/Query&gt;</span>
218*e8e4245dSRobert Muir<span class="dtd_comment">       &lt;Filter&gt;</span>
219*e8e4245dSRobert Muir<span class="dtd_comment">        &lt;BooleanFilter&gt;</span>
220*e8e4245dSRobert Muir<span class="dtd_comment">          &lt;Clause occurs=&quot;should&quot;&gt;</span>
221*e8e4245dSRobert Muir<span class="dtd_comment">             &lt;RangeFilter fieldName=&quot;date&quot; lowerTerm=&quot;20070101&quot; upperTerm=&quot;20070401&quot;/&gt;</span>
222*e8e4245dSRobert Muir<span class="dtd_comment">          &lt;/Clause&gt;</span>
223*e8e4245dSRobert Muir<span class="dtd_comment">          &lt;Clause occurs=&quot;should&quot;&gt;</span>
224*e8e4245dSRobert Muir<span class="dtd_comment">             &lt;RangeFilter fieldName=&quot;date&quot; lowerTerm=&quot;20060101&quot; upperTerm=&quot;20060401&quot;/&gt;</span>
225*e8e4245dSRobert Muir<span class="dtd_comment">          &lt;/Clause&gt;</span>
226*e8e4245dSRobert Muir<span class="dtd_comment">          &lt;Clause occurs=&quot;mustNot&quot;&gt;</span>
227*e8e4245dSRobert Muir<span class="dtd_comment">             &lt;TermsFilter fieldName=&quot;status&quot;&gt;draft&lt;/TermsFilter&gt; </span>
228*e8e4245dSRobert Muir<span class="dtd_comment">          &lt;/Clause&gt;</span>
229*e8e4245dSRobert Muir<span class="dtd_comment">        &lt;/BooleanFilter&gt;</span>
230*e8e4245dSRobert Muir<span class="dtd_comment">       &lt;/Filter&gt;</span>
231*e8e4245dSRobert Muir<span class="dtd_comment">    &lt;/FilteredQuery&gt;</span>
232*e8e4245dSRobert Muir<span class="dtd_comment">    %</span>
233*e8e4245dSRobert Muir<span class="dtd_comment">    --&gt;</span>
234*e8e4245dSRobert Muir<span class="dtd_tag_symbols">&lt;!</span><span class="dtd_tag_name">ELEMENT</span><span class="dtd_plain"> </span><span class="dtd_attribute_name">BooleanFilter</span><span class="dtd_plain"> (</span><span class="dtd_attribute_name">Clause</span><span class="dtd_plain">)+</span><span class="dtd_tag_symbols">&gt;</span>
235*e8e4245dSRobert Muir
236*e8e4245dSRobert Muir</pre>
237*e8e4245dSRobert Muir</body></html>
238