xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/search/CustomQueryParser.java (revision 0e4c55544f8ea0a68e8bae37b0e502097e008ec1)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2018, Chris Fraire <cfraire@me.com>.
 */
249805b761SAdam Hornáček package org.opengrok.indexer.search;
25b5840353SAdam Hornáček 
26*52dccac1SChris Fraire import java.util.Locale;
27b5840353SAdam Hornáček import org.apache.lucene.queryparser.classic.ParseException;
28b5840353SAdam Hornáček import org.apache.lucene.queryparser.classic.QueryParser;
29b5840353SAdam Hornáček import org.apache.lucene.search.Query;
309805b761SAdam Hornáček import org.opengrok.indexer.analysis.CompatibleAnalyser;
319805b761SAdam Hornáček import org.opengrok.indexer.configuration.RuntimeEnvironment;
32b5840353SAdam Hornáček 
33b5840353SAdam Hornáček /**
34b5840353SAdam Hornáček  * A custom query parser for OpenGrok.
35b5840353SAdam Hornáček  */
36911e8af0SAdam Hornáček public class CustomQueryParser extends QueryParser {
37b5840353SAdam Hornáček 
38b5840353SAdam Hornáček     /**
39b5840353SAdam Hornáček      * Create a query parser customized for OpenGrok.
40b5840353SAdam Hornáček      *
41b5840353SAdam Hornáček      * @param field default field for unqualified query terms
42b5840353SAdam Hornáček      */
CustomQueryParser(String field)43911e8af0SAdam Hornáček     public CustomQueryParser(String field) {
44b5840353SAdam Hornáček         super(field, new CompatibleAnalyser());
45b5840353SAdam Hornáček         setDefaultOperator(AND_OPERATOR);
46b5840353SAdam Hornáček         setAllowLeadingWildcard(
47b5840353SAdam Hornáček                 RuntimeEnvironment.getInstance().isAllowLeadingWildcard());
48b5840353SAdam Hornáček         // Convert terms to lower case manually to prevent changing the case
49b5840353SAdam Hornáček         // if the field is case sensitive.
50b5840353SAdam Hornáček         // since lucene 7.0.0 below is in place so every class that
51b5840353SAdam Hornáček         // extends Analyser must normalize the text by itself
52b5840353SAdam Hornáček         /*
53b5840353SAdam Hornáček ## AnalyzingQueryParser removed (LUCENE-7355)
54b5840353SAdam Hornáček 
55b5840353SAdam Hornáček The functionality of AnalyzingQueryParser has been folded into the classic
56b5840353SAdam Hornáček QueryParser, which now passes terms through Analyzer#normalize when generating
57b5840353SAdam Hornáček queries.
58b5840353SAdam Hornáček 
59b5840353SAdam Hornáček ## CommonQueryParserConfiguration.setLowerCaseExpandedTerms removed (LUCENE-7355)
60b5840353SAdam Hornáček 
61b5840353SAdam Hornáček This option has been removed as expanded terms are now normalized through
62b5840353SAdam Hornáček Analyzer#normalize.
63b5840353SAdam Hornáček         */
64b5840353SAdam Hornáček      //   setLowercaseExpandedTerms(false);
65b5840353SAdam Hornáček 
66b5840353SAdam Hornáček     }
67b5840353SAdam Hornáček 
68b5840353SAdam Hornáček     /**
69b5840353SAdam Hornáček      * Is this field case sensitive?
70b5840353SAdam Hornáček      *
71b5840353SAdam Hornáček      * @param field name of the field to check
72b5840353SAdam Hornáček      * @return {@code true} if the field is case sensitive, {@code false}
73b5840353SAdam Hornáček      * otherwise
74b5840353SAdam Hornáček      */
isCaseSensitive(String field)75911e8af0SAdam Hornáček     protected static boolean isCaseSensitive(String field) {
76b5840353SAdam Hornáček         // Only definition search and reference search are case sensitive
77b5840353SAdam Hornáček         return QueryBuilder.DEFS.equals(field)
78b5840353SAdam Hornáček                 || QueryBuilder.REFS.equals(field);
79b5840353SAdam Hornáček     }
80b5840353SAdam Hornáček 
81b5840353SAdam Hornáček     /**
82b5840353SAdam Hornáček      * Get a canonical form of a search term. This will convert the term to
83b5840353SAdam Hornáček      * lower case if the field is case insensitive.
84b5840353SAdam Hornáček      *
85b5840353SAdam Hornáček      * @param field the field to search on
86b5840353SAdam Hornáček      * @param term the term to search for
87b5840353SAdam Hornáček      * @return the canonical form of the search term, which matches how it is
88b5840353SAdam Hornáček      * stored in the index
89b5840353SAdam Hornáček      */
90b5840353SAdam Hornáček     // The analyzers use the default locale. They probably should have used
91b5840353SAdam Hornáček     // a fixed locale, but since they don't, we ignore that PMD warning here.
92b5840353SAdam Hornáček     @SuppressWarnings("PMD.UseLocaleWithCaseConversions")
getCanonicalTerm(String field, String term)93b5840353SAdam Hornáček     private static String getCanonicalTerm(String field, String term) {
94*52dccac1SChris Fraire         return isCaseSensitive(field) ? term : term.toLowerCase(Locale.ROOT);
95b5840353SAdam Hornáček     }
96b5840353SAdam Hornáček 
97b5840353SAdam Hornáček     // Override the get***Query() methods to lower case the search terms if
98b5840353SAdam Hornáček     // the field is case sensitive. We don't need to override getFieldQuery()
99b5840353SAdam Hornáček     // because it uses the analyzer to convert the terms to canonical form.
100b5840353SAdam Hornáček     @Override
getFuzzyQuery(String field, String term, float min)101b5840353SAdam Hornáček     protected Query getFuzzyQuery(String field, String term, float min)
102b5840353SAdam Hornáček             throws ParseException {
103b5840353SAdam Hornáček         return super.getFuzzyQuery(field, getCanonicalTerm(field, term), min);
104b5840353SAdam Hornáček     }
105b5840353SAdam Hornáček 
106b5840353SAdam Hornáček     @Override
getPrefixQuery(String field, String term)107b5840353SAdam Hornáček     protected Query getPrefixQuery(String field, String term)
108b5840353SAdam Hornáček             throws ParseException {
109b5840353SAdam Hornáček         return super.getPrefixQuery(field, getCanonicalTerm(field, term));
110b5840353SAdam Hornáček     }
111b5840353SAdam Hornáček 
112b5840353SAdam Hornáček     @Override
getRangeQuery(String field, String term1, String term2, boolean startinclusive, boolean endinclusive)113b5840353SAdam Hornáček     protected Query getRangeQuery(String field, String term1, String term2,
114b5840353SAdam Hornáček             boolean startinclusive, boolean endinclusive)
115b5840353SAdam Hornáček             throws ParseException {
116b5840353SAdam Hornáček         return super.getRangeQuery(
117b5840353SAdam Hornáček                 field,
118b5840353SAdam Hornáček                 getCanonicalTerm(field, term1),
119b5840353SAdam Hornáček                 getCanonicalTerm(field, term2),
120b5840353SAdam Hornáček                 startinclusive,
121b5840353SAdam Hornáček                 endinclusive);
122b5840353SAdam Hornáček     }
123b5840353SAdam Hornáček 
124b5840353SAdam Hornáček     @Override
getWildcardQuery(String field, String term)125b5840353SAdam Hornáček     protected Query getWildcardQuery(String field, String term)
126b5840353SAdam Hornáček             throws ParseException {
127b5840353SAdam Hornáček         return super.getWildcardQuery(field, getCanonicalTerm(field, term));
128b5840353SAdam Hornáček     }
129b5840353SAdam Hornáček }
130