/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2018, Chris Fraire <cfraire@me.com>.
 */
package org.opengrok.indexer.search;

import java.util.Locale;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.opengrok.indexer.analysis.CompatibleAnalyser;
import org.opengrok.indexer.configuration.RuntimeEnvironment;

/**
 * A custom query parser for OpenGrok.
 * <p>
 * Terms of fuzzy, prefix, range and wildcard queries are lower-cased before
 * being handed to the Lucene parser, except for the fields that
 * {@link #isCaseSensitive(String)} reports as case sensitive.
 */
public class CustomQueryParser extends QueryParser {

    /**
     * Create a query parser customized for OpenGrok.
     *
     * @param field default field for unqualified query terms
     */
    public CustomQueryParser(String field) {
        super(field, new CompatibleAnalyser());
        setDefaultOperator(AND_OPERATOR);
        setAllowLeadingWildcard(
                RuntimeEnvironment.getInstance().isAllowLeadingWildcard());
        // Terms are converted to lower case manually (see the get***Query()
        // overrides below) to prevent changing the case if the field is case
        // sensitive.
        //
        // Since Lucene 7.0.0 (LUCENE-7355) every class that extends Analyzer
        // must normalize the text by itself: AnalyzingQueryParser was folded
        // into the classic QueryParser, which now passes terms through
        // Analyzer#normalize when generating queries, and
        // CommonQueryParserConfiguration.setLowerCaseExpandedTerms was removed
        // because expanded terms are normalized through Analyzer#normalize.
        // Hence there is no setLowercaseExpandedTerms(false) call here.
    }

    /**
     * Is this field case sensitive?
     *
     * @param field name of the field to check
     * @return {@code true} if the field is case sensitive, {@code false}
     * otherwise
     */
    protected static boolean isCaseSensitive(String field) {
        // Only definition search and reference search are case sensitive
        return QueryBuilder.DEFS.equals(field)
                || QueryBuilder.REFS.equals(field);
    }

    /**
     * Get a canonical form of a search term. This will convert the term to
     * lower case if the field is case insensitive.
     *
     * @param field the field to search on
     * @param term the term to search for; may be {@code null} (an open-ended
     * range bound), in which case {@code null} is returned
     * @return the canonical form of the search term, which matches how it is
     * stored in the index
     */
    private static String getCanonicalTerm(String field, String term) {
        // A null term must pass through untouched: range queries forward
        // their bounds here and an unbounded side has no text to lower-case.
        if (term == null || isCaseSensitive(field)) {
            return term;
        }
        // Locale.ROOT keeps the conversion independent of the JVM's default
        // locale.
        return term.toLowerCase(Locale.ROOT);
    }

    // Override the get***Query() methods to lower case the search terms if
    // the field is case insensitive. We don't need to override getFieldQuery()
    // because it uses the analyzer to convert the terms to canonical form.

    /**
     * Build a fuzzy query after converting the term to its canonical form.
     *
     * @param field the field to search on
     * @param term the term to search for
     * @param min value forwarded unchanged to the superclass
     * @return the query built by the superclass for the canonical term
     * @throws ParseException if the superclass rejects the query
     */
    @Override
    protected Query getFuzzyQuery(String field, String term, float min)
            throws ParseException {
        return super.getFuzzyQuery(field, getCanonicalTerm(field, term), min);
    }

    /**
     * Build a prefix query after converting the term to its canonical form.
     *
     * @param field the field to search on
     * @param term the prefix to search for
     * @return the query built by the superclass for the canonical term
     * @throws ParseException if the superclass rejects the query
     */
    @Override
    protected Query getPrefixQuery(String field, String term)
            throws ParseException {
        return super.getPrefixQuery(field, getCanonicalTerm(field, term));
    }

    /**
     * Build a range query after converting both bounds to their canonical
     * form. A {@code null} bound is forwarded as {@code null}.
     *
     * @param field the field to search on
     * @param term1 lower bound of the range, or {@code null}
     * @param term2 upper bound of the range, or {@code null}
     * @param startinclusive forwarded unchanged to the superclass
     * @param endinclusive forwarded unchanged to the superclass
     * @return the query built by the superclass for the canonical bounds
     * @throws ParseException if the superclass rejects the query
     */
    @Override
    protected Query getRangeQuery(String field, String term1, String term2,
            boolean startinclusive, boolean endinclusive)
            throws ParseException {
        return super.getRangeQuery(
                field,
                getCanonicalTerm(field, term1),
                getCanonicalTerm(field, term2),
                startinclusive,
                endinclusive);
    }

    /**
     * Build a wildcard query after converting the term to its canonical
     * form.
     *
     * @param field the field to search on
     * @param term the wildcard pattern to search for
     * @return the query built by the superclass for the canonical term
     * @throws ParseException if the superclass rejects the query
     */
    @Override
    protected Query getWildcardQuery(String field, String term)
            throws ParseException {
        return super.getWildcardQuery(field, getCanonicalTerm(field, term));
    }
}