indexer/search/CustomQueryParser.java

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2018, Chris Fraire <cfraire@me.com>.
 */
package org.opengrok.indexer.search;

import java.util.Locale;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.opengrok.indexer.analysis.CompatibleAnalyser;
import org.opengrok.indexer.configuration.RuntimeEnvironment;

b5840353SAdam Hornáček/**
b5840353SAdam Hornáček * A custom query parser for OpenGrok.
b5840353SAdam Hornáček */
911e8af0SAdam Hornáčekpublic class CustomQueryParser extends QueryParser {
b5840353SAdam Hornáček
b5840353SAdam Hornáček    /**
b5840353SAdam Hornáček     * Create a query parser customized for OpenGrok.
b5840353SAdam Hornáček     *
b5840353SAdam Hornáček     * @param field default field for unqualified query terms
b5840353SAdam Hornáček     */
911e8af0SAdam Hornáček    public CustomQueryParser(String field) {
b5840353SAdam Hornáček        super(field, new CompatibleAnalyser());
b5840353SAdam Hornáček        setDefaultOperator(AND_OPERATOR);
b5840353SAdam Hornáček        setAllowLeadingWildcard(
b5840353SAdam Hornáček                RuntimeEnvironment.getInstance().isAllowLeadingWildcard());
b5840353SAdam Hornáček        // Convert terms to lower case manually to prevent changing the case
b5840353SAdam Hornáček        // if the field is case sensitive.
b5840353SAdam Hornáček        // since lucene 7.0.0 below is in place so every class that
b5840353SAdam Hornáček        // extends Analyser must normalize the text by itself
b5840353SAdam Hornáček        /*
b5840353SAdam Hornáček## AnalyzingQueryParser removed (LUCENE-7355)
b5840353SAdam Hornáček
b5840353SAdam HornáčekThe functionality of AnalyzingQueryParser has been folded into the classic
b5840353SAdam HornáčekQueryParser, which now passes terms through Analyzer#normalize when generating
b5840353SAdam Hornáčekqueries.
b5840353SAdam Hornáček
b5840353SAdam Hornáček## CommonQueryParserConfiguration.setLowerCaseExpandedTerms removed (LUCENE-7355)
b5840353SAdam Hornáček
b5840353SAdam HornáčekThis option has been removed as expanded terms are now normalized through
b5840353SAdam HornáčekAnalyzer#normalize.
b5840353SAdam Hornáček        */
b5840353SAdam Hornáček     //   setLowercaseExpandedTerms(false);
b5840353SAdam Hornáček
b5840353SAdam Hornáček    }
b5840353SAdam Hornáček
b5840353SAdam Hornáček    /**
b5840353SAdam Hornáček     * Is this field case sensitive?
b5840353SAdam Hornáček     *
b5840353SAdam Hornáček     * @param field name of the field to check
b5840353SAdam Hornáček     * @return {@code true} if the field is case sensitive, {@code false}
b5840353SAdam Hornáček     * otherwise
b5840353SAdam Hornáček     */
911e8af0SAdam Hornáček    protected static boolean isCaseSensitive(String field) {
b5840353SAdam Hornáček        // Only definition search and reference search are case sensitive
b5840353SAdam Hornáček        return QueryBuilder.DEFS.equals(field)
b5840353SAdam Hornáček                || QueryBuilder.REFS.equals(field);
b5840353SAdam Hornáček    }
b5840353SAdam Hornáček
b5840353SAdam Hornáček    /**
b5840353SAdam Hornáček     * Get a canonical form of a search term. This will convert the term to
b5840353SAdam Hornáček     * lower case if the field is case insensitive.
b5840353SAdam Hornáček     *
b5840353SAdam Hornáček     * @param field the field to search on
b5840353SAdam Hornáček     * @param term the term to search for
b5840353SAdam Hornáček     * @return the canonical form of the search term, which matches how it is
b5840353SAdam Hornáček     * stored in the index
b5840353SAdam Hornáček     */
b5840353SAdam Hornáček    // The analyzers use the default locale. They probably should have used
b5840353SAdam Hornáček    // a fixed locale, but since they don't, we ignore that PMD warning here.
b5840353SAdam Hornáček    @SuppressWarnings("PMD.UseLocaleWithCaseConversions")
b5840353SAdam Hornáček    private static String getCanonicalTerm(String field, String term) {
*52dccac1SChris Fraire        return isCaseSensitive(field) ? term : term.toLowerCase(Locale.ROOT);
b5840353SAdam Hornáček    }
b5840353SAdam Hornáček
b5840353SAdam Hornáček    // Override the get***Query() methods to lower case the search terms if
b5840353SAdam Hornáček    // the field is case sensitive. We don't need to override getFieldQuery()
b5840353SAdam Hornáček    // because it uses the analyzer to convert the terms to canonical form.
b5840353SAdam Hornáček    @Override
b5840353SAdam Hornáček    protected Query getFuzzyQuery(String field, String term, float min)
b5840353SAdam Hornáček            throws ParseException {
b5840353SAdam Hornáček        return super.getFuzzyQuery(field, getCanonicalTerm(field, term), min);
b5840353SAdam Hornáček    }
b5840353SAdam Hornáček
b5840353SAdam Hornáček    @Override
b5840353SAdam Hornáček    protected Query getPrefixQuery(String field, String term)
b5840353SAdam Hornáček            throws ParseException {
b5840353SAdam Hornáček        return super.getPrefixQuery(field, getCanonicalTerm(field, term));
b5840353SAdam Hornáček    }
b5840353SAdam Hornáček
b5840353SAdam Hornáček    @Override
b5840353SAdam Hornáček    protected Query getRangeQuery(String field, String term1, String term2,
b5840353SAdam Hornáček            boolean startinclusive, boolean endinclusive)
b5840353SAdam Hornáček            throws ParseException {
b5840353SAdam Hornáček        return super.getRangeQuery(
b5840353SAdam Hornáček                field,
b5840353SAdam Hornáček                getCanonicalTerm(field, term1),
b5840353SAdam Hornáček                getCanonicalTerm(field, term2),
b5840353SAdam Hornáček                startinclusive,
b5840353SAdam Hornáček                endinclusive);
b5840353SAdam Hornáček    }
b5840353SAdam Hornáček
b5840353SAdam Hornáček    @Override
b5840353SAdam Hornáček    protected Query getWildcardQuery(String field, String term)
b5840353SAdam Hornáček            throws ParseException {
b5840353SAdam Hornáček        return super.getWildcardQuery(field, getCanonicalTerm(field, term));
b5840353SAdam Hornáček    }
b5840353SAdam Hornáček}