xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/QueryMatchers.java (revision c6f0939b1c668e9f8e1e276424439c3106b3a029)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
22  */
23 package org.opengrok.indexer.search.context;
24 
25 import java.util.ArrayList;
26 import java.util.HashSet;
27 import java.util.List;
28 import java.util.Map;
29 import java.util.Set;
30 import org.apache.lucene.index.Term;
31 import org.apache.lucene.search.BooleanClause;
32 import org.apache.lucene.search.BooleanQuery;
33 import org.apache.lucene.search.PhraseQuery;
34 import org.apache.lucene.search.PrefixQuery;
35 import org.apache.lucene.search.Query;
36 import org.apache.lucene.search.RegexpQuery;
37 import org.apache.lucene.search.TermQuery;
38 import org.apache.lucene.search.WildcardQuery;
39 
40 /**
41  * Utility class used to extract the terms used in a query This class will not
42  * find terms for MultiTermQuery, RangeQuery and PrefixQuery classes so the
43  * caller must pass a rewritten query (see query.rewrite) to obtain a list of
44  * expanded terms.
45  */
46 public final class QueryMatchers {
47 
48     private Set<String> caseSensitiveTerms;
49     private Set<String> caseInsensitiveTerms;
50     private List<LineMatcher> matchers;
51     private Map<String, Boolean> fields;
52 
53     /**
54      * Get the terms from a query and returns a list of DFAs which match a stream
55      * of tokens.
56      *
57      * @param query the query to generate matchers for
58      * @param fields a map whose keys tell which fields to create matchers for,
59      * and whose values tell if the field is case insensitive (true) or case
60      * sensitive (false)
61      * @return list of LineMatching DFAs
62      */
getMatchers(Query query, Map<String, Boolean> fields)63     public LineMatcher[] getMatchers(Query query, Map<String, Boolean> fields) {
64         caseSensitiveTerms = new HashSet<>();
65         caseInsensitiveTerms = new HashSet<>();
66         matchers = new ArrayList<>();
67         this.fields = fields;
68         getTerms(query);
69         if (!caseSensitiveTerms.isEmpty()) {
70             matchers.add(0, new TokenSetMatcher(caseSensitiveTerms, false));
71         }
72         if (!caseInsensitiveTerms.isEmpty()) {
73             matchers.add(0, new TokenSetMatcher(caseInsensitiveTerms, true));
74         }
75         if (matchers.isEmpty()) {
76             return null;
77         }
78         return matchers.toArray(new LineMatcher[0]);
79     }
80 
getTerms(Query query)81     private void getTerms(Query query) {
82         if (query instanceof BooleanQuery) {
83             getBooleans((BooleanQuery) query);
84         } else if (query instanceof PhraseQuery) {
85             getPhrases((PhraseQuery) query);
86         } else if (query instanceof WildcardQuery) {
87             getWildTerm((WildcardQuery) query);
88         } else if (query instanceof TermQuery) {
89             getTerm((TermQuery) query);
90         } else if (query instanceof PrefixQuery) {
91             getPrefix((PrefixQuery) query);
92         } else if (query instanceof RegexpQuery) {
93             getRegexp((RegexpQuery) query);
94         }
95     }
96 
getRegexp(RegexpQuery query)97     private void getRegexp(RegexpQuery query) {
98         if (useTerm(query.getField())) {
99             String term = query.toString(query.getField());
100             term = term.substring(1, term.length() - 1); //trim / from /regexp/
101             matchers.add(new RegexpMatcher(term, true));
102         }
103     }
104 
getBooleans(BooleanQuery query)105     private void getBooleans(BooleanQuery query) {
106         for (BooleanClause clause : query) {
107             if (!clause.isProhibited()) {
108                 getTerms(clause.getQuery());
109             }
110         }
111     }
112 
getPhrases(PhraseQuery query)113     private void getPhrases(PhraseQuery query) {
114         Term[] queryTerms = query.getTerms();
115         if (queryTerms.length > 0 && useTerm(queryTerms[0])) {
116             boolean caseInsensitive = isCaseInsensitive(queryTerms[0]);
117             String[] termsArray = new String[queryTerms.length];
118             for (int i = 0; i < queryTerms.length; i++) {
119                 termsArray[i] = queryTerms[i].text();
120             }
121             matchers.add(new PhraseMatcher(termsArray, caseInsensitive));
122         }
123     }
124 
getTerm(TermQuery query)125     private void getTerm(TermQuery query) {
126         Term term = query.getTerm();
127         if (useTerm(term)) {
128             String text = term.text();
129             if (isCaseInsensitive(term)) {
130                 caseInsensitiveTerms.add(text);
131             } else {
132                 caseSensitiveTerms.add(text);
133             }
134         }
135     }
136 
getWildTerm(WildcardQuery query)137     private void getWildTerm(WildcardQuery query) {
138         Term term = query.getTerm();
139         if (useTerm(term)) {
140             matchers.add(
141                     new WildCardMatcher(term.text(), isCaseInsensitive(term)));
142         }
143     }
144 
getPrefix(PrefixQuery query)145     private void getPrefix(PrefixQuery query) {
146         Term term = query.getPrefix();
147         if (useTerm(term)) {
148             matchers.add(
149                     new PrefixMatcher(term.text(), isCaseInsensitive(term)));
150         }
151     }
152 
153     /**
154      * Check whether a matcher should be created for a term.
155      */
useTerm(Term term)156     private boolean useTerm(Term term) {
157         return useTerm(term.field());
158     }
159 
160     /**
161      * Check whether a matcher should be created for a term.
162      */
useTerm(String termField)163     private boolean useTerm(String termField) {
164         return fields.containsKey(termField);
165     }
166 
167     /**
168      * Check if a term should be matched in a case-insensitive manner. Should
169      * only be called on terms for which {@link #useTerm(Term)} returns true.
170      */
isCaseInsensitive(Term term)171     private boolean isCaseInsensitive(Term term) {
172         return fields.get(term.field());
173     }
174 }
175