1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. 22 */ 23 package org.opengrok.indexer.search.context; 24 25 import java.util.ArrayList; 26 import java.util.HashSet; 27 import java.util.List; 28 import java.util.Map; 29 import java.util.Set; 30 import org.apache.lucene.index.Term; 31 import org.apache.lucene.search.BooleanClause; 32 import org.apache.lucene.search.BooleanQuery; 33 import org.apache.lucene.search.PhraseQuery; 34 import org.apache.lucene.search.PrefixQuery; 35 import org.apache.lucene.search.Query; 36 import org.apache.lucene.search.RegexpQuery; 37 import org.apache.lucene.search.TermQuery; 38 import org.apache.lucene.search.WildcardQuery; 39 40 /** 41 * Utility class used to extract the terms used in a query This class will not 42 * find terms for MultiTermQuery, RangeQuery and PrefixQuery classes so the 43 * caller must pass a rewritten query (see query.rewrite) to obtain a list of 44 * expanded terms. 45 */ 46 public final class QueryMatchers { 47 48 private Set<String> caseSensitiveTerms; 49 private Set<String> caseInsensitiveTerms; 50 private List<LineMatcher> matchers; 51 private Map<String, Boolean> fields; 52 53 /** 54 * Get the terms from a query and returns a list of DFAs which match a stream 55 * of tokens. 56 * 57 * @param query the query to generate matchers for 58 * @param fields a map whose keys tell which fields to create matchers for, 59 * and whose values tell if the field is case insensitive (true) or case 60 * sensitive (false) 61 * @return list of LineMatching DFAs 62 */ getMatchers(Query query, Map<String, Boolean> fields)63 public LineMatcher[] getMatchers(Query query, Map<String, Boolean> fields) { 64 caseSensitiveTerms = new HashSet<>(); 65 caseInsensitiveTerms = new HashSet<>(); 66 matchers = new ArrayList<>(); 67 this.fields = fields; 68 getTerms(query); 69 if (!caseSensitiveTerms.isEmpty()) { 70 matchers.add(0, new TokenSetMatcher(caseSensitiveTerms, false)); 71 } 72 if (!caseInsensitiveTerms.isEmpty()) { 73 matchers.add(0, new TokenSetMatcher(caseInsensitiveTerms, true)); 74 } 75 if (matchers.isEmpty()) { 76 return null; 77 } 78 return matchers.toArray(new LineMatcher[0]); 79 } 80 getTerms(Query query)81 private void getTerms(Query query) { 82 if (query instanceof BooleanQuery) { 83 getBooleans((BooleanQuery) query); 84 } else if (query instanceof PhraseQuery) { 85 getPhrases((PhraseQuery) query); 86 } else if (query instanceof WildcardQuery) { 87 getWildTerm((WildcardQuery) query); 88 } else if (query instanceof TermQuery) { 89 getTerm((TermQuery) query); 90 } else if (query instanceof PrefixQuery) { 91 getPrefix((PrefixQuery) query); 92 } else if (query instanceof RegexpQuery) { 93 getRegexp((RegexpQuery) query); 94 } 95 } 96 getRegexp(RegexpQuery query)97 private void getRegexp(RegexpQuery query) { 98 if (useTerm(query.getField())) { 99 String term = query.toString(query.getField()); 100 term = term.substring(1, term.length() - 1); //trim / from /regexp/ 101 matchers.add(new RegexpMatcher(term, true)); 102 } 103 } 104 getBooleans(BooleanQuery query)105 private void getBooleans(BooleanQuery query) { 106 for (BooleanClause clause : query) { 107 if (!clause.isProhibited()) { 108 getTerms(clause.getQuery()); 109 } 110 } 111 } 112 getPhrases(PhraseQuery query)113 private void getPhrases(PhraseQuery query) { 114 Term[] queryTerms = query.getTerms(); 115 if (queryTerms.length > 0 && useTerm(queryTerms[0])) { 116 boolean caseInsensitive = isCaseInsensitive(queryTerms[0]); 117 String[] termsArray = new String[queryTerms.length]; 118 for (int i = 0; i < queryTerms.length; i++) { 119 termsArray[i] = queryTerms[i].text(); 120 } 121 matchers.add(new PhraseMatcher(termsArray, caseInsensitive)); 122 } 123 } 124 getTerm(TermQuery query)125 private void getTerm(TermQuery query) { 126 Term term = query.getTerm(); 127 if (useTerm(term)) { 128 String text = term.text(); 129 if (isCaseInsensitive(term)) { 130 caseInsensitiveTerms.add(text); 131 } else { 132 caseSensitiveTerms.add(text); 133 } 134 } 135 } 136 getWildTerm(WildcardQuery query)137 private void getWildTerm(WildcardQuery query) { 138 Term term = query.getTerm(); 139 if (useTerm(term)) { 140 matchers.add( 141 new WildCardMatcher(term.text(), isCaseInsensitive(term))); 142 } 143 } 144 getPrefix(PrefixQuery query)145 private void getPrefix(PrefixQuery query) { 146 Term term = query.getPrefix(); 147 if (useTerm(term)) { 148 matchers.add( 149 new PrefixMatcher(term.text(), isCaseInsensitive(term))); 150 } 151 } 152 153 /** 154 * Check whether a matcher should be created for a term. 155 */ useTerm(Term term)156 private boolean useTerm(Term term) { 157 return useTerm(term.field()); 158 } 159 160 /** 161 * Check whether a matcher should be created for a term. 162 */ useTerm(String termField)163 private boolean useTerm(String termField) { 164 return fields.containsKey(termField); 165 } 166 167 /** 168 * Check if a term should be matched in a case-insensitive manner. Should 169 * only be called on terms for which {@link #useTerm(Term)} returns true. 170 */ isCaseInsensitive(Term term)171 private boolean isCaseInsensitive(Term term) { 172 return fields.get(term.field()); 173 } 174 } 175