xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/search/QueryBuilder.java (revision 0e4c55544f8ea0a68e8bae37b0e502097e008ec1)
1b5840353SAdam Hornáček /*
2b5840353SAdam Hornáček  * CDDL HEADER START
3b5840353SAdam Hornáček  *
4b5840353SAdam Hornáček  * The contents of this file are subject to the terms of the
5b5840353SAdam Hornáček  * Common Development and Distribution License (the "License").
6b5840353SAdam Hornáček  * You may not use this file except in compliance with the License.
7b5840353SAdam Hornáček  *
8b5840353SAdam Hornáček  * See LICENSE.txt included in this distribution for the specific
9b5840353SAdam Hornáček  * language governing permissions and limitations under the License.
10b5840353SAdam Hornáček  *
11b5840353SAdam Hornáček  * When distributing Covered Code, include this CDDL HEADER in each
12b5840353SAdam Hornáček  * file and include the License file at LICENSE.txt.
13b5840353SAdam Hornáček  * If applicable, add the following below this CDDL HEADER, with the
14b5840353SAdam Hornáček  * fields enclosed by brackets "[]" replaced with your own identifying
15b5840353SAdam Hornáček  * information: Portions Copyright [yyyy] [name of copyright owner]
16b5840353SAdam Hornáček  *
17b5840353SAdam Hornáček  * CDDL HEADER END
18b5840353SAdam Hornáček  */
19b5840353SAdam Hornáček 
20b5840353SAdam Hornáček /*
21d21d069fSAdam Hornáček  * Copyright (c) 2010, 2019, Oracle and/or its affiliates. All rights reserved.
2251e20d51SAdam Hornáček  * Portions Copyright (c) 2011, Jens Elkner.
2351e20d51SAdam Hornáček  * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
24b5840353SAdam Hornáček  */
259805b761SAdam Hornáček package org.opengrok.indexer.search;
26b5840353SAdam Hornáček 
27b5840353SAdam Hornáček import java.io.File;
28a5cf78b2SChris Fraire import java.io.StringReader;
29b5840353SAdam Hornáček import java.nio.charset.StandardCharsets;
30b5840353SAdam Hornáček import java.security.MessageDigest;
31b5840353SAdam Hornáček import java.security.NoSuchAlgorithmException;
32b5840353SAdam Hornáček import java.util.ArrayList;
33911e8af0SAdam Hornáček import java.util.Arrays;
34b5840353SAdam Hornáček import java.util.Collections;
3520463cfeSChris Fraire import java.util.HashSet;
36b5840353SAdam Hornáček import java.util.List;
37b5840353SAdam Hornáček import java.util.Map;
38b5840353SAdam Hornáček import java.util.TreeMap;
39b5840353SAdam Hornáček import org.apache.lucene.queryparser.classic.ParseException;
40b5840353SAdam Hornáček import org.apache.lucene.search.BooleanClause;
41b5840353SAdam Hornáček import org.apache.lucene.search.BooleanClause.Occur;
42b5840353SAdam Hornáček import org.apache.lucene.search.BooleanQuery;
43b5840353SAdam Hornáček import org.apache.lucene.search.Query;
44b5840353SAdam Hornáček 
45b5840353SAdam Hornáček /**
46b5840353SAdam Hornáček  * Helper class that builds a Lucene query based on provided search terms for
47b5840353SAdam Hornáček  * the different fields.
48b5840353SAdam Hornáček  */
49b5840353SAdam Hornáček public class QueryBuilder {
50b5840353SAdam Hornáček 
51b5840353SAdam Hornáček     /**
52ff44f24aSAdam Hornáček      * Fields we use in lucene: public ones.
53b5840353SAdam Hornáček      */
54b5840353SAdam Hornáček     public static final String FULL = "full";
55b5840353SAdam Hornáček     public static final String DEFS = "defs";
56b5840353SAdam Hornáček     public static final String REFS = "refs";
57b5840353SAdam Hornáček     public static final String PATH = "path";
58b5840353SAdam Hornáček     public static final String HIST = "hist";
59b5840353SAdam Hornáček     public static final String TYPE = "type";
60b5840353SAdam Hornáček     public static final String SCOPES = "scopes";
61b5840353SAdam Hornáček     public static final String NUML = "numl";
62b5840353SAdam Hornáček     public static final String LOC = "loc";
63*ee484c52SVladimir Kotal     public static final String LASTREV = "lastrev"; // last revision
64b5840353SAdam Hornáček     /**
65*ee484c52SVladimir Kotal      * Fields we use in Lucene: internal ones.
66b5840353SAdam Hornáček      */
6741351de3SChris Fraire     public static final String D = "d"; // Directory key
6841351de3SChris Fraire     public static final String U = "u"; // File and timestamp key
69b5840353SAdam Hornáček     public static final String TAGS = "tags";
70b5840353SAdam Hornáček     public static final String T = "t";
71b5840353SAdam Hornáček     public static final String FULLPATH = "fullpath";
72b5840353SAdam Hornáček     public static final String DIRPATH = "dirpath";
73b5840353SAdam Hornáček     public static final String PROJECT = "project";
74b5840353SAdam Hornáček     public static final String DATE = "date";
75b5840353SAdam Hornáček     public static final String OBJUID = "objuid"; // object UID
76b5840353SAdam Hornáček     public static final String OBJSER = "objser"; // object serialized
77b5840353SAdam Hornáček     public static final String OBJVER = "objver"; // object version
78b5840353SAdam Hornáček 
7920463cfeSChris Fraire     protected static final List<String> searchFields = Arrays.asList(FULL, DEFS, REFS, PATH, HIST);
8020463cfeSChris Fraire     private static final HashSet<String> searchFieldsSet = new HashSet<>(searchFields);
81911e8af0SAdam Hornáček 
82ff44f24aSAdam Hornáček     /** Used for paths, so SHA-1 is completely sufficient. */
83b5840353SAdam Hornáček     private static final String DIRPATH_HASH_ALGORITHM = "SHA-1";
84b5840353SAdam Hornáček 
85b5840353SAdam Hornáček     /**
86b5840353SAdam Hornáček      * A map containing the query text for each field. (We use a sorted map here
87b5840353SAdam Hornáček      * only because we have tests that check the generated query string. If we
88b5840353SAdam Hornáček      * had used a hash map, the order of the terms could have varied between
89b5840353SAdam Hornáček      * platforms and it would be harder to test.)
90b5840353SAdam Hornáček      */
91b5840353SAdam Hornáček     private final Map<String, String> queries = new TreeMap<>();
92b5840353SAdam Hornáček 
getSearchFields()9320463cfeSChris Fraire     public static List<String> getSearchFields() {
9420463cfeSChris Fraire         return Collections.unmodifiableList(searchFields);
9520463cfeSChris Fraire     }
9620463cfeSChris Fraire 
9720463cfeSChris Fraire     /**
9820463cfeSChris Fraire      * Gets a value indicating if the specified {@code fieldName} is a valid
9920463cfeSChris Fraire      * search field.
10020463cfeSChris Fraire      */
isSearchField(String fieldName)10120463cfeSChris Fraire     public static boolean isSearchField(String fieldName) {
10220463cfeSChris Fraire         return searchFieldsSet.contains(fieldName);
10320463cfeSChris Fraire     }
10420463cfeSChris Fraire 
105b5840353SAdam Hornáček     /**
106b5840353SAdam Hornáček      * Sets the instance to the state of {@code other}.
107b5840353SAdam Hornáček      * @param other a defined instance
108b5840353SAdam Hornáček      * @return {@code this}
109b5840353SAdam Hornáček      */
reset(QueryBuilder other)110b5840353SAdam Hornáček     public QueryBuilder reset(QueryBuilder other) {
111b5840353SAdam Hornáček         if (other == null) {
112b5840353SAdam Hornáček             throw new IllegalArgumentException("other is null");
113b5840353SAdam Hornáček         }
114b5840353SAdam Hornáček         if (this != other) {
115b5840353SAdam Hornáček             queries.clear();
116b5840353SAdam Hornáček             queries.putAll(other.queries);
117b5840353SAdam Hornáček         }
118b5840353SAdam Hornáček         return this;
119b5840353SAdam Hornáček     }
120b5840353SAdam Hornáček 
121b5840353SAdam Hornáček     /**
1221c830032SChris Fraire      * Set search string for the {@link #FULL} field.
123b5840353SAdam Hornáček      *
124b5840353SAdam Hornáček      * @param freetext query string to set
125b5840353SAdam Hornáček      * @return this instance
126b5840353SAdam Hornáček      */
setFreetext(String freetext)127b5840353SAdam Hornáček     public QueryBuilder setFreetext(String freetext) {
128b5840353SAdam Hornáček         return addQueryText(FULL, freetext);
129b5840353SAdam Hornáček     }
130b5840353SAdam Hornáček 
131b5840353SAdam Hornáček     /**
1321c830032SChris Fraire      * Get search string for the {@link #FULL} field.
133b5840353SAdam Hornáček      *
134b5840353SAdam Hornáček      * @return {@code null} if not set, the query string otherwise.
135b5840353SAdam Hornáček      */
getFreetext()136b5840353SAdam Hornáček     public String getFreetext() {
137b5840353SAdam Hornáček         return getQueryText(FULL);
138b5840353SAdam Hornáček     }
139b5840353SAdam Hornáček 
140b5840353SAdam Hornáček     /**
1411c830032SChris Fraire      * Set search string for the {@link #DEFS} field.
142b5840353SAdam Hornáček      *
143b5840353SAdam Hornáček      * @param defs query string to set
144b5840353SAdam Hornáček      * @return this instance
145b5840353SAdam Hornáček      */
setDefs(String defs)146b5840353SAdam Hornáček     public QueryBuilder setDefs(String defs) {
147b5840353SAdam Hornáček         return addQueryText(DEFS, defs);
148b5840353SAdam Hornáček     }
149b5840353SAdam Hornáček 
150b5840353SAdam Hornáček     /**
1511c830032SChris Fraire      * Get search string for the {@link #FULL} field.
152b5840353SAdam Hornáček      *
153b5840353SAdam Hornáček      * @return {@code null} if not set, the query string otherwise.
154b5840353SAdam Hornáček      */
getDefs()155b5840353SAdam Hornáček     public String getDefs() {
156b5840353SAdam Hornáček         return getQueryText(DEFS);
157b5840353SAdam Hornáček     }
158b5840353SAdam Hornáček 
159b5840353SAdam Hornáček     /**
1601c830032SChris Fraire      * Set search string for the {@link #REFS} field.
161b5840353SAdam Hornáček      *
162b5840353SAdam Hornáček      * @param refs query string to set
163b5840353SAdam Hornáček      * @return this instance
164b5840353SAdam Hornáček      */
setRefs(String refs)165b5840353SAdam Hornáček     public QueryBuilder setRefs(String refs) {
166b5840353SAdam Hornáček         return addQueryText(REFS, refs);
167b5840353SAdam Hornáček     }
168b5840353SAdam Hornáček 
169b5840353SAdam Hornáček     /**
1701c830032SChris Fraire      * Get search string for the {@link #REFS} field.
171b5840353SAdam Hornáček      *
172b5840353SAdam Hornáček      * @return {@code null} if not set, the query string otherwise.
173b5840353SAdam Hornáček      */
getRefs()174b5840353SAdam Hornáček     public String getRefs() {
175b5840353SAdam Hornáček         return getQueryText(REFS);
176b5840353SAdam Hornáček     }
177b5840353SAdam Hornáček 
178b5840353SAdam Hornáček     /**
1791c830032SChris Fraire      * Set search string for the {@link #PATH} field.
180b5840353SAdam Hornáček      *
181b5840353SAdam Hornáček      * @param path query string to set
182b5840353SAdam Hornáček      * @return this instance
183b5840353SAdam Hornáček      */
setPath(String path)184b5840353SAdam Hornáček     public QueryBuilder setPath(String path) {
185b5840353SAdam Hornáček         return addQueryText(PATH, path);
186b5840353SAdam Hornáček     }
187b5840353SAdam Hornáček 
188b5840353SAdam Hornáček     /**
1891c830032SChris Fraire      * Get search string for the {@link #PATH} field.
190b5840353SAdam Hornáček      *
191b5840353SAdam Hornáček      * @return {@code null} if not set, the query string otherwise.
192b5840353SAdam Hornáček      */
getPath()193b5840353SAdam Hornáček     public String getPath() {
194b5840353SAdam Hornáček         return getQueryText(PATH);
195b5840353SAdam Hornáček     }
196b5840353SAdam Hornáček 
197b5840353SAdam Hornáček     /**
1981c830032SChris Fraire      * Set search string for the {@link #DIRPATH} field.
199b5840353SAdam Hornáček      * @param path query string to set
200b5840353SAdam Hornáček      * @return this instance
201b5840353SAdam Hornáček      */
setDirPath(String path)202b5840353SAdam Hornáček     public QueryBuilder setDirPath(String path) {
203b5840353SAdam Hornáček         String normalizedPath = normalizeDirPath(path);
204b5840353SAdam Hornáček         return addQueryText(DIRPATH, normalizedPath);
205b5840353SAdam Hornáček     }
206b5840353SAdam Hornáček 
207b5840353SAdam Hornáček     /**
2081c830032SChris Fraire      * Get search string for the {@link #DIRPATH} field.
209b5840353SAdam Hornáček      * @return {@code null} if not set; the query string otherwise.
210b5840353SAdam Hornáček      */
getDirPath()211b5840353SAdam Hornáček     public String getDirPath() {
212b5840353SAdam Hornáček         return getQueryText(DIRPATH);
213b5840353SAdam Hornáček     }
214b5840353SAdam Hornáček 
215b5840353SAdam Hornáček     /**
216b5840353SAdam Hornáček      * Transform {@code path} to ensure any {@link File#separatorChar} is
2179474ffdfSChris Fraire      * represented as '/', that there is a trailing '/' if {@code path} is not
2189474ffdfSChris Fraire      * empty, and then hash using SHA-1 and formatted in a private encoding
2199474ffdfSChris Fraire      * using only letters [g-u].
220b5840353SAdam Hornáček      * @param path a defined value
221b5840353SAdam Hornáček      * @return a defined, transformed value
222b5840353SAdam Hornáček      */
normalizeDirPath(String path)223b5840353SAdam Hornáček     public static String normalizeDirPath(String path) {
2249474ffdfSChris Fraire         String norm2;
2259474ffdfSChris Fraire         if (path.length() > 0) {
226b5840353SAdam Hornáček             String norm1 = path.replace(File.separatorChar, '/');
2279474ffdfSChris Fraire             norm2 = norm1.endsWith("/") ? norm1 : norm1 + "/";
2289474ffdfSChris Fraire         } else {
2299474ffdfSChris Fraire             norm2 = path;
2309474ffdfSChris Fraire         }
231b5840353SAdam Hornáček 
232b5840353SAdam Hornáček         MessageDigest digest;
233b5840353SAdam Hornáček         try {
234b5840353SAdam Hornáček             digest = MessageDigest.getInstance(DIRPATH_HASH_ALGORITHM);
235b5840353SAdam Hornáček         } catch (NoSuchAlgorithmException e) {
2369474ffdfSChris Fraire             /*
2379474ffdfSChris Fraire              * This will not happen since "Every implementation of the Java
2389474ffdfSChris Fraire              * platform is required to support the following standard
2399474ffdfSChris Fraire              * MessageDigest algorithms: MD5, SHA-1, SHA-256."
2409474ffdfSChris Fraire              */
2419474ffdfSChris Fraire             throw new RuntimeException(e);
242b5840353SAdam Hornáček         }
243b5840353SAdam Hornáček         byte[] hash = digest.digest(norm2.getBytes(StandardCharsets.UTF_8));
244b5840353SAdam Hornáček 
245b5840353SAdam Hornáček         StringBuilder encodedString = new StringBuilder();
246ff44f24aSAdam Hornáček         for (byte b : hash) {
247ff44f24aSAdam Hornáček             int v0 = (0xF0 & b) >> 4;
248ff44f24aSAdam Hornáček             int v1 = 0xF & b;
249b5840353SAdam Hornáček             char c0 = (char) ('g' + v0);
250b5840353SAdam Hornáček             char c1 = (char) ('g' + v1);
251b5840353SAdam Hornáček             encodedString.append(c0);
252b5840353SAdam Hornáček             encodedString.append(c1);
253b5840353SAdam Hornáček         }
254b5840353SAdam Hornáček         return encodedString.toString();
255b5840353SAdam Hornáček     }
256b5840353SAdam Hornáček 
257b5840353SAdam Hornáček     /**
2581c830032SChris Fraire      * Set search string for the {@link #HIST} field.
259b5840353SAdam Hornáček      *
260b5840353SAdam Hornáček      * @param hist query string to set
261b5840353SAdam Hornáček      * @return this instance
262b5840353SAdam Hornáček      */
setHist(String hist)263b5840353SAdam Hornáček     public QueryBuilder setHist(String hist) {
264b5840353SAdam Hornáček         return addQueryText(HIST, hist);
265b5840353SAdam Hornáček     }
266b5840353SAdam Hornáček 
267b5840353SAdam Hornáček     /**
2681c830032SChris Fraire      * Get search string for the {@link #HIST} field.
269b5840353SAdam Hornáček      *
270b5840353SAdam Hornáček      * @return {@code null} if not set, the query string otherwise.
271b5840353SAdam Hornáček      */
getHist()272b5840353SAdam Hornáček     public String getHist() {
273b5840353SAdam Hornáček         return getQueryText(HIST);
274b5840353SAdam Hornáček     }
275b5840353SAdam Hornáček 
276b5840353SAdam Hornáček     /**
2771c830032SChris Fraire      * Set search string for the {@link #TYPE} field.
278b5840353SAdam Hornáček      *
279b5840353SAdam Hornáček      * @param type query string to set
280b5840353SAdam Hornáček      * @return this instance
281b5840353SAdam Hornáček      */
setType(String type)282b5840353SAdam Hornáček     public QueryBuilder setType(String type) {
283b5840353SAdam Hornáček         return addQueryText(TYPE, type);
284b5840353SAdam Hornáček     }
285b5840353SAdam Hornáček 
286b5840353SAdam Hornáček     /**
2871c830032SChris Fraire      * Get search string for the {@link #TYPE} field.
288b5840353SAdam Hornáček      *
289b5840353SAdam Hornáček      * @return {@code null} if not set, the query string otherwise.
290b5840353SAdam Hornáček      */
getType()291b5840353SAdam Hornáček     public String getType() {
292b5840353SAdam Hornáček         return getQueryText(TYPE);
293b5840353SAdam Hornáček     }
294b5840353SAdam Hornáček 
295b5840353SAdam Hornáček     /**
296b5840353SAdam Hornáček      * Get a map containing the query text for each of the fields that have been
297b5840353SAdam Hornáček      * set.
298b5840353SAdam Hornáček      *
299b5840353SAdam Hornáček      * @return a possible empty map.
300b5840353SAdam Hornáček      */
getQueries()301b5840353SAdam Hornáček     public Map<String, String> getQueries() {
302b5840353SAdam Hornáček         return Collections.unmodifiableMap(queries);
303b5840353SAdam Hornáček     }
304b5840353SAdam Hornáček 
305b5840353SAdam Hornáček     /**
306b5840353SAdam Hornáček      * Gets a list of fields from {@link #getQueries()} which are extracted
307b5840353SAdam Hornáček      * from source text and which therefore can be used for context
308b5840353SAdam Hornáček      * presentations -- in the order of most specific to least.
309b5840353SAdam Hornáček      * @return a defined, possibly-empty list
310b5840353SAdam Hornáček      */
getContextFields()311b5840353SAdam Hornáček     public List<String> getContextFields() {
312b5840353SAdam Hornáček         List<String> fields = new ArrayList<>(queries.size());
313b5840353SAdam Hornáček         /**
314b5840353SAdam Hornáček          * setFreetext() allows query fragments that specify a field name with
315b5840353SAdam Hornáček          * a colon (e.g., "defs:ensure_cache" in the "Full Search" box), so the
316b5840353SAdam Hornáček          * context fields (i.e., the result of this method) are not just the
317b5840353SAdam Hornáček          * keys of `queries' but need a full parsing to be determined.
318b5840353SAdam Hornáček          */
319b5840353SAdam Hornáček         Query query;
320b5840353SAdam Hornáček         try {
321b5840353SAdam Hornáček             query = build();
322b5840353SAdam Hornáček         } catch (ParseException ex) {
323b5840353SAdam Hornáček             return fields;
324b5840353SAdam Hornáček         }
325b5840353SAdam Hornáček         String queryString = query.toString("");
326b5840353SAdam Hornáček         if (queryString.contains(DEFS + ":")) {
327b5840353SAdam Hornáček             fields.add(DEFS);
328b5840353SAdam Hornáček         }
329b5840353SAdam Hornáček         if (queryString.contains(REFS + ":")) {
330b5840353SAdam Hornáček             fields.add(REFS);
331b5840353SAdam Hornáček         }
332b5840353SAdam Hornáček         if (queryString.contains(FULL + ":")) {
333b5840353SAdam Hornáček             fields.add(FULL);
334b5840353SAdam Hornáček         }
335b5840353SAdam Hornáček         return fields;
336b5840353SAdam Hornáček     }
337b5840353SAdam Hornáček 
338b5840353SAdam Hornáček     /**
339b5840353SAdam Hornáček      * Get the number of query fields set.
340b5840353SAdam Hornáček      *
341b5840353SAdam Hornáček      * @return the current number of fields with a none-empty query string.
342b5840353SAdam Hornáček      */
getSize()343b5840353SAdam Hornáček     public int getSize() {
344b5840353SAdam Hornáček         return queries.size();
345b5840353SAdam Hornáček     }
346b5840353SAdam Hornáček 
347b5840353SAdam Hornáček     /**
3481c830032SChris Fraire      * Used to tell if this search only has the {@link #DEFS} field filled in.
349b5840353SAdam Hornáček      *
350b5840353SAdam Hornáček      * @return whether above statement is true or false
351b5840353SAdam Hornáček      */
isDefSearch()352b5840353SAdam Hornáček     public boolean isDefSearch() {
353b5840353SAdam Hornáček 
354b5840353SAdam Hornáček         return ((getQueryText(FULL) == null)
355b5840353SAdam Hornáček                 && (getQueryText(REFS) == null)
356b5840353SAdam Hornáček                 && (getQueryText(PATH) == null)
357b5840353SAdam Hornáček                 && (getQueryText(HIST) == null)
358b5840353SAdam Hornáček                 && (getQueryText(DIRPATH) == null)
359b5840353SAdam Hornáček                 && (getQueryText(DEFS) != null));
360b5840353SAdam Hornáček     }
361b5840353SAdam Hornáček 
362b5840353SAdam Hornáček     /**
363974067deSChris Fraire      * Gets a value indicating if this search only has defined the {@link #PATH}
364974067deSChris Fraire      * query field.
365974067deSChris Fraire      */
isPathSearch()366974067deSChris Fraire     public boolean isPathSearch() {
367974067deSChris Fraire         return ((getQueryText(FULL) == null)
368974067deSChris Fraire                 && (getQueryText(REFS) == null)
369974067deSChris Fraire                 && (getQueryText(PATH) != null)
370974067deSChris Fraire                 && (getQueryText(HIST) == null)
371974067deSChris Fraire                 && (getQueryText(DIRPATH) == null)
372974067deSChris Fraire                 && (getQueryText(DEFS) == null));
373974067deSChris Fraire     }
374974067deSChris Fraire 
375974067deSChris Fraire     /**
376b5840353SAdam Hornáček      * Build a new query based on the query text that has been passed in to this
377b5840353SAdam Hornáček      * builder.
378b5840353SAdam Hornáček      *
379b5840353SAdam Hornáček      * @return a query, or {@code null} if no query text is available.
380b5840353SAdam Hornáček      * @throws ParseException if the query text cannot be parsed
381b5840353SAdam Hornáček      */
build()382b5840353SAdam Hornáček     public Query build() throws ParseException {
383b5840353SAdam Hornáček         if (queries.isEmpty()) {
384b5840353SAdam Hornáček             // We don't have any text to parse
385b5840353SAdam Hornáček             return null;
386b5840353SAdam Hornáček         }
387b5840353SAdam Hornáček         // Parse each of the query texts separately
388b5840353SAdam Hornáček         ArrayList<Query> queryList = new ArrayList<>(queries.size());
389b5840353SAdam Hornáček         for (Map.Entry<String, String> entry : queries.entrySet()) {
390b5840353SAdam Hornáček             String field = entry.getKey();
391b5840353SAdam Hornáček             String queryText = entry.getValue();
392b5840353SAdam Hornáček             queryList.add(buildQuery(field, escapeQueryString(field, queryText)));
393b5840353SAdam Hornáček         }
394b5840353SAdam Hornáček         // If we only have one sub-query, return it directly
395b5840353SAdam Hornáček         if (queryList.size() == 1) {
396b5840353SAdam Hornáček             return queryList.get(0);
397b5840353SAdam Hornáček         }
398b5840353SAdam Hornáček         // We have multiple subqueries, so let's combine them into a
399b5840353SAdam Hornáček         // BooleanQuery.
400b5840353SAdam Hornáček         //
401b5840353SAdam Hornáček         // If the subquery is a BooleanQuery, we pull out each clause and
402b5840353SAdam Hornáček         // add it to the outer BooleanQuery so that any negations work on
403b5840353SAdam Hornáček         // the query as a whole. One exception to this rule: If the query
404b5840353SAdam Hornáček         // contains one or more Occur.SHOULD clauses and no Occur.MUST
405b5840353SAdam Hornáček         // clauses, we keep it in a subquery so that the requirement that
406b5840353SAdam Hornáček         // at least one of the Occur.SHOULD clauses must match (pulling them
407b5840353SAdam Hornáček         // out would make all of them optional).
408b5840353SAdam Hornáček         //
409b5840353SAdam Hornáček         // All other types of subqueries are added directly to the outer
410b5840353SAdam Hornáček         // query with Occur.MUST.
411b5840353SAdam Hornáček         BooleanQuery.Builder combinedQuery = new BooleanQuery.Builder();
412b5840353SAdam Hornáček         for (Query query : queryList) {
413b5840353SAdam Hornáček             if (query instanceof BooleanQuery) {
414b5840353SAdam Hornáček                 BooleanQuery boolQuery = (BooleanQuery) query;
415b5840353SAdam Hornáček                 if (hasClause(boolQuery, Occur.SHOULD)
416b5840353SAdam Hornáček                         && !hasClause(boolQuery, Occur.MUST)) {
417b5840353SAdam Hornáček                     combinedQuery.add(query, Occur.MUST);
418b5840353SAdam Hornáček                 } else {
419b5840353SAdam Hornáček                     for (BooleanClause clause : boolQuery) {
420b5840353SAdam Hornáček                         combinedQuery.add(clause);
421b5840353SAdam Hornáček                     }
422b5840353SAdam Hornáček                 }
423b5840353SAdam Hornáček             } else {
424b5840353SAdam Hornáček                 combinedQuery.add(query, Occur.MUST);
425b5840353SAdam Hornáček             }
426b5840353SAdam Hornáček         }
427b5840353SAdam Hornáček         return combinedQuery.build();
428b5840353SAdam Hornáček     }
429b5840353SAdam Hornáček 
430b5840353SAdam Hornáček     /**
431b5840353SAdam Hornáček      * Add query text for the specified field.
432b5840353SAdam Hornáček      *
433b5840353SAdam Hornáček      * @param field the field to add query text for
434b5840353SAdam Hornáček      * @param query the query text to set
435b5840353SAdam Hornáček      * @return this object
436b5840353SAdam Hornáček      */
addQueryText(String field, String query)437b5840353SAdam Hornáček     private QueryBuilder addQueryText(String field, String query) {
438b5840353SAdam Hornáček         if (query == null || query.isEmpty()) {
439b5840353SAdam Hornáček             queries.remove(field);
440b5840353SAdam Hornáček         } else {
441b5840353SAdam Hornáček             queries.put(field, query);
442b5840353SAdam Hornáček         }
443b5840353SAdam Hornáček         return this;
444b5840353SAdam Hornáček     }
445b5840353SAdam Hornáček 
getQueryText(String field)446b5840353SAdam Hornáček     private String getQueryText(String field) {
447b5840353SAdam Hornáček         return queries.get(field);
448b5840353SAdam Hornáček     }
449b5840353SAdam Hornáček 
450b5840353SAdam Hornáček     /**
451b5840353SAdam Hornáček      * Escape special characters in a query string.
452b5840353SAdam Hornáček      *
453b5840353SAdam Hornáček      * @param field the field for which the query string is provided
454b5840353SAdam Hornáček      * @param query the query string to escape
455b5840353SAdam Hornáček      * @return the escaped query string
456b5840353SAdam Hornáček      */
escapeQueryString(String field, String query)457b5840353SAdam Hornáček     private String escapeQueryString(String field, String query) {
458a5cf78b2SChris Fraire         StringReader reader = new StringReader(query);
459a5cf78b2SChris Fraire         StringBuilder res = new StringBuilder();
460b5840353SAdam Hornáček         switch (field) {
461b5840353SAdam Hornáček             case FULL:
462a5cf78b2SChris Fraire                 FullQueryEscaper fesc = new FullQueryEscaper(reader);
463a5cf78b2SChris Fraire                 fesc.setOut(res);
464a5cf78b2SChris Fraire                 fesc.consume();
465a5cf78b2SChris Fraire                 break;
466b5840353SAdam Hornáček             case PATH:
467b5840353SAdam Hornáček                 if (!(query.startsWith("/") && query.endsWith("/"))) {
468a5cf78b2SChris Fraire                     PathQueryEscaper pesc = new PathQueryEscaper(reader);
469a5cf78b2SChris Fraire                     pesc.setOut(res);
470a5cf78b2SChris Fraire                     pesc.consume();
471a5cf78b2SChris Fraire                     break;
472b5840353SAdam Hornáček                 }
473a5cf78b2SChris Fraire                 // FALLTHROUGH
474b5840353SAdam Hornáček             default:
475a5cf78b2SChris Fraire                 DefaultQueryEscaper desc = new DefaultQueryEscaper(reader);
476a5cf78b2SChris Fraire                 desc.setOut(res);
477a5cf78b2SChris Fraire                 desc.consume();
478b5840353SAdam Hornáček         }
479a5cf78b2SChris Fraire         return res.toString();
480b5840353SAdam Hornáček     }
481b5840353SAdam Hornáček 
482b5840353SAdam Hornáček     /**
483b5840353SAdam Hornáček      * Build a subquery against one of the fields.
484b5840353SAdam Hornáček      *
485b5840353SAdam Hornáček      * @param field the field to build the query against
486b5840353SAdam Hornáček      * @param queryText the query text
487b5840353SAdam Hornáček      * @return a parsed query
488b5840353SAdam Hornáček      * @throws ParseException if the query text cannot be parsed
489b5840353SAdam Hornáček      */
buildQuery(String field, String queryText)490911e8af0SAdam Hornáček     protected Query buildQuery(String field, String queryText)
491b5840353SAdam Hornáček             throws ParseException {
492b5840353SAdam Hornáček         return new CustomQueryParser(field).parse(queryText);
493b5840353SAdam Hornáček     }
494b5840353SAdam Hornáček 
495b5840353SAdam Hornáček     /**
496b5840353SAdam Hornáček      * Check if a BooleanQuery contains a clause of a given occur type.
497b5840353SAdam Hornáček      *
498b5840353SAdam Hornáček      * @param query the query to check
499b5840353SAdam Hornáček      * @param occur the occur type to check for
500b5840353SAdam Hornáček      * @return whether or not the query contains a clause of the specified type
501b5840353SAdam Hornáček      */
hasClause(BooleanQuery query, Occur occur)502b5840353SAdam Hornáček     private boolean hasClause(BooleanQuery query, Occur occur) {
503b5840353SAdam Hornáček         for (BooleanClause clause : query) {
504b5840353SAdam Hornáček             if (clause.getOccur().equals(occur)) {
505b5840353SAdam Hornáček                 return true;
506b5840353SAdam Hornáček             }
507b5840353SAdam Hornáček         }
508b5840353SAdam Hornáček         return false;
509b5840353SAdam Hornáček     }
510b5840353SAdam Hornáček }
511