xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/util/SplitterUtil.java (revision 5d9f3aa0ca3da3a714233f987fa732f62c0965f6)
1*ee1827acSChris Fraire /*
2*ee1827acSChris Fraire  * CDDL HEADER START
3*ee1827acSChris Fraire  *
4*ee1827acSChris Fraire  * The contents of this file are subject to the terms of the
5*ee1827acSChris Fraire  * Common Development and Distribution License (the "License").
6*ee1827acSChris Fraire  * You may not use this file except in compliance with the License.
7*ee1827acSChris Fraire  *
8*ee1827acSChris Fraire  * See LICENSE.txt included in this distribution for the specific
9*ee1827acSChris Fraire  * language governing permissions and limitations under the License.
10*ee1827acSChris Fraire  *
11*ee1827acSChris Fraire  * When distributing Covered Code, include this CDDL HEADER in each
12*ee1827acSChris Fraire  * file and include the License file at LICENSE.txt.
13*ee1827acSChris Fraire  * If applicable, add the following below this CDDL HEADER, with the
14*ee1827acSChris Fraire  * fields enclosed by brackets "[]" replaced with your own identifying
15*ee1827acSChris Fraire  * information: Portions Copyright [yyyy] [name of copyright owner]
16*ee1827acSChris Fraire  *
17*ee1827acSChris Fraire  * CDDL HEADER END
18*ee1827acSChris Fraire  */
19*ee1827acSChris Fraire 
20*ee1827acSChris Fraire /*
21*ee1827acSChris Fraire  * Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>.
22*ee1827acSChris Fraire  */
23*ee1827acSChris Fraire package org.opengrok.indexer.util;
24*ee1827acSChris Fraire 
25*ee1827acSChris Fraire import org.opengrok.indexer.analysis.StreamSource;
26*ee1827acSChris Fraire 
27*ee1827acSChris Fraire import java.io.BufferedReader;
28*ee1827acSChris Fraire import java.io.IOException;
29*ee1827acSChris Fraire import java.io.InputStream;
30*ee1827acSChris Fraire import java.io.Reader;
31*ee1827acSChris Fraire import java.nio.charset.StandardCharsets;
32*ee1827acSChris Fraire 
33*ee1827acSChris Fraire /**
34*ee1827acSChris Fraire  * Represents a container for reusable splitter-oriented utility methods.
35*ee1827acSChris Fraire  */
36*ee1827acSChris Fraire class SplitterUtil {
37*ee1827acSChris Fraire 
38*ee1827acSChris Fraire     @FunctionalInterface
39*ee1827acSChris Fraire     interface Resetter {
reset(Reader reader)40*ee1827acSChris Fraire         void reset(Reader reader) throws IOException;
41*ee1827acSChris Fraire     }
42*ee1827acSChris Fraire 
43*ee1827acSChris Fraire     /**
44*ee1827acSChris Fraire      * Find the line index for the specified document offset.
45*ee1827acSChris Fraire      * @param offset greater than or equal to zero and less than
46*ee1827acSChris Fraire      * {@code length}.
47*ee1827acSChris Fraire      * @return -1 if {@code offset} is beyond the document bounds; otherwise,
48*ee1827acSChris Fraire      * a valid index
49*ee1827acSChris Fraire      */
findLineIndex(int length, int[] lineOffsets, int offset)50*ee1827acSChris Fraire     static int findLineIndex(int length, int[] lineOffsets, int offset) {
51*ee1827acSChris Fraire         if (lineOffsets == null) {
52*ee1827acSChris Fraire             throw new IllegalArgumentException("lineOffsets");
53*ee1827acSChris Fraire         }
54*ee1827acSChris Fraire         if (offset < 0 || offset > length) {
55*ee1827acSChris Fraire             return -1;
56*ee1827acSChris Fraire         }
57*ee1827acSChris Fraire 
58*ee1827acSChris Fraire         int lo = 0;
59*ee1827acSChris Fraire         int hi = lineOffsets.length - 1;
60*ee1827acSChris Fraire         int mid;
61*ee1827acSChris Fraire         while (lo <= hi) {
62*ee1827acSChris Fraire             mid = lo + (hi - lo) / 2;
63*ee1827acSChris Fraire             int lineLength = (mid + 1 < lineOffsets.length ? lineOffsets[mid + 1] : length) -
64*ee1827acSChris Fraire                     lineOffsets[mid];
65*ee1827acSChris Fraire             if (offset < lineOffsets[mid]) {
66*ee1827acSChris Fraire                 hi = mid - 1;
67*ee1827acSChris Fraire             } else if (lineLength == 0 && offset == lineOffsets[mid]) {
68*ee1827acSChris Fraire                 return mid;
69*ee1827acSChris Fraire             } else if (offset >= lineOffsets[mid] + lineLength) {
70*ee1827acSChris Fraire                 lo = mid + 1;
71*ee1827acSChris Fraire             } else {
72*ee1827acSChris Fraire                 return mid;
73*ee1827acSChris Fraire             }
74*ee1827acSChris Fraire         }
75*ee1827acSChris Fraire         return -1;
76*ee1827acSChris Fraire     }
77*ee1827acSChris Fraire 
78*ee1827acSChris Fraire     /**
79*ee1827acSChris Fraire      * Resets the breaker using the specified inputs.
80*ee1827acSChris Fraire      * @param resetter a defined instance
81*ee1827acSChris Fraire      * @param src a defined instance
82*ee1827acSChris Fraire      * @param wrapper an optional instance
83*ee1827acSChris Fraire      * @throws java.io.IOException if an I/O error occurs
84*ee1827acSChris Fraire      */
reset(Resetter resetter, StreamSource src, ReaderWrapper wrapper)85*ee1827acSChris Fraire     static void reset(Resetter resetter, StreamSource src, ReaderWrapper wrapper)
86*ee1827acSChris Fraire             throws IOException {
87*ee1827acSChris Fraire         if (src == null) {
88*ee1827acSChris Fraire             throw new IllegalArgumentException("src is null");
89*ee1827acSChris Fraire         }
90*ee1827acSChris Fraire 
91*ee1827acSChris Fraire         try (InputStream in = src.getStream();
92*ee1827acSChris Fraire              Reader rdr = IOUtils.createBOMStrippedReader(in, StandardCharsets.UTF_8.name())) {
93*ee1827acSChris Fraire             Reader intermediate = null;
94*ee1827acSChris Fraire             if (wrapper != null) {
95*ee1827acSChris Fraire                 intermediate = wrapper.get(rdr);
96*ee1827acSChris Fraire             }
97*ee1827acSChris Fraire 
98*ee1827acSChris Fraire             try (BufferedReader brdr = new BufferedReader(intermediate != null ?
99*ee1827acSChris Fraire                     intermediate : rdr)) {
100*ee1827acSChris Fraire                 resetter.reset(brdr);
101*ee1827acSChris Fraire             } finally {
102*ee1827acSChris Fraire                 if (intermediate != null) {
103*ee1827acSChris Fraire                     intermediate.close();
104*ee1827acSChris Fraire                 }
105*ee1827acSChris Fraire             }
106*ee1827acSChris Fraire         }
107*ee1827acSChris Fraire     }
108*ee1827acSChris Fraire 
109*ee1827acSChris Fraire     /* private to enforce static. */
SplitterUtil()110*ee1827acSChris Fraire     private SplitterUtil() {
111*ee1827acSChris Fraire     }
112*ee1827acSChris Fraire }
113