xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/util/SplitterUtil.java (revision 5d9f3aa0ca3da3a714233f987fa732f62c0965f6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>.
22  */
23 package org.opengrok.indexer.util;
24 
25 import org.opengrok.indexer.analysis.StreamSource;
26 
27 import java.io.BufferedReader;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.io.Reader;
31 import java.nio.charset.StandardCharsets;
32 
33 /**
34  * Represents a container for reusable splitter-oriented utility methods.
35  */
36 class SplitterUtil {
37 
38     @FunctionalInterface
39     interface Resetter {
reset(Reader reader)40         void reset(Reader reader) throws IOException;
41     }
42 
43     /**
44      * Find the line index for the specified document offset.
45      * @param offset greater than or equal to zero and less than
46      * {@code length}.
47      * @return -1 if {@code offset} is beyond the document bounds; otherwise,
48      * a valid index
49      */
findLineIndex(int length, int[] lineOffsets, int offset)50     static int findLineIndex(int length, int[] lineOffsets, int offset) {
51         if (lineOffsets == null) {
52             throw new IllegalArgumentException("lineOffsets");
53         }
54         if (offset < 0 || offset > length) {
55             return -1;
56         }
57 
58         int lo = 0;
59         int hi = lineOffsets.length - 1;
60         int mid;
61         while (lo <= hi) {
62             mid = lo + (hi - lo) / 2;
63             int lineLength = (mid + 1 < lineOffsets.length ? lineOffsets[mid + 1] : length) -
64                     lineOffsets[mid];
65             if (offset < lineOffsets[mid]) {
66                 hi = mid - 1;
67             } else if (lineLength == 0 && offset == lineOffsets[mid]) {
68                 return mid;
69             } else if (offset >= lineOffsets[mid] + lineLength) {
70                 lo = mid + 1;
71             } else {
72                 return mid;
73             }
74         }
75         return -1;
76     }
77 
78     /**
79      * Resets the breaker using the specified inputs.
80      * @param resetter a defined instance
81      * @param src a defined instance
82      * @param wrapper an optional instance
83      * @throws java.io.IOException if an I/O error occurs
84      */
reset(Resetter resetter, StreamSource src, ReaderWrapper wrapper)85     static void reset(Resetter resetter, StreamSource src, ReaderWrapper wrapper)
86             throws IOException {
87         if (src == null) {
88             throw new IllegalArgumentException("src is null");
89         }
90 
91         try (InputStream in = src.getStream();
92              Reader rdr = IOUtils.createBOMStrippedReader(in, StandardCharsets.UTF_8.name())) {
93             Reader intermediate = null;
94             if (wrapper != null) {
95                 intermediate = wrapper.get(rdr);
96             }
97 
98             try (BufferedReader brdr = new BufferedReader(intermediate != null ?
99                     intermediate : rdr)) {
100                 resetter.reset(brdr);
101             } finally {
102                 if (intermediate != null) {
103                     intermediate.close();
104                 }
105             }
106         }
107     }
108 
109     /* private to enforce static. */
SplitterUtil()110     private SplitterUtil() {
111     }
112 }
113