/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>.
 */
package org.opengrok.indexer.util;

import org.opengrok.indexer.analysis.StreamSource;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

/**
 * Holds static helper methods shared by splitter-style classes: a line
 * lookup by document offset and a routine that feeds a freshly opened
 * reader to a callback.
 */
class SplitterUtil {

    /**
     * Callback that is handed the reader prepared by
     * {@link #reset(Resetter, StreamSource, ReaderWrapper)}.
     */
    @FunctionalInterface
    interface Resetter {
        /**
         * Consumes the specified reader.
         * @param reader the reader to consume
         * @throws IOException if an I/O error occurs
         */
        void reset(Reader reader) throws IOException;
    }

    /**
     * Locates, by binary search, the entry of {@code lineOffsets} whose span
     * contains the specified document offset. The span of an entry runs from
     * its own value up to (but excluding) the value of the following entry,
     * or up to {@code length} for the last entry. An entry with an empty span
     * matches an offset that is exactly equal to its value.
     * <p>
     * As a binary search, this relies on {@code lineOffsets} being in
     * ascending order.
     * @param length the document length, used as the end of the last entry's
     * span and as the inclusive upper bound for {@code offset}
     * @param lineOffsets a defined array of starting offsets
     * @param offset the document offset to look up; values less than zero or
     * greater than {@code length} are rejected
     * @return -1 if {@code offset} is negative, is greater than
     * {@code length}, or falls within no entry's span; otherwise, the index
     * of the matching entry
     * @throws IllegalArgumentException if {@code lineOffsets} is {@code null}
     */
    static int findLineIndex(int length, int[] lineOffsets, int offset) {
        if (lineOffsets == null) {
            throw new IllegalArgumentException("lineOffsets");
        }

        final boolean withinDocument = offset >= 0 && offset <= length;
        if (!withinDocument) {
            return -1;
        }

        int low = 0;
        int high = lineOffsets.length - 1;
        while (low <= high) {
            final int probe = low + (high - low) / 2;
            final int next = probe + 1;
            final int start = lineOffsets[probe];
            // The last entry has no successor, so its span ends at length.
            final int end = next < lineOffsets.length ? lineOffsets[next] : length;

            if (offset < start) {
                high = probe - 1;
                continue;
            }

            // From here on, offset is at or past the start of the probed entry.
            final boolean hitsEmptySpan = start == end && offset == start;
            if (hitsEmptySpan || offset < end) {
                return probe;
            }
            low = probe + 1;
        }
        return -1;
    }

    /**
     * Opens the stream of {@code src}, builds a reader over it through
     * {@code IOUtils.createBOMStrippedReader} with the UTF-8 charset name,
     * optionally passes that reader through {@code wrapper}, and hands a
     * {@link BufferedReader} over the result to {@code resetter}. Everything
     * opened here is closed before returning; a non-null reader obtained from
     * {@code wrapper} is also closed explicitly after the buffered reader.
     * @param resetter a defined instance
     * @param src a defined instance
     * @param wrapper an optional instance; when {@code null}, or when it
     * yields {@code null}, the unwrapped reader is buffered directly
     * @throws IllegalArgumentException if {@code src} is {@code null}
     * @throws java.io.IOException if an I/O error occurs
     */
    static void reset(Resetter resetter, StreamSource src, ReaderWrapper wrapper)
            throws IOException {
        if (src == null) {
            throw new IllegalArgumentException("src is null");
        }

        try (InputStream in = src.getStream();
                Reader rdr = IOUtils.createBOMStrippedReader(in,
                        StandardCharsets.UTF_8.name())) {
            final Reader wrapped = wrapper == null ? null : wrapper.get(rdr);
            final Reader toBuffer = wrapped == null ? rdr : wrapped;

            try (BufferedReader buffered = new BufferedReader(toBuffer)) {
                resetter.reset(buffered);
            } finally {
                if (wrapped != null) {
                    wrapped.close();
                }
            }
        }
    }

    /* private to enforce static. */
    private SplitterUtil() {
    }
}