/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>.
 */
package org.opengrok.indexer.util;

import org.opengrok.indexer.analysis.StreamSource;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

/**
 * Represents a container for reusable splitter-oriented utility methods.
 */
class SplitterUtil {

    /**
     * Represents a callback that is handed the reader prepared by
     * {@link #reset(Resetter, StreamSource, ReaderWrapper)}.
     */
    @FunctionalInterface
    interface Resetter {
        /**
         * Consumes the specified reader.
         * @param reader a defined instance
         * @throws IOException if an I/O error occurs
         */
        void reset(Reader reader) throws IOException;
    }

    /**
     * Find the line index for the specified document offset.
     * <p>
     * Line {@code i} spans from {@code lineOffsets[i]} (inclusive) to
     * {@code lineOffsets[i + 1]} (exclusive), with {@code length} standing in
     * as the end of the last entry. A zero-length entry matches only an
     * {@code offset} exactly equal to its starting offset, which is how
     * {@code offset == length} can resolve to a trailing, empty line.
     * @param length the document length, used as the end of the last line
     * @param lineOffsets a defined array of line-starting offsets (a binary
     * search is used, so the values are assumed to be in ascending order)
     * @param offset greater than or equal to zero and less than or equal to
     * {@code length}
     * @return -1 if {@code offset} is beyond the document bounds or no line
     * contains it; otherwise, a valid index
     * @throws IllegalArgumentException if {@code lineOffsets} is {@code null}
     */
    static int findLineIndex(int length, int[] lineOffsets, int offset) {
        if (lineOffsets == null) {
            throw new IllegalArgumentException("lineOffsets");
        }
        if (offset < 0 || offset > length) {
            return -1;
        }

        int lo = 0;
        int hi = lineOffsets.length - 1;
        while (lo <= hi) {
            // Overflow-safe midpoint.
            int mid = lo + (hi - lo) / 2;
            int lineStart = lineOffsets[mid];
            // The last entry has no successor, so it runs to the document end.
            int lineEnd = mid + 1 < lineOffsets.length ? lineOffsets[mid + 1] : length;

            if (offset < lineStart) {
                hi = mid - 1;
            } else if (lineEnd == lineStart && offset == lineStart) {
                // A zero-length line can only be hit exactly at its start.
                return mid;
            } else if (offset >= lineEnd) {
                lo = mid + 1;
            } else {
                return mid;
            }
        }
        return -1;
    }

    /**
     * Resets the breaker using the specified inputs.
     * <p>
     * The stream of {@code src} is opened and decoded as UTF-8 through
     * {@code IOUtils.createBOMStrippedReader}, optionally passed through
     * {@code wrapper}, buffered, and then handed to {@code resetter}. All
     * readers and the stream are closed before this method returns.
     * @param resetter a defined instance
     * @param src a defined instance
     * @param wrapper an optional instance
     * @throws IllegalArgumentException if {@code resetter} or {@code src} is
     * {@code null}
     * @throws java.io.IOException if an I/O error occurs
     */
    static void reset(Resetter resetter, StreamSource src, ReaderWrapper wrapper)
            throws IOException {
        // Validate before opening any stream so a bad call has no side effects.
        if (resetter == null) {
            throw new IllegalArgumentException("resetter is null");
        }
        if (src == null) {
            throw new IllegalArgumentException("src is null");
        }

        try (InputStream in = src.getStream();
             Reader rdr = IOUtils.createBOMStrippedReader(in, StandardCharsets.UTF_8.name())) {
            Reader intermediate = null;
            if (wrapper != null) {
                intermediate = wrapper.get(rdr);
            }

            try (BufferedReader brdr = new BufferedReader(intermediate != null ?
                    intermediate : rdr)) {
                resetter.reset(brdr);
            } finally {
                // Explicitly release the wrapper's reader as well.
                if (intermediate != null) {
                    intermediate.close();
                }
            }
        }
    }

    /* private to enforce static. */
    private SplitterUtil() {
    }
}