1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2018, Chris Fraire <cfraire@me.com>. 22 */ 23 package org.opengrok.indexer.search.context; 24 25 import java.text.BreakIterator; 26 import java.text.CharacterIterator; 27 import java.text.StringCharacterIterator; 28 import java.util.ArrayList; 29 import java.util.List; 30 31 /** 32 * Represents a subclass of {@link BreakIterator} that breaks at standard 33 * OpenGrok EOL -- namely {@code \r\n}, {@code \n}, or {@code \r}. 34 */ 35 public class StrictLineBreakIterator extends BreakIterator { 36 37 private final List<Integer> breaks = new ArrayList<>(); 38 private char peekChar = CharacterIterator.DONE; 39 private CharacterIterator charIt; 40 private int breakOffset = -1; 41 StrictLineBreakIterator()42 public StrictLineBreakIterator() { 43 charIt = new StringCharacterIterator(""); 44 } 45 46 @Override first()47 public int first() { 48 breaks.clear(); 49 breakOffset = -1; 50 charIt.first(); 51 return 0; 52 } 53 54 @Override last()55 public int last() { 56 int c; 57 do { 58 c = current(); 59 } while (next() != BreakIterator.DONE); 60 return c; 61 } 62 63 @Override next(int n)64 public int next(int n) { 65 if (n < 0) { 66 throw new IllegalArgumentException("n cannot be negative"); 67 } 68 69 int noff = current(); 70 for (int i = 0; i < n; ++i) { 71 noff = next(); 72 if (noff == BreakIterator.DONE) { 73 return noff; 74 } 75 } 76 return noff; 77 } 78 79 @Override next()80 public int next() { 81 if (breakOffset + 1 < breaks.size()) { 82 return breaks.get(++breakOffset); 83 } 84 85 char lastChar = CharacterIterator.DONE; 86 int charOff; 87 while (true) { 88 char nextChar; 89 if (peekChar != CharacterIterator.DONE) { 90 nextChar = peekChar; 91 peekChar = CharacterIterator.DONE; 92 } else { 93 nextChar = charIt.next(); 94 } 95 96 switch (nextChar) { 97 case CharacterIterator.DONE: 98 if (lastChar != CharacterIterator.DONE) { 99 charOff = charIt.getIndex(); 100 breaks.add(charOff); 101 ++breakOffset; 102 return charOff; 103 } else { 104 return BreakIterator.DONE; 105 } 106 case '\n': 107 // charOff is just past the LF 108 charOff = charIt.getIndex() + 1; 109 breaks.add(charOff); 110 ++breakOffset; 111 return charOff; 112 case '\r': 113 charOff = charIt.getIndex() + 1; 114 peekChar = charIt.next(); 115 switch (peekChar) { 116 case '\n': 117 peekChar = CharacterIterator.DONE; 118 // charOff is just past the LF 119 ++charOff; 120 breaks.add(charOff); 121 ++breakOffset; 122 return charOff; 123 case CharacterIterator.DONE: 124 default: 125 breaks.add(charOff); 126 ++breakOffset; 127 return charOff; 128 } 129 default: 130 lastChar = nextChar; 131 break; 132 } 133 } 134 } 135 136 @Override previous()137 public int previous() { 138 if (breakOffset >= 0) { 139 if (--breakOffset >= 0) { 140 return breaks.get(breakOffset); 141 } 142 return 0; 143 } 144 return BreakIterator.DONE; 145 } 146 147 @Override following(int offset)148 public int following(int offset) { 149 if (breaks.size() > 0 && breaks.get(breaks.size() - 1) > offset) { 150 int lo = 0; 151 int hi = breaks.size() - 1; 152 int mid; 153 while (lo <= hi) { 154 mid = lo + (hi - lo) / 2; 155 int boff = breaks.get(mid); 156 if (offset < boff) { 157 if (mid < 1 || offset >= breaks.get(mid - 1)) { 158 return boff; 159 } else { 160 hi = mid - 1; 161 } 162 } else { 163 lo = mid + 1; 164 } 165 } 166 // This should not be reached. 167 return BreakIterator.DONE; 168 } 169 170 int noff = BreakIterator.DONE; 171 do { 172 noff = next(); 173 if (noff > offset) { 174 return noff; 175 } 176 } while (noff != BreakIterator.DONE); 177 return noff; 178 } 179 180 @Override current()181 public int current() { 182 if (breakOffset < 0) { 183 return 0; 184 } 185 return breakOffset < breaks.size() ? breaks.get(breakOffset) : 186 charIt.current(); 187 } 188 189 @Override getText()190 public CharacterIterator getText() { 191 return (CharacterIterator) charIt.clone(); 192 } 193 194 @Override setText(CharacterIterator newText)195 public void setText(CharacterIterator newText) { 196 if (newText == null) { 197 throw new IllegalArgumentException("newText is null"); 198 } 199 this.charIt = newText; 200 this.breaks.clear(); 201 this.peekChar = newText.current(); 202 this.breakOffset = -1; 203 } 204 } 205