xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/StrictLineBreakIterator.java (revision 5d9f3aa0ca3da3a714233f987fa732f62c0965f6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2018, Chris Fraire <cfraire@me.com>.
22  */
23 package org.opengrok.indexer.search.context;
24 
25 import java.text.BreakIterator;
26 import java.text.CharacterIterator;
27 import java.text.StringCharacterIterator;
28 import java.util.ArrayList;
29 import java.util.List;
30 
/**
 * Represents a subclass of {@link BreakIterator} that breaks at standard
 * OpenGrok EOL -- namely {@code \r\n}, {@code \n}, or {@code \r}.
 * <p>
 * Boundaries are discovered lazily by scanning the text forward and are
 * cached in discovery order, so that {@link #previous()} and
 * {@link #following(int)} can be answered from the cache for the part of the
 * text that has already been scanned. Offset {@code 0} is treated as the
 * implicit first boundary and is not stored in the cache. A final line that
 * is not EOL-terminated ends at a boundary equal to the index at which the
 * text iterator reports {@link CharacterIterator#DONE}.
 */
public class StrictLineBreakIterator extends BreakIterator {

    /** Boundaries found so far, in ascending order (offset 0 is implicit). */
    private final List<Integer> breaks = new ArrayList<>();
    /**
     * A character that was already read from {@link #charIt} but not yet
     * processed, or {@link CharacterIterator#DONE} if there is none.
     */
    private char peekChar = CharacterIterator.DONE;
    /** The text being scanned. */
    private CharacterIterator charIt;
    /**
     * Index into {@link #breaks} of the current boundary, or {@code -1} when
     * positioned at the implicit first boundary (offset 0).
     */
    private int breakOffset = -1;

    /**
     * Initializes an instance over empty text; call
     * {@link #setText(CharacterIterator)} to supply real content.
     */
    public StrictLineBreakIterator() {
        charIt = new StringCharacterIterator("");
    }

    /**
     * Resets the iterator to the start of the text, discarding all cached
     * boundaries so that scanning restarts from the first character.
     * @return {@code 0}
     */
    @Override
    public int first() {
        breaks.clear();
        breakOffset = -1;
        /*
         * next() only reads via CharacterIterator.next() when nothing is
         * peeked, so the first character must be re-primed here (exactly as
         * setText() does); otherwise the character at the start would be
         * skipped, or a stale peeked character would be replayed.
         */
        peekChar = charIt.first();
        return 0;
    }

    /**
     * Advances through all remaining boundaries and positions the iterator
     * at the last one.
     * @return the last boundary, or {@code 0} if the text has no boundaries
     * after the start
     */
    @Override
    public int last() {
        int c;
        do {
            c = current();
        } while (next() != BreakIterator.DONE);
        return c;
    }

    /**
     * Advances {@code n} boundaries forward from the current position.
     * @param n a non-negative count of boundaries to advance
     * @return the boundary reached (the current boundary when {@code n} is
     * zero), or {@link BreakIterator#DONE} if the text was exhausted first
     * @throws IllegalArgumentException if {@code n} is negative
     */
    @Override
    public int next(int n) {
        if (n < 0) {
            throw new IllegalArgumentException("n cannot be negative");
        }

        int noff = current();
        for (int i = 0; i < n; ++i) {
            noff = next();
            if (noff == BreakIterator.DONE) {
                return noff;
            }
        }
        return noff;
    }

    /**
     * Advances to the next boundary, serving it from the cache if it was
     * already discovered or else scanning forward to the end of the next
     * line.
     * @return the offset just past the next EOL; or the end-of-text offset if
     * trailing characters remain without an EOL; or
     * {@link BreakIterator#DONE} if nothing remains
     */
    @Override
    public int next() {
        if (breakOffset + 1 < breaks.size()) {
            return breaks.get(++breakOffset);
        }

        // Tracks whether any non-EOL character was seen during this scan.
        boolean sawContent = false;
        while (true) {
            char nextChar;
            if (peekChar != CharacterIterator.DONE) {
                nextChar = peekChar;
                peekChar = CharacterIterator.DONE;
            } else {
                nextChar = charIt.next();
            }

            switch (nextChar) {
                case CharacterIterator.DONE:
                    // An unterminated final line still yields a boundary.
                    return sawContent ? recordBreak(charIt.getIndex()) :
                        BreakIterator.DONE;
                case '\n':
                    // The boundary is just past the LF.
                    return recordBreak(charIt.getIndex() + 1);
                case '\r': {
                    int charOff = charIt.getIndex() + 1;
                    peekChar = charIt.next();
                    if (peekChar == '\n') {
                        // CRLF: consume the LF; boundary is just past it.
                        peekChar = CharacterIterator.DONE;
                        ++charOff;
                    }
                    // Otherwise the peeked character is kept for later.
                    return recordBreak(charOff);
                }
                default:
                    sawContent = true;
                    break;
            }
        }
    }

    /**
     * Appends a newly discovered boundary to the cache and makes it the
     * current boundary.
     */
    private int recordBreak(int offset) {
        breaks.add(offset);
        ++breakOffset;
        return offset;
    }

    /**
     * Moves back to the preceding boundary.
     * @return the preceding cached boundary; {@code 0} when stepping back
     * from the first cached boundary; or {@link BreakIterator#DONE} if the
     * iterator is already at the start
     */
    @Override
    public int previous() {
        if (breakOffset >= 0) {
            if (--breakOffset >= 0) {
                return breaks.get(breakOffset);
            }
            return 0;
        }
        return BreakIterator.DONE;
    }

    /**
     * Finds the first boundary strictly greater than {@code offset} and makes
     * it the current boundary.
     * @param offset a character offset
     * @return the first boundary greater than {@code offset}, or
     * {@link BreakIterator#DONE} if there is none
     */
    @Override
    public int following(int offset) {
        int lastIdx = breaks.size() - 1;
        if (lastIdx >= 0 && breaks.get(lastIdx) > offset) {
            // Lower-bound binary search over the ascending cache; the guard
            // above guarantees that an answer exists at or before lastIdx.
            int lo = 0;
            int hi = lastIdx;
            while (lo < hi) {
                int mid = lo + (hi - lo) / 2;
                if (breaks.get(mid) > offset) {
                    hi = mid;
                } else {
                    lo = mid + 1;
                }
            }
            // Move the cursor too, so that current()/next()/previous()
            // continue from the boundary being returned.
            breakOffset = lo;
            return breaks.get(lo);
        }

        // Every cached boundary is <= offset, so skip straight past them and
        // scan the remaining text.
        breakOffset = lastIdx;
        int noff;
        do {
            noff = next();
            if (noff > offset) {
                return noff;
            }
        } while (noff != BreakIterator.DONE);
        return noff;
    }

    /**
     * Gets the current boundary.
     * @return {@code 0} if positioned at the start, or else the boundary most
     * recently returned by a positioning method
     */
    @Override
    public int current() {
        if (breakOffset < 0) {
            return 0;
        }
        // The fallback is defensive only; it reports an offset (not a
        // character value) should the cursor ever run ahead of the cache.
        return breakOffset < breaks.size() ? breaks.get(breakOffset) :
            charIt.getIndex();
    }

    /**
     * Gets a copy of the text being scanned.
     * @return a clone of the underlying {@link CharacterIterator}
     */
    @Override
    public CharacterIterator getText() {
        return (CharacterIterator) charIt.clone();
    }

    /**
     * Sets new text to scan, discarding all cached boundaries. Scanning
     * starts with the character at {@code newText}'s current index.
     * @param newText a defined instance
     * @throws IllegalArgumentException if {@code newText} is {@code null}
     */
    @Override
    public void setText(CharacterIterator newText) {
        if (newText == null) {
            throw new IllegalArgumentException("newText is null");
        }
        this.charIt = newText;
        this.breaks.clear();
        this.peekChar = newText.current();
        this.breakOffset = -1;
    }
}
205