xref: /OpenGrok/opengrok-indexer/src/test/java/org/opengrok/indexer/util/LineBreakerTest.java (revision 52d10766ed1db3b0fd2c59a0da7292a32f244b50)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>.
22  */
23 package org.opengrok.indexer.util;
24 
25 import java.io.IOException;
26 
27 import org.junit.jupiter.api.BeforeAll;
28 import org.junit.jupiter.api.Test;
29 import org.opengrok.indexer.analysis.StreamSource;
30 
31 import static org.junit.jupiter.api.Assertions.assertEquals;
32 
33 /**
34  * Represents a container for tests of {@link LineBreaker}.
35  */
36 public class LineBreakerTest {
37 
38     private static LineBreaker brkr;
39 
40     @BeforeAll
setUpClass()41     public static void setUpClass() {
42         brkr = new LineBreaker();
43     }
44 
45     @Test
shouldSplitEmptyStringIntoOneLine()46     public void shouldSplitEmptyStringIntoOneLine() throws IOException {
47         StreamSource src = StreamSource.fromString("");
48         brkr.reset(src);
49         assertEquals(1, brkr.count(), "split count");
50         assertEquals(0, brkr.getOffset(0), "split offset");
51 
52         assertEquals(0, brkr.findLineIndex(0), "split find-index");
53         assertEquals(-1, brkr.findLineIndex(1), "split find-index");
54     }
55 
56     @Test
shouldSplitEndingLFsIntoOneMoreLine()57     public void shouldSplitEndingLFsIntoOneMoreLine() throws IOException {
58         StreamSource src = StreamSource.fromString("abc\ndef\n");
59         brkr.reset(src);
60         assertEquals(3, brkr.count(), "split count");
61         assertEquals(0, brkr.getOffset(0), "split offset");
62         assertEquals(4, brkr.getOffset(1), "split offset");
63         assertEquals(8, brkr.getOffset(2), "split offset");
64     }
65 
66     @Test
shouldSplitDocsWithNoLastLF()67     public void shouldSplitDocsWithNoLastLF() throws IOException {
68         StreamSource src = StreamSource.fromString("abc\r\ndef");
69         brkr.reset(src);
70         assertEquals(2, brkr.count(), "split count");
71         assertEquals(0, brkr.getOffset(0), "split offset");
72         assertEquals(5, brkr.getOffset(1), "split offset");
73         assertEquals(8, brkr.getOffset(2), "split offset");
74     }
75 
76     @Test
shouldHandleDocsOfLongerLength()77     public void shouldHandleDocsOfLongerLength() throws IOException {
78         //                                  0             0
79         //                    0-- -  5-- - -1--- - 5--- - 2-
80         final String INPUT = "ab\r\ncde\r\nefgh\r\nijk\r\nlm";
81         StreamSource src = StreamSource.fromString(INPUT);
82 
83         brkr.reset(src);
84         assertEquals(5, brkr.count(), "split count");
85         assertEquals(0, brkr.getOffset(0), "split offset");
86         assertEquals(4, brkr.getOffset(1), "split offset");
87         assertEquals(9, brkr.getOffset(2), "split offset");
88         assertEquals(15, brkr.getOffset(3), "split offset");
89         assertEquals(20, brkr.getOffset(4), "split offset");
90 
91         assertEquals(3, brkr.findLineIndex(19), "split find-index");
92         assertEquals(4, brkr.findLineIndex(20), "split find-index");
93         assertEquals(4, brkr.findLineIndex(21), "split find-index");
94     }
95 
96     @Test
shouldHandleInterspersedLineEndings()97     public void shouldHandleInterspersedLineEndings() throws IOException {
98         //                                    0                0
99         //                    0- -- -5 - -- - 1 - - - -5 -- - -2--
100         //                    0  1  2    3  4 5   6 7  8 9    0
101         //                                                    1
102         final String INPUT = "a\rb\nc\r\nd\r\r\r\n\re\n\rf\r\nghij";
103         StreamSource src = StreamSource.fromString(INPUT);
104 
105         brkr.reset(src);
106         assertEquals(11, brkr.count(), "split count");
107         assertEquals(0, brkr.getOffset(0), "split offset");
108         assertEquals(2, brkr.getOffset(1), "split offset");
109         assertEquals(4, brkr.getOffset(2), "split offset");
110         assertEquals(7, brkr.getOffset(3), "split offset");
111         assertEquals(9, brkr.getOffset(4), "split offset");
112         assertEquals(10, brkr.getOffset(5), "split offset");
113         assertEquals(12, brkr.getOffset(6), "split offset");
114         assertEquals(13, brkr.getOffset(7), "split offset");
115         assertEquals(15, brkr.getOffset(8), "split offset");
116         assertEquals(16, brkr.getOffset(9), "split offset");
117         assertEquals(19, brkr.getOffset(10), "split offset");
118         assertEquals(23, brkr.getOffset(11), "split offset");
119     }
120 }
121