xref: /OpenGrok/opengrok-indexer/src/test/java/org/opengrok/indexer/analysis/PathTokenizerTest.java (revision 0e4c55544f8ea0a68e8bae37b0e502097e008ec1)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2013, 2021, Oracle and/or its affiliates. All rights reserved.
 */
239805b761SAdam Hornáček package org.opengrok.indexer.analysis;
24b5840353SAdam Hornáček 
25b5840353SAdam Hornáček import java.io.StringReader;
26b5840353SAdam Hornáček import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
27b5840353SAdam Hornáček import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
28*52d10766SAdam Hornacek import org.junit.jupiter.api.Test;
291161d3e8SAdam Hornacek 
30*52d10766SAdam Hornacek import static org.junit.jupiter.api.Assertions.assertEquals;
31*52d10766SAdam Hornacek import static org.junit.jupiter.api.Assertions.assertTrue;
32b5840353SAdam Hornáček 
33b5840353SAdam Hornáček /**
341161d3e8SAdam Hornacek  * Unit test class for PathTokenizer.
35b5840353SAdam Hornáček  * @author Lubos Kosco
36b5840353SAdam Hornáček  */
37b5840353SAdam Hornáček public class PathTokenizerTest {
38b5840353SAdam Hornáček 
39b5840353SAdam Hornáček     /**
40b5840353SAdam Hornáček      * Test of incrementToken method, of class PathTokenizer.
41b5840353SAdam Hornáček      */
42b5840353SAdam Hornáček     @Test
testIncrementToken()43b5840353SAdam Hornáček     public void testIncrementToken() throws Exception {
44b5840353SAdam Hornáček         String inputText = "alpha/beta/gamma/delta.ext";
45b5840353SAdam Hornáček         String[] expectedTokens = inputText.split("[/.]");
46b5840353SAdam Hornáček         PathTokenizer tokenizer = new PathTokenizer();
47b5840353SAdam Hornáček         tokenizer.setReader(new StringReader(inputText));
48b5840353SAdam Hornáček         CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
49b5840353SAdam Hornáček         OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);
50b5840353SAdam Hornáček 
51b5840353SAdam Hornáček         int count = 0;
52b5840353SAdam Hornáček         int dots = 0;
53b5840353SAdam Hornáček         tokenizer.reset();
54b5840353SAdam Hornáček         while (tokenizer.incrementToken()) {
551161d3e8SAdam Hornacek             if (term.toString().equals(".")) {
561161d3e8SAdam Hornacek                 dots++;
571161d3e8SAdam Hornacek                 break;
581161d3e8SAdam Hornacek             }
59*52d10766SAdam Hornacek             assertTrue(count < expectedTokens.length, "too many tokens");
60b5840353SAdam Hornáček             String expected = expectedTokens[count];
61*52d10766SAdam Hornacek             assertEquals(expected, term.toString(), "term");
62*52d10766SAdam Hornacek             assertEquals(inputText.indexOf(expected), offset.startOffset(), "start");
63*52d10766SAdam Hornacek             assertEquals(inputText.indexOf(expected) + expected.length(), offset.endOffset(), "end");
64b5840353SAdam Hornáček             count++;
65b5840353SAdam Hornáček         }
66b5840353SAdam Hornáček         tokenizer.end();
67b5840353SAdam Hornáček         tokenizer.close();
68*52d10766SAdam Hornacek         assertEquals(expectedTokens.length, count + dots, "wrong number of tokens");
69b5840353SAdam Hornáček     }
70b5840353SAdam Hornáček }
71