xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/plain/DefinitionsTokenStream.java (revision c389802dce0d8fde10e602cd558c2c2fc332bc29)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
22  * Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>.
23  */
24 package org.opengrok.indexer.analysis.plain;
25 
26 import java.io.IOException;
27 import java.util.ArrayList;
28 import java.util.List;
29 import org.apache.lucene.analysis.TokenStream;
30 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
31 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
32 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
33 import org.opengrok.indexer.analysis.Definitions;
34 import org.opengrok.indexer.analysis.PendingToken;
35 import org.opengrok.indexer.analysis.PendingTokenOffsetsComparator;
36 import org.opengrok.indexer.analysis.StreamSource;
37 import org.opengrok.indexer.util.LineBreaker;
38 import org.opengrok.indexer.util.ReaderWrapper;
39 
40 /**
41  * Represents a token stream from {@link Definitions}.
42  */
43 public class DefinitionsTokenStream extends TokenStream {
44 
45     /**
46      * Defines the ultimate queue of tokens to be produced by
47      * {@link #incrementToken()}.
48      */
49     private final List<PendingToken> events = new ArrayList<>();
50 
51     private final CharTermAttribute termAtt = addAttribute(
52         CharTermAttribute.class);
53     private final OffsetAttribute offsetAtt = addAttribute(
54         OffsetAttribute.class);
55     private final PositionIncrementAttribute posIncrAtt = addAttribute(
56         PositionIncrementAttribute.class);
57 
58     private int offset;
59 
60     /**
61      * Initializes the stream by merging {@code defs} with cross-referenced
62      * line offsets read from {@code src}.
63      * @param defs a defined instance
64      * @param src a defined instance
65      * @param wrapper an optional instance
66      * @throws IOException if I/O error occurs
67      */
initialize(Definitions defs, StreamSource src, ReaderWrapper wrapper)68     public void initialize(Definitions defs, StreamSource src,
69             ReaderWrapper wrapper) throws IOException {
70         if (defs == null) {
71             throw new IllegalArgumentException("`defs' is null");
72         }
73         if (src == null) {
74             throw new IllegalArgumentException("`src' is null");
75         }
76 
77         events.clear();
78         offset = 0;
79 
80         LineBreaker brk = new LineBreaker();
81         brk.reset(src, wrapper);
82         createTokens(defs, brk);
83     }
84 
85     /**
86      * Publishes the next, pending token from
87      * {@link #initialize(org.opengrok.indexer.analysis.Definitions, org.opengrok.indexer.analysis.StreamSource,
88      * org.opengrok.indexer.util.ReaderWrapper)},
89      * if one is available.
90      * @return false if no more tokens; otherwise true
91      * @throws IOException in case of I/O error
92      */
93     @Override
incrementToken()94     public final boolean incrementToken() throws IOException {
95         if (offset < events.size()) {
96             PendingToken tok = events.get(offset++);
97             setAttribs(tok);
98             return true;
99         }
100 
101         clearAttributes();
102         return false;
103     }
104 
setAttribs(PendingToken tok)105     private void setAttribs(PendingToken tok) {
106         clearAttributes();
107 
108         this.posIncrAtt.setPositionIncrement(1);
109         this.termAtt.setEmpty();
110         this.termAtt.append(tok.str);
111         this.offsetAtt.setOffset(tok.start, tok.end);
112     }
113 
createTokens(Definitions defs, LineBreaker brk)114     private void createTokens(Definitions defs, LineBreaker brk) {
115         for (Definitions.Tag tag : defs.getTags()) {
116             // Shift from ctags's convention.
117             int lineno = tag.line - 1;
118 
119             if (lineno >= 0 && lineno < brk.count() && tag.symbol != null &&
120                     tag.text != null) {
121                 int lineoff = brk.getOffset(lineno);
122                 if (tag.lineStart >= 0) {
123                     PendingToken tok = new PendingToken(tag.symbol, lineoff +
124                         tag.lineStart, lineoff + tag.lineEnd);
125                     events.add(tok);
126                 }
127             }
128         }
129 
130         events.sort(PendingTokenOffsetsComparator.INSTANCE);
131     }
132 }
133