1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. 22 * Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>. 23 */ 24 package org.opengrok.indexer.analysis.plain; 25 26 import java.io.IOException; 27 import java.util.ArrayList; 28 import java.util.List; 29 import org.apache.lucene.analysis.TokenStream; 30 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; 31 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; 32 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; 33 import org.opengrok.indexer.analysis.Definitions; 34 import org.opengrok.indexer.analysis.PendingToken; 35 import org.opengrok.indexer.analysis.PendingTokenOffsetsComparator; 36 import org.opengrok.indexer.analysis.StreamSource; 37 import org.opengrok.indexer.util.LineBreaker; 38 import org.opengrok.indexer.util.ReaderWrapper; 39 40 /** 41 * Represents a token stream from {@link Definitions}. 42 */ 43 public class DefinitionsTokenStream extends TokenStream { 44 45 /** 46 * Defines the ultimate queue of tokens to be produced by 47 * {@link #incrementToken()}. 48 */ 49 private final List<PendingToken> events = new ArrayList<>(); 50 51 private final CharTermAttribute termAtt = addAttribute( 52 CharTermAttribute.class); 53 private final OffsetAttribute offsetAtt = addAttribute( 54 OffsetAttribute.class); 55 private final PositionIncrementAttribute posIncrAtt = addAttribute( 56 PositionIncrementAttribute.class); 57 58 private int offset; 59 60 /** 61 * Initializes the stream by merging {@code defs} with cross-referenced 62 * line offsets read from {@code src}. 63 * @param defs a defined instance 64 * @param src a defined instance 65 * @param wrapper an optional instance 66 * @throws IOException if I/O error occurs 67 */ initialize(Definitions defs, StreamSource src, ReaderWrapper wrapper)68 public void initialize(Definitions defs, StreamSource src, 69 ReaderWrapper wrapper) throws IOException { 70 if (defs == null) { 71 throw new IllegalArgumentException("`defs' is null"); 72 } 73 if (src == null) { 74 throw new IllegalArgumentException("`src' is null"); 75 } 76 77 events.clear(); 78 offset = 0; 79 80 LineBreaker brk = new LineBreaker(); 81 brk.reset(src, wrapper); 82 createTokens(defs, brk); 83 } 84 85 /** 86 * Publishes the next, pending token from 87 * {@link #initialize(org.opengrok.indexer.analysis.Definitions, org.opengrok.indexer.analysis.StreamSource, 88 * org.opengrok.indexer.util.ReaderWrapper)}, 89 * if one is available. 90 * @return false if no more tokens; otherwise true 91 * @throws IOException in case of I/O error 92 */ 93 @Override incrementToken()94 public final boolean incrementToken() throws IOException { 95 if (offset < events.size()) { 96 PendingToken tok = events.get(offset++); 97 setAttribs(tok); 98 return true; 99 } 100 101 clearAttributes(); 102 return false; 103 } 104 setAttribs(PendingToken tok)105 private void setAttribs(PendingToken tok) { 106 clearAttributes(); 107 108 this.posIncrAtt.setPositionIncrement(1); 109 this.termAtt.setEmpty(); 110 this.termAtt.append(tok.str); 111 this.offsetAtt.setOffset(tok.start, tok.end); 112 } 113 createTokens(Definitions defs, LineBreaker brk)114 private void createTokens(Definitions defs, LineBreaker brk) { 115 for (Definitions.Tag tag : defs.getTags()) { 116 // Shift from ctags's convention. 117 int lineno = tag.line - 1; 118 119 if (lineno >= 0 && lineno < brk.count() && tag.symbol != null && 120 tag.text != null) { 121 int lineoff = brk.getOffset(lineno); 122 if (tag.lineStart >= 0) { 123 PendingToken tok = new PendingToken(tag.symbol, lineoff + 124 tag.lineStart, lineoff + tag.lineEnd); 125 events.add(tok); 126 } 127 } 128 } 129 130 events.sort(PendingTokenOffsetsComparator.INSTANCE); 131 } 132 } 133