/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2018, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
 */

package org.opengrok.indexer.search.context;

import java.io.StringReader;
import java.util.HashSet;
import java.util.Locale;

/**
 * Scanner that splits a string into identifier, number and single
 * punctuation tokens, silently skipping any token found in {@link #stopset}
 * and any character that is not part of a token.
 */
%%

%public
%class HistoryLineTokenizer
%unicode
%function next
%type String
%ignorecase
%char
%{
    /**
     * Tokens that {@code next()} never returns. All alphabetic entries are
     * lower case; the scanner lower-cases a match before looking it up here.
     */
    public static final HashSet<String> stopset = new HashSet<String>();

    static {
        final String[] stopTokens = {
            "a", "an", "and", "are", "as", "at", "be", "but", "by", "for",
            "if", "in", "into", "is", "it", "no", "not", "of", "on", "or",
            "s", "such", "t", "that", "the", "their", "then", "there",
            "these", "they", "this", "to", "was", "will", "with",
            // NOTE(review): "/" and "\" are not matched by any token rule
            // in this specification, so these two entries only matter to
            // other users of this public set -- confirm before removing.
            "/", "\\",
            ":", ".", "0.0", "1.0"
        };
        for (String stopToken : stopTokens) {
            stopset.add(stopToken);
        }
    }

    /**
     * Restart the scanner on a new input string.
     *
     * @param str the text to tokenize
     */
    public void reInit(String str) {
        yyreset(new StringReader(str));
    }

    /**
     * Return the position of the first character in the current token.
     *
     * @return the value of {@code yychar} for the current match
     */
    long getMatchStart() {
        return yychar;
    }

    /**
     * Return the position of the first character after the current token.
     *
     * @return {@code yychar} plus the length of the current match
     */
    long getMatchEnd() {
        return yychar + yylength();
    }
%}

Identifier = [a-zA-Z\p{Letter}_] [a-zA-Z\p{Letter}0-9\p{Number}_]*
// NOTE(review): "0[xX]" is a quoted string, which JFlex matches literally
// (the five characters 0 [ x X ]), so the last alternative does not match
// hex literals such as 0x1F. It is left unchanged here because altering it
// would change token boundaries -- confirm against any tokenizer that must
// agree with this one before fixing.
Number = [0-9]+|[0-9]+\.[0-9]+| "0[xX]" [0-9a-fA-F]+
Printable = [\@\$\%\^\&\-+=\?\.\:]

%%

{Identifier}|{Number}|{Printable} {
    String m = yytext();
    // %ignorecase affects pattern matching only; yytext() keeps the input's
    // original case, so normalize before the (case-sensitive) set lookup.
    // The token itself is returned unmodified.
    if (!stopset.contains(m.toLowerCase(Locale.ROOT))) {
        return m;
    }
    // Stop token: fall through so that scanning continues with the next match.
}
<<EOF>> { return null; }
// Any other single character is not part of a token and is discarded.
[^] {}