/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * See LICENSE.txt included in this distribution for the specific * language governing permissions and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at LICENSE.txt. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2018, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2017, 2020, Chris Fraire . */ package org.opengrok.indexer.search.context; import java.io.StringReader; import java.util.HashSet; %% %public %class HistoryLineTokenizer %unicode %function next %type String %ignorecase %char %{ public static final HashSet stopset = new HashSet(); static { stopset.add( "a"); stopset.add( "an"); stopset.add( "and"); stopset.add( "are"); stopset.add( "as"); stopset.add( "at"); stopset.add( "be"); stopset.add( "but"); stopset.add( "by"); stopset.add( "for"); stopset.add( "if"); stopset.add( "in"); stopset.add( "into"); stopset.add( "is"); stopset.add( "it"); stopset.add( "no"); stopset.add( "not"); stopset.add( "of"); stopset.add( "on"); stopset.add( "or"); stopset.add( "s"); stopset.add( "such"); stopset.add( "t"); stopset.add( "that"); stopset.add( "the"); stopset.add( "their"); stopset.add( "then"); stopset.add( "there"); stopset.add( "these"); stopset.add( "they"); stopset.add( "this"); stopset.add( "to"); stopset.add( "was"); stopset.add( "will"); stopset.add( "with"); stopset.add( "/"); stopset.add( "\\"); stopset.add(":"); stopset.add("."); stopset.add("0.0"); stopset.add( "1.0"); } public void reInit(String str) { yyreset(new StringReader(str)); } /** Return the position of the first character in the current token. */ long getMatchStart() { return yychar; } /** Return the position of the first character after the current token. */ long getMatchEnd() { return yychar + yylength(); } %} Identifier = [a-zA-Z\p{Letter}_] [a-zA-Z\p{Letter}0-9\p{Number}_]* Number = [0-9]+|[0-9]+\.[0-9]+| "0[xX]" [0-9a-fA-F]+ Printable = [\@\$\%\^\&\-+=\?\.\:] %% {Identifier}|{Number}|{Printable} {String m = yytext(); if(stopset.contains(m)) { } else { return(m);}} <> { return null;} [^] {}