xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/JFlexSymbolMatcher.java (revision 5d9f3aa0ca3da3a714233f987fa732f62c0965f6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
22  */
23 package org.opengrok.indexer.analysis;
24 
25 import java.util.Locale;
26 import java.util.Set;
27 import java.util.regex.Matcher;
28 import java.util.regex.Pattern;
29 import org.opengrok.indexer.util.StringUtils;
30 import org.opengrok.indexer.util.UriUtils;
31 
32 /**
33  * Represents an abstract base class for subclasses of
34  * {@link JFlexStateStacker} that can publish as {@link ScanningSymbolMatcher}.
35  */
36 public abstract class JFlexSymbolMatcher extends JFlexStateStacker
37         implements ScanningSymbolMatcher {
38 
39     private SymbolMatchedListener symbolListener;
40     private NonSymbolMatchedListener nonSymbolListener;
41     private String disjointSpanClassName;
42 
43     /**
44      * Associates the specified listener, replacing the former one.
45      * @param l defined instance
46      */
47     @Override
setSymbolMatchedListener(SymbolMatchedListener l)48     public void setSymbolMatchedListener(SymbolMatchedListener l) {
49         if (l == null) {
50             throw new IllegalArgumentException("`l' is null");
51         }
52         symbolListener = l;
53     }
54 
55     /**
56      * Clears any association to a listener.
57      */
58     @Override
clearSymbolMatchedListener()59     public void clearSymbolMatchedListener() {
60         symbolListener = null;
61     }
62 
63     /**
64      * Associates the specified listener, replacing the former one.
65      * @param l defined instance
66      */
67     @Override
setNonSymbolMatchedListener(NonSymbolMatchedListener l)68     public void setNonSymbolMatchedListener(NonSymbolMatchedListener l) {
69         if (l == null) {
70             throw new IllegalArgumentException("`l' is null");
71         }
72         nonSymbolListener = l;
73     }
74 
75     /**
76      * Clears any association to a listener.
77      */
78     @Override
clearNonSymbolMatchedListener()79     public void clearNonSymbolMatchedListener() {
80         nonSymbolListener = null;
81     }
82 
83     /**
84      * Gets the class name value from the last call to
85      * {@link #onDisjointSpanChanged(String, long)}.
86      * @return a defined value or null
87      */
getDisjointSpanClassName()88     protected String getDisjointSpanClassName() {
89         return disjointSpanClassName;
90     }
91 
92     /**
93      * Raises
94      * {@link SymbolMatchedListener#symbolMatched(org.opengrok.indexer.analysis.SymbolMatchedEvent)}
95      * for a subscribed listener.
96      * @param str the symbol string
97      * @param start the symbol start position
98      */
onSymbolMatched(String str, long start)99     protected void onSymbolMatched(String str, long start) {
100         SymbolMatchedListener l = symbolListener;
101         if (l != null) {
102             SymbolMatchedEvent evt = new SymbolMatchedEvent(this, str, start,
103                 start + str.length());
104             l.symbolMatched(evt);
105         }
106     }
107 
108     /**
109      * Raises
110      * {@link SymbolMatchedListener#sourceCodeSeen(org.opengrok.indexer.analysis.SourceCodeSeenEvent)}
111      * for all subscribed listeners in turn.
112      * @param start the source code start position
113      */
onSourceCodeSeen(long start)114     protected void onSourceCodeSeen(long start) {
115         SymbolMatchedListener l = symbolListener;
116         if (l != null) {
117             SourceCodeSeenEvent evt = new SourceCodeSeenEvent(this, start);
118             l.sourceCodeSeen(evt);
119         }
120     }
121 
122     /**
123      * Calls {@link #onNonSymbolMatched(String, long)} with the
124      * {@link String#valueOf(char)} {@code c} and {@code start}.
125      * @param c the text character
126      * @param start the text start position
127      */
onNonSymbolMatched(char c, long start)128     protected void onNonSymbolMatched(char c, long start) {
129         onNonSymbolMatched(String.valueOf(c), start);
130     }
131 
132     /**
133      * Raises
134      * {@link NonSymbolMatchedListener#nonSymbolMatched(org.opengrok.indexer.analysis.TextMatchedEvent)}
135      * for a subscribed listener.
136      * @param str the text string
137      * @param start the text start position
138      */
onNonSymbolMatched(String str, long start)139     protected void onNonSymbolMatched(String str, long start) {
140         NonSymbolMatchedListener l = nonSymbolListener;
141         if (l != null) {
142             TextMatchedEvent evt = new TextMatchedEvent(this, str, start,
143                 start + str.length());
144             l.nonSymbolMatched(evt);
145         }
146     }
147 
148     /**
149      * Raises
150      * {@link NonSymbolMatchedListener#nonSymbolMatched(org.opengrok.indexer.analysis.TextMatchedEvent)}
151      * for a subscribed listener.
152      * @param str the text string
153      * @param hint the text hint
154      * @param start the text start position
155      */
onNonSymbolMatched(String str, EmphasisHint hint, long start)156     protected void onNonSymbolMatched(String str, EmphasisHint hint, long start) {
157         NonSymbolMatchedListener l = nonSymbolListener;
158         if (l != null) {
159             TextMatchedEvent evt = new TextMatchedEvent(this, str, hint, start,
160                 start + str.length());
161             l.nonSymbolMatched(evt);
162         }
163     }
164 
165     /**
166      * Raises
167      * {@link NonSymbolMatchedListener#keywordMatched(org.opengrok.indexer.analysis.TextMatchedEvent)}
168      * for a subscribed listener.
169      * @param str the text string
170      * @param start the text start position
171      */
onKeywordMatched(String str, long start)172     protected void onKeywordMatched(String str, long start) {
173         NonSymbolMatchedListener l = nonSymbolListener;
174         if (l != null) {
175             TextMatchedEvent evt = new TextMatchedEvent(this, str, start,
176                 start + str.length());
177             l.keywordMatched(evt);
178         }
179     }
180 
181     /**
182      * Calls {@link #setLineNumber(int)} with the sum of
183      * {@link #getLineNumber()} and the number of LFs in {@code str}, and then
184      * raises
185      * {@link NonSymbolMatchedListener#endOfLineMatched(org.opengrok.indexer.analysis.TextMatchedEvent)}
186      * for a subscribed listener.
187      * @param str the text string
188      * @param start the text start position
189      */
onEndOfLineMatched(String str, long start)190     protected void onEndOfLineMatched(String str, long start) {
191         setLineNumber(getLineNumber() + countEOLs(str));
192         NonSymbolMatchedListener l = nonSymbolListener;
193         if (l != null) {
194             TextMatchedEvent evt = new TextMatchedEvent(this, str, start,
195                 start + str.length());
196             l.endOfLineMatched(evt);
197         }
198     }
199 
200     /**
201      * Raises
202      * {@link NonSymbolMatchedListener#disjointSpanChanged(org.opengrok.indexer.analysis.DisjointSpanChangedEvent)}
203      * for a subscribed listener.
204      * @param className the text string
205      * @param position the text position
206      */
onDisjointSpanChanged(String className, long position)207     protected void onDisjointSpanChanged(String className, long position) {
208         disjointSpanClassName = className;
209         NonSymbolMatchedListener l = nonSymbolListener;
210         if (l != null) {
211             DisjointSpanChangedEvent evt = new DisjointSpanChangedEvent(this,
212                 className, position);
213             l.disjointSpanChanged(evt);
214         }
215     }
216 
217     /**
218      * Calls
219      * {@link #onUriMatched(String, long, Pattern)}
220      * with {@code uri}, {@code start}, and {@code null}.
221      * @param uri the URI string
222      * @param start the URI start position
223      */
onUriMatched(String uri, long start)224     protected void onUriMatched(String uri, long start) {
225         onUriMatched(uri, start, null);
226     }
227 
228     /**
229      * Raises
230      * {@link NonSymbolMatchedListener#linkageMatched(org.opengrok.indexer.analysis.LinkageMatchedEvent)}
231      * of {@link LinkageType#URI} for a subscribed listener.
232      * <p>First, the end of {@code uri} is possibly trimmed (with a
233      * corresponding call to {@link #yypushback(int)}) based on the result
234      * of {@link StringUtils#countURIEndingPushback(java.lang.String)} and
235      * optionally
236      * {@link StringUtils#countPushback(java.lang.String, java.util.regex.Pattern)}
237      * if {@code collateralCapture} is not null.
238      * <p>If the pushback count is equal to the length of {@code url}, then it
239      * is simply written -- and nothing is pushed back -- in order to avoid a
240      * never-ending {@code yylex()} loop.
241      *
242      * @param uri the URI string
243      * @param start the URI start position
244      * @param collateralCapture optional pattern to indicate characters which
245      * may have been captured as valid URI characters but in a particular
246      * context should mark the start of a pushback
247      */
onUriMatched(String uri, long start, Pattern collateralCapture)248     protected void onUriMatched(String uri, long start, Pattern collateralCapture) {
249         UriUtils.TrimUriResult result = UriUtils.trimUri(uri, true, collateralCapture);
250         if (result.getPushBackCount() > 0) {
251             yypushback(result.getPushBackCount());
252         }
253 
254         NonSymbolMatchedListener l = nonSymbolListener;
255         if (l != null) {
256             uri = result.getUri();
257             LinkageMatchedEvent evt = new LinkageMatchedEvent(this, uri,
258                 LinkageType.URI, start, start + uri.length());
259             l.linkageMatched(evt);
260         }
261     }
262 
263     /**
264      * Raises
265      * {@link NonSymbolMatchedListener#linkageMatched(org.opengrok.indexer.analysis.LinkageMatchedEvent)}
266      * of {@link LinkageType#FILELIKE} for a subscribed listener.
267      * @param str the text string
268      * @param start the text start position
269      */
onFilelikeMatched(String str, long start)270     protected void onFilelikeMatched(String str, long start) {
271         NonSymbolMatchedListener l = nonSymbolListener;
272         if (l != null) {
273             LinkageMatchedEvent evt = new LinkageMatchedEvent(this, str,
274                 LinkageType.FILELIKE, start, start + str.length());
275             l.linkageMatched(evt);
276         }
277     }
278 
279     /**
280      * Raises
281      * {@link NonSymbolMatchedListener#pathlikeMatched(org.opengrok.indexer.analysis.PathlikeMatchedEvent)}
282      * for a subscribed listener.
283      * @param str the path text string
284      * @param sep the path separator
285      * @param canonicalize a value indicating whether the path should be
286      * canonicalized
287      * @param start the text start position
288      */
onPathlikeMatched(String str, char sep, boolean canonicalize, long start)289     protected void onPathlikeMatched(String str, char sep, boolean canonicalize, long start) {
290         NonSymbolMatchedListener l = nonSymbolListener;
291         if (l != null) {
292             PathlikeMatchedEvent  evt = new PathlikeMatchedEvent(this, str,
293                 sep, canonicalize, start, start + str.length());
294             l.pathlikeMatched(evt);
295         }
296     }
297 
298     /**
299      * Raises
300      * {@link NonSymbolMatchedListener#linkageMatched(org.opengrok.indexer.analysis.LinkageMatchedEvent)}
301      * of {@link LinkageType#EMAIL} for a subscribed listener.
302      * @param str the text string
303      * @param start the text start position
304      */
onEmailAddressMatched(String str, long start)305     protected void onEmailAddressMatched(String str, long start) {
306         NonSymbolMatchedListener l = nonSymbolListener;
307         if (l != null) {
308             LinkageMatchedEvent evt = new LinkageMatchedEvent(this, str,
309                 LinkageType.EMAIL, start, start + str.length());
310             l.linkageMatched(evt);
311         }
312     }
313 
314     /**
315      * Raises
316      * {@link NonSymbolMatchedListener#linkageMatched(org.opengrok.indexer.analysis.LinkageMatchedEvent)}
317      * of {@link LinkageType#LABEL} for a subscribed listener.
318      * @param str the text string (literal capture)
319      * @param start the text start position
320      * @param lstr the text link string
321      */
onLabelMatched(String str, long start, String lstr)322     protected void onLabelMatched(String str, long start, String lstr) {
323         NonSymbolMatchedListener l = nonSymbolListener;
324         if (l != null) {
325             LinkageMatchedEvent evt = new LinkageMatchedEvent(this, str,
326                 LinkageType.LABEL, start, start + str.length(), lstr);
327             l.linkageMatched(evt);
328         }
329     }
330 
331     /**
332      * Raises
333      * {@link NonSymbolMatchedListener#linkageMatched(org.opengrok.indexer.analysis.LinkageMatchedEvent)}
334      * of {@link LinkageType#LABELDEF} for a subscribed listener.
335      * @param str the text string (literal capture)
336      * @param start the text start position
337      */
onLabelDefMatched(String str, long start)338     protected void onLabelDefMatched(String str, long start) {
339         NonSymbolMatchedListener l = nonSymbolListener;
340         if (l != null) {
341             LinkageMatchedEvent evt = new LinkageMatchedEvent(this, str,
342                 LinkageType.LABELDEF, start, start + str.length());
343             l.linkageMatched(evt);
344         }
345     }
346 
347     /**
348      * Raises
349      * {@link NonSymbolMatchedListener#linkageMatched(org.opengrok.indexer.analysis.LinkageMatchedEvent)}
350      * of {@link LinkageType#QUERY} for a subscribed listener.
351      * @param str the text string
352      * @param start the text start position
353      */
onQueryTermMatched(String str, long start)354     protected void onQueryTermMatched(String str, long start) {
355         NonSymbolMatchedListener l = nonSymbolListener;
356         if (l != null) {
357             LinkageMatchedEvent evt = new LinkageMatchedEvent(this, str,
358                 LinkageType.QUERY, start, start + str.length());
359             l.linkageMatched(evt);
360         }
361     }
362 
363     /**
364      * Raises
365      * {@link NonSymbolMatchedListener#linkageMatched(org.opengrok.indexer.analysis.LinkageMatchedEvent)}
366      * of {@link LinkageType#REFS} for a subscribed listener.
367      * @param str the text string
368      * @param start the text start position
369      */
onRefsTermMatched(String str, long start)370     protected void onRefsTermMatched(String str, long start) {
371         NonSymbolMatchedListener l = nonSymbolListener;
372         if (l != null) {
373             LinkageMatchedEvent evt = new LinkageMatchedEvent(this, str,
374                 LinkageType.REFS, start, start + str.length());
375             l.linkageMatched(evt);
376         }
377     }
378 
379     /**
380      * Raises
381      * {@link NonSymbolMatchedListener#scopeChanged(org.opengrok.indexer.analysis.ScopeChangedEvent)}
382      * for a subscribed listener.
383      * @param action the scope change action
384      * @param str the text string
385      * @param start the text start position
386      */
onScopeChanged(ScopeAction action, String str, long start)387     protected void onScopeChanged(ScopeAction action, String str, long start) {
388         NonSymbolMatchedListener l = nonSymbolListener;
389         if (l != null) {
390             ScopeChangedEvent evt = new ScopeChangedEvent(this, action, str,
391                 start, start + str.length());
392             l.scopeChanged(evt);
393         }
394     }
395 
396     /**
397      * Calls
398      * {@link #onFilteredSymbolMatched(String, long, Set, boolean)}
399      * with {@code str}, {@code start}, {@code keywords}, and {@code true}.
400      * @param str the text string
401      * @param start the text start position
402      * @param keywords an optional set to search for {@code str} as a member to
403      * indicate a keyword
404      * @return true if the {@code str} was not in {@code keywords} or if
405      * {@code keywords} was null
406      */
onFilteredSymbolMatched(String str, long start, Set<String> keywords)407     protected boolean onFilteredSymbolMatched(String str, long start, Set<String> keywords) {
408         return onFilteredSymbolMatched(str, start, keywords, true);
409     }
410 
411     /**
412      * Raises {@link #onKeywordMatched(String, long)} if
413      * {@code keywords} is not null and {@code str} is found as a member (in a
414      * case-sensitive or case-less search per {@code caseSensitive}); otherwise
415      * raises {@link #onSymbolMatched(String, long)}.
416      * @param str the text string
417      * @param start the text start position
418      * @param keywords an optional set to search for {@code str} as a member to
419      * indicate a keyword
420      * @param caseSensitive a value indicating if {@code keywords} should be
421      * searched for {@code str} as-is ({@code true}) or if the lower-case
422      * equivalent of {@code str} should be used ({@code false}).
423      * @return true if the {@code str} was not in {@code keywords} or if
424      * {@code keywords} was null
425      */
onFilteredSymbolMatched(String str, long start, Set<String> keywords, boolean caseSensitive)426     protected boolean onFilteredSymbolMatched(String str, long start, Set<String> keywords,
427             boolean caseSensitive) {
428 
429         if (keywords != null) {
430             String check = caseSensitive ? str : str.toLowerCase(Locale.ROOT);
431             if (keywords.contains(check)) {
432                 onKeywordMatched(str, start);
433                 return false;
434             }
435         }
436         onSymbolMatched(str, start);
437         return true;
438     }
439 
countEOLs(String str)440     private static int countEOLs(String str) {
441         Matcher m = StringUtils.STANDARD_EOL.matcher(str);
442         int n = 0;
443         while (m.find()) {
444             ++n;
445         }
446         return n;
447     }
448 }
449