xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/TextAnalyzer.java (revision 5d9f3aa0ca3da3a714233f987fa732f62c0965f6)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
19b5840353SAdam Hornáček 
/*
 * Copyright (c) 2005, 2018, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
 */
249805b761SAdam Hornáček package org.opengrok.indexer.analysis;
25b5840353SAdam Hornáček 
26b5840353SAdam Hornáček import java.io.IOException;
27b5840353SAdam Hornáček import java.io.InputStream;
28b5840353SAdam Hornáček import java.io.Reader;
29b5840353SAdam Hornáček import java.nio.charset.StandardCharsets;
309cd38b98SChris Fraire import java.util.function.Supplier;
319cd38b98SChris Fraire 
329805b761SAdam Hornáček import org.opengrok.indexer.util.IOUtils;
33b5840353SAdam Hornáček 
34b5840353SAdam Hornáček public abstract class TextAnalyzer extends FileAnalyzer {
35b5840353SAdam Hornáček 
36b5840353SAdam Hornáček     /**
37b5840353SAdam Hornáček      * Creates a new instance of {@link TextAnalyzer}.
38b5840353SAdam Hornáček      * @param factory defined instance for the analyzer
39b5840353SAdam Hornáček      */
TextAnalyzer(AnalyzerFactory factory)4057eefa47SKryštof Tulinger     protected TextAnalyzer(AnalyzerFactory factory) {
41b5840353SAdam Hornáček         super(factory);
42b5840353SAdam Hornáček     }
43b5840353SAdam Hornáček 
44b5840353SAdam Hornáček     /**
45b5840353SAdam Hornáček      * Creates a new instance of {@link TextAnalyzer}.
46b5840353SAdam Hornáček      * @param factory defined instance for the analyzer
479cd38b98SChris Fraire      * @param symbolTokenizerFactory defined instance for the analyzer
48b5840353SAdam Hornáček      */
TextAnalyzer(AnalyzerFactory factory, Supplier<JFlexTokenizer> symbolTokenizerFactory)4957eefa47SKryštof Tulinger     protected TextAnalyzer(AnalyzerFactory factory,
509cd38b98SChris Fraire             Supplier<JFlexTokenizer> symbolTokenizerFactory) {
519cd38b98SChris Fraire         super(factory, symbolTokenizerFactory);
52b5840353SAdam Hornáček     }
53b5840353SAdam Hornáček 
54b5840353SAdam Hornáček     /**
55b5840353SAdam Hornáček      * Gets a version number to be used to tag processed documents so that
56b5840353SAdam Hornáček      * re-analysis can be re-done later if a stored version number is different
57b5840353SAdam Hornáček      * from the current implementation.
58b5840353SAdam Hornáček      * @return 20171223_00
59b5840353SAdam Hornáček      */
60b5840353SAdam Hornáček     @Override
getSpecializedVersionNo()61b5840353SAdam Hornáček     protected int getSpecializedVersionNo() {
62b5840353SAdam Hornáček         return 20171223_00; // Edit comment above too!
63b5840353SAdam Hornáček     }
64b5840353SAdam Hornáček 
65b5840353SAdam Hornáček     /**
66ff44f24aSAdam Hornáček      * Write a cross referenced HTML file reads the source from in.
67b5840353SAdam Hornáček      * @param args a defined instance
68b5840353SAdam Hornáček      * @return the instance used to write the cross-referencing
69b5840353SAdam Hornáček      * @throws IOException if an I/O error occurs
70b5840353SAdam Hornáček      */
71b5840353SAdam Hornáček     @Override
writeXref(WriteXrefArgs args)72b5840353SAdam Hornáček     public Xrefer writeXref(WriteXrefArgs args) throws IOException {
73a72324b1SAdam Hornáček         if (args == null) {
74a72324b1SAdam Hornáček             throw new IllegalArgumentException("`args' is null");
75a72324b1SAdam Hornáček         }
76b5840353SAdam Hornáček         Xrefer xref = newXref(args.getIn());
77b5840353SAdam Hornáček         xref.setDefs(args.getDefs());
78b5840353SAdam Hornáček         xref.setScopesEnabled(scopesEnabled);
79b5840353SAdam Hornáček         xref.setFoldingEnabled(foldingEnabled);
80b5840353SAdam Hornáček         xref.setAnnotation(args.getAnnotation());
81b5840353SAdam Hornáček         xref.setProject(args.getProject());
82b5840353SAdam Hornáček         xref.write(args.getOut());
83b5840353SAdam Hornáček         return xref;
84b5840353SAdam Hornáček     }
85b5840353SAdam Hornáček 
86b5840353SAdam Hornáček     /**
87b5840353SAdam Hornáček      * Derived classes should implement to create an xref for the language
88b5840353SAdam Hornáček      * supported by this analyzer.
89b5840353SAdam Hornáček      * @param reader the data to produce xref for
90b5840353SAdam Hornáček      * @return an xref instance
91b5840353SAdam Hornáček      */
newXref(Reader reader)92b5840353SAdam Hornáček     protected abstract Xrefer newXref(Reader reader);
93b5840353SAdam Hornáček 
94d051e170SChris Fraire     /**
95d051e170SChris Fraire      * Gets a BOM-stripped {@link Reader} (default UTF-8 charset) of the
96d051e170SChris Fraire      * specified {@code stream}, wrapped in a {@link ZeroReader}.
97d051e170SChris Fraire      */
getReader(InputStream stream)98b5840353SAdam Hornáček     protected Reader getReader(InputStream stream) throws IOException {
99d051e170SChris Fraire         // sourceRoot is read with UTF-8 as a default.
100d051e170SChris Fraire         return new ZeroReader(IOUtils.createBOMStrippedReader(stream,
101d051e170SChris Fraire                 StandardCharsets.UTF_8.name()));
102b5840353SAdam Hornáček     }
103b5840353SAdam Hornáček }
104