1b5840353SAdam Hornáček /* 2b5840353SAdam Hornáček * CDDL HEADER START 3b5840353SAdam Hornáček * 4b5840353SAdam Hornáček * The contents of this file are subject to the terms of the 5b5840353SAdam Hornáček * Common Development and Distribution License (the "License"). 6b5840353SAdam Hornáček * You may not use this file except in compliance with the License. 7b5840353SAdam Hornáček * 8b5840353SAdam Hornáček * See LICENSE.txt included in this distribution for the specific 9b5840353SAdam Hornáček * language governing permissions and limitations under the License. 10b5840353SAdam Hornáček * 11b5840353SAdam Hornáček * When distributing Covered Code, include this CDDL HEADER in each 12b5840353SAdam Hornáček * file and include the License file at LICENSE.txt. 13b5840353SAdam Hornáček * If applicable, add the following below this CDDL HEADER, with the 14b5840353SAdam Hornáček * fields enclosed by brackets "[]" replaced with your own identifying 15b5840353SAdam Hornáček * information: Portions Copyright [yyyy] [name of copyright owner] 16b5840353SAdam Hornáček * 17b5840353SAdam Hornáček * CDDL HEADER END 18b5840353SAdam Hornáček */ 19b5840353SAdam Hornáček 20b5840353SAdam Hornáček /* 21b5840353SAdam Hornáček * Copyright (c) 2005, 2018, Oracle and/or its affiliates. All rights reserved. 22*5d9f3aa0SAdam Hornáček * Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>. 23b5840353SAdam Hornáček */ 249805b761SAdam Hornáček package org.opengrok.indexer.analysis; 25b5840353SAdam Hornáček 26b5840353SAdam Hornáček import java.io.IOException; 27b5840353SAdam Hornáček import java.io.InputStream; 28b5840353SAdam Hornáček import java.io.Reader; 29b5840353SAdam Hornáček import java.nio.charset.StandardCharsets; 309cd38b98SChris Fraire import java.util.function.Supplier; 319cd38b98SChris Fraire 329805b761SAdam Hornáček import org.opengrok.indexer.util.IOUtils; 33b5840353SAdam Hornáček 34b5840353SAdam Hornáček public abstract class TextAnalyzer extends FileAnalyzer { 35b5840353SAdam Hornáček 36b5840353SAdam Hornáček /** 37b5840353SAdam Hornáček * Creates a new instance of {@link TextAnalyzer}. 38b5840353SAdam Hornáček * @param factory defined instance for the analyzer 39b5840353SAdam Hornáček */ TextAnalyzer(AnalyzerFactory factory)4057eefa47SKryštof Tulinger protected TextAnalyzer(AnalyzerFactory factory) { 41b5840353SAdam Hornáček super(factory); 42b5840353SAdam Hornáček } 43b5840353SAdam Hornáček 44b5840353SAdam Hornáček /** 45b5840353SAdam Hornáček * Creates a new instance of {@link TextAnalyzer}. 46b5840353SAdam Hornáček * @param factory defined instance for the analyzer 479cd38b98SChris Fraire * @param symbolTokenizerFactory defined instance for the analyzer 48b5840353SAdam Hornáček */ TextAnalyzer(AnalyzerFactory factory, Supplier<JFlexTokenizer> symbolTokenizerFactory)4957eefa47SKryštof Tulinger protected TextAnalyzer(AnalyzerFactory factory, 509cd38b98SChris Fraire Supplier<JFlexTokenizer> symbolTokenizerFactory) { 519cd38b98SChris Fraire super(factory, symbolTokenizerFactory); 52b5840353SAdam Hornáček } 53b5840353SAdam Hornáček 54b5840353SAdam Hornáček /** 55b5840353SAdam Hornáček * Gets a version number to be used to tag processed documents so that 56b5840353SAdam Hornáček * re-analysis can be re-done later if a stored version number is different 57b5840353SAdam Hornáček * from the current implementation. 58b5840353SAdam Hornáček * @return 20171223_00 59b5840353SAdam Hornáček */ 60b5840353SAdam Hornáček @Override getSpecializedVersionNo()61b5840353SAdam Hornáček protected int getSpecializedVersionNo() { 62b5840353SAdam Hornáček return 20171223_00; // Edit comment above too! 63b5840353SAdam Hornáček } 64b5840353SAdam Hornáček 65b5840353SAdam Hornáček /** 66ff44f24aSAdam Hornáček * Write a cross referenced HTML file reads the source from in. 67b5840353SAdam Hornáček * @param args a defined instance 68b5840353SAdam Hornáček * @return the instance used to write the cross-referencing 69b5840353SAdam Hornáček * @throws IOException if an I/O error occurs 70b5840353SAdam Hornáček */ 71b5840353SAdam Hornáček @Override writeXref(WriteXrefArgs args)72b5840353SAdam Hornáček public Xrefer writeXref(WriteXrefArgs args) throws IOException { 73a72324b1SAdam Hornáček if (args == null) { 74a72324b1SAdam Hornáček throw new IllegalArgumentException("`args' is null"); 75a72324b1SAdam Hornáček } 76b5840353SAdam Hornáček Xrefer xref = newXref(args.getIn()); 77b5840353SAdam Hornáček xref.setDefs(args.getDefs()); 78b5840353SAdam Hornáček xref.setScopesEnabled(scopesEnabled); 79b5840353SAdam Hornáček xref.setFoldingEnabled(foldingEnabled); 80b5840353SAdam Hornáček xref.setAnnotation(args.getAnnotation()); 81b5840353SAdam Hornáček xref.setProject(args.getProject()); 82b5840353SAdam Hornáček xref.write(args.getOut()); 83b5840353SAdam Hornáček return xref; 84b5840353SAdam Hornáček } 85b5840353SAdam Hornáček 86b5840353SAdam Hornáček /** 87b5840353SAdam Hornáček * Derived classes should implement to create an xref for the language 88b5840353SAdam Hornáček * supported by this analyzer. 89b5840353SAdam Hornáček * @param reader the data to produce xref for 90b5840353SAdam Hornáček * @return an xref instance 91b5840353SAdam Hornáček */ newXref(Reader reader)92b5840353SAdam Hornáček protected abstract Xrefer newXref(Reader reader); 93b5840353SAdam Hornáček 94d051e170SChris Fraire /** 95d051e170SChris Fraire * Gets a BOM-stripped {@link Reader} (default UTF-8 charset) of the 96d051e170SChris Fraire * specified {@code stream}, wrapped in a {@link ZeroReader}. 97d051e170SChris Fraire */ getReader(InputStream stream)98b5840353SAdam Hornáček protected Reader getReader(InputStream stream) throws IOException { 99d051e170SChris Fraire // sourceRoot is read with UTF-8 as a default. 100d051e170SChris Fraire return new ZeroReader(IOUtils.createBOMStrippedReader(stream, 101d051e170SChris Fraire StandardCharsets.UTF_8.name())); 102b5840353SAdam Hornáček } 103b5840353SAdam Hornáček } 104