1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. 22 * Portions Copyright (c) 2017, 2018, Chris Fraire <cfraire@me.com>. 23 */ 24 package org.opengrok.indexer.analysis; 25 26 import java.util.Collections; 27 import java.util.List; 28 29 public abstract class AnalyzerFactory { 30 /** 31 * Cached analyzer object for the current thread (analyzer objects can be 32 * expensive to allocate). 33 */ 34 protected final ThreadLocal<AbstractAnalyzer> cachedAnalyzer; 35 /** 36 * List of file names on which this kind of analyzer should be used. 37 */ 38 protected List<String> names; 39 /** 40 * List of file prefixes on which this kind of analyzer should be 41 * used. 42 */ 43 protected List<String> prefixes; 44 /** 45 * List of file extensions on which this kind of analyzer should be 46 * used. 47 */ 48 protected List<String> suffixes; 49 /** 50 * List of magic strings used to recognize files on which this kind of 51 * analyzer should be used. 52 */ 53 protected List<String> magics; 54 /** 55 * List of matchers which delegate files to different types of 56 * analyzers. 57 */ 58 protected final List<FileAnalyzerFactory.Matcher> matchers; 59 /** 60 * The content type for the files recognized by this kind of analyzer. 61 */ 62 protected final String contentType; 63 /** 64 * The genre for files recognized by this kind of analyzer. 65 */ 66 protected AbstractAnalyzer.Genre genre; 67 AnalyzerFactory(FileAnalyzerFactory.Matcher matcher, String contentType)68 protected AnalyzerFactory(FileAnalyzerFactory.Matcher matcher, String contentType) { 69 cachedAnalyzer = new ThreadLocal<>(); 70 if (matcher == null) { 71 this.matchers = Collections.emptyList(); 72 } else { 73 this.matchers = Collections.singletonList(matcher); 74 } 75 this.contentType = contentType; 76 } 77 78 /** 79 * Get the list of file names recognized by this analyzer (names must 80 * match exactly, ignoring case). 81 * 82 * @return list of file names 83 */ getFileNames()84 final List<String> getFileNames() { 85 return names; 86 } 87 88 /** 89 * Get the list of file prefixes recognized by this analyzer. 90 * 91 * @return list of prefixes 92 */ getPrefixes()93 final List<String> getPrefixes() { 94 return prefixes; 95 } 96 97 /** 98 * Get the list of file extensions recognized by this analyzer. 99 * 100 * @return list of suffixes 101 */ getSuffixes()102 final List<String> getSuffixes() { 103 return suffixes; 104 } 105 106 /** 107 * Get the list of magic strings recognized by this analyzer. If a file 108 * starts with one of these strings, an analyzer created by this factory 109 * should be used to analyze it. 110 * 111 * <p><b>Note:</b> Currently this assumes that the file is encoded with 112 * UTF-8 unless a BOM is detected. 113 * 114 * @return list of magic strings 115 */ getMagicStrings()116 final List<String> getMagicStrings() { 117 return magics; 118 } 119 120 /** 121 * Get matchers that map file contents to analyzer factories 122 * programmatically. 123 * 124 * @return list of matchers 125 */ getMatchers()126 final List<FileAnalyzerFactory.Matcher> getMatchers() { 127 return matchers; 128 } 129 130 /** 131 * Get the content type (MIME type) for analyzers returned by this factory. 132 * 133 * @return content type (could be {@code null} if it is unknown) 134 */ getContentType()135 final String getContentType() { 136 return contentType; 137 } 138 139 /** 140 * The genre this analyzer factory belongs to. 141 * 142 * @return a genre 143 */ getGenre()144 public final AbstractAnalyzer.Genre getGenre() { 145 return genre; 146 } 147 148 /** 149 * The user friendly name of this analyzer. 150 * 151 * @return a genre 152 */ getName()153 public abstract String getName(); 154 getAnalyzer()155 public abstract AbstractAnalyzer getAnalyzer(); 156 returnAnalyzer()157 public abstract void returnAnalyzer(); 158 newAnalyzer()159 protected abstract AbstractAnalyzer newAnalyzer(); 160 } 161