xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/AnalyzerFactory.java (revision 110674a804dc9faa9a665dbac513689aaad3eca2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017, 2018, Chris Fraire <cfraire@me.com>.
23  */
24 package org.opengrok.indexer.analysis;
25 
26 import java.util.Collections;
27 import java.util.List;
28 
29 public abstract class AnalyzerFactory {
30     /**
31      * Cached analyzer object for the current thread (analyzer objects can be
32      * expensive to allocate).
33      */
34     protected final ThreadLocal<AbstractAnalyzer> cachedAnalyzer;
35     /**
36      * List of file names on which this kind of analyzer should be used.
37      */
38     protected List<String> names;
39     /**
40      * List of file prefixes on which this kind of analyzer should be
41      * used.
42      */
43     protected List<String> prefixes;
44     /**
45      * List of file extensions on which this kind of analyzer should be
46      * used.
47      */
48     protected List<String> suffixes;
49     /**
50      * List of magic strings used to recognize files on which this kind of
51      * analyzer should be used.
52      */
53     protected List<String> magics;
54     /**
55      * List of matchers which delegate files to different types of
56      * analyzers.
57      */
58     protected final List<FileAnalyzerFactory.Matcher> matchers;
59     /**
60      * The content type for the files recognized by this kind of analyzer.
61      */
62     protected final String contentType;
63     /**
64      * The genre for files recognized by this kind of analyzer.
65      */
66     protected AbstractAnalyzer.Genre genre;
67 
AnalyzerFactory(FileAnalyzerFactory.Matcher matcher, String contentType)68     protected AnalyzerFactory(FileAnalyzerFactory.Matcher matcher, String contentType) {
69         cachedAnalyzer = new ThreadLocal<>();
70         if (matcher == null) {
71             this.matchers = Collections.emptyList();
72         } else {
73             this.matchers = Collections.singletonList(matcher);
74         }
75         this.contentType = contentType;
76     }
77 
78     /**
79      * Get the list of file names recognized by this analyzer (names must
80      * match exactly, ignoring case).
81      *
82      * @return list of file names
83      */
getFileNames()84     final List<String> getFileNames() {
85         return names;
86     }
87 
88     /**
89      * Get the list of file prefixes recognized by this analyzer.
90      *
91      * @return list of prefixes
92      */
getPrefixes()93     final List<String> getPrefixes() {
94         return prefixes;
95     }
96 
97     /**
98      * Get the list of file extensions recognized by this analyzer.
99      *
100      * @return list of suffixes
101      */
getSuffixes()102     final List<String> getSuffixes() {
103         return suffixes;
104     }
105 
106     /**
107      * Get the list of magic strings recognized by this analyzer. If a file
108      * starts with one of these strings, an analyzer created by this factory
109      * should be used to analyze it.
110      *
111      * <p><b>Note:</b> Currently this assumes that the file is encoded with
112      * UTF-8 unless a BOM is detected.
113      *
114      * @return list of magic strings
115      */
getMagicStrings()116     final List<String> getMagicStrings() {
117         return magics;
118     }
119 
120     /**
121      * Get matchers that map file contents to analyzer factories
122      * programmatically.
123      *
124      * @return list of matchers
125      */
getMatchers()126     final List<FileAnalyzerFactory.Matcher> getMatchers() {
127         return matchers;
128     }
129 
130     /**
131      * Get the content type (MIME type) for analyzers returned by this factory.
132      *
133      * @return content type (could be {@code null} if it is unknown)
134      */
getContentType()135     final String getContentType() {
136         return contentType;
137     }
138 
139     /**
140      * The genre this analyzer factory belongs to.
141      *
142      * @return a genre
143      */
getGenre()144     public final AbstractAnalyzer.Genre getGenre() {
145         return genre;
146     }
147 
148     /**
149      * The user friendly name of this analyzer.
150      *
151      * @return a genre
152      */
getName()153     public abstract String getName();
154 
getAnalyzer()155     public abstract AbstractAnalyzer getAnalyzer();
156 
returnAnalyzer()157     public abstract void returnAnalyzer();
158 
newAnalyzer()159     protected abstract AbstractAnalyzer newAnalyzer();
160 }
161