xref: /OpenGrok/opengrok-indexer/src/test/java/org/opengrok/indexer/analysis/AnalyzerGuruTest.java (revision 52d10766ed1db3b0fd2c59a0da7292a32f244b50)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2010, 2021, Oracle and/or its affiliates. All rights reserved.
22  * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
23  */
24 package org.opengrok.indexer.analysis;
25 
26 import java.io.ByteArrayInputStream;
27 import java.io.ByteArrayOutputStream;
28 import java.io.File;
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.nio.charset.StandardCharsets;
32 import java.util.Map;
33 import java.util.jar.JarEntry;
34 import java.util.jar.JarOutputStream;
35 import java.util.zip.ZipEntry;
36 import java.util.zip.ZipOutputStream;
37 
38 import org.junit.jupiter.api.Test;
39 import org.opengrok.indexer.analysis.archive.ZipAnalyzer;
40 import org.opengrok.indexer.analysis.c.CxxAnalyzerFactory;
41 import org.opengrok.indexer.analysis.document.MandocAnalyzer;
42 import org.opengrok.indexer.analysis.document.TroffAnalyzer;
43 import org.opengrok.indexer.analysis.executables.ELFAnalyzer;
44 import org.opengrok.indexer.analysis.executables.JarAnalyzer;
45 import org.opengrok.indexer.analysis.executables.JavaClassAnalyzer;
46 import org.opengrok.indexer.analysis.perl.PerlAnalyzer;
47 import org.opengrok.indexer.analysis.plain.PlainAnalyzer;
48 import org.opengrok.indexer.analysis.plain.XMLAnalyzer;
49 import org.opengrok.indexer.analysis.sh.ShAnalyzer;
50 import org.opengrok.indexer.analysis.sh.ShAnalyzerFactory;
51 
52 import static org.junit.jupiter.api.Assertions.assertEquals;
53 import static org.junit.jupiter.api.Assertions.assertNotEquals;
54 import static org.junit.jupiter.api.Assertions.assertNotNull;
55 import static org.junit.jupiter.api.Assertions.assertNotSame;
56 import static org.junit.jupiter.api.Assertions.assertNull;
57 import static org.junit.jupiter.api.Assertions.assertSame;
58 import static org.junit.jupiter.api.Assertions.assertTrue;
59 
60 /**
61  * Tests for the functionality provided by the AnalyzerGuru class.
62  */
63 public class AnalyzerGuruTest {
64 
65     @Test
testGetFileTypeDescriptions()66     public void testGetFileTypeDescriptions() {
67         Map<String, String> map = AnalyzerGuru.getfileTypeDescriptions();
68         assertTrue(map.size() > 0);
69     }
70 
71     /**
72      * Test that we get the correct analyzer if the file name exactly matches a
73      * known extension.
74      */
75     @Test
testFileNameSameAsExtension()76     public void testFileNameSameAsExtension() throws Exception {
77         ByteArrayInputStream in = new ByteArrayInputStream(
78                 "#!/bin/sh\nexec /usr/bin/zip \"$@\"\n".getBytes(StandardCharsets.US_ASCII));
79         String file = "/dummy/path/to/source/zip";
80         AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, file);
81         assertSame(ShAnalyzer.class, fa.getClass());
82     }
83 
84     @Test
testUTF8ByteOrderMark()85     public void testUTF8ByteOrderMark() throws Exception {
86         byte[] xml = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF, // UTF-8 BOM
87                 '<', '?', 'x', 'm', 'l', ' ',
88                 'v', 'e', 'r', 's', 'i', 'o', 'n', '=',
89                 '"', '1', '.', '0', '"', '?', '>'};
90         ByteArrayInputStream in = new ByteArrayInputStream(xml);
91         AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file");
92         assertSame(XMLAnalyzer.class, fa.getClass());
93     }
94 
95     @Test
testUTF8ByteOrderMarkPlusCopyrightSymbol()96     public void testUTF8ByteOrderMarkPlusCopyrightSymbol() throws Exception {
97         byte[] doc = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF, // UTF-8 BOM
98                 '/', '/', ' ', (byte) 0xC2, (byte) 0xA9};
99         ByteArrayInputStream in = new ByteArrayInputStream(doc);
100         AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file");
101         assertSame(PlainAnalyzer.class, fa.getClass(), "despite BOM as precise match,");
102     }
103 
104     @Test
testUTF8ByteOrderMarkPlainFile()105     public void testUTF8ByteOrderMarkPlainFile() throws Exception {
106         byte[] bytes = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF, // UTF-8 BOM
107                 'h', 'e', 'l', 'l', 'o', ' ',
108                 'w', 'o', 'r', 'l', 'd'};
109 
110         ByteArrayInputStream in = new ByteArrayInputStream(bytes);
111         AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file");
112         assertSame(PlainAnalyzer.class, fa.getClass());
113     }
114 
115     @Test
testUTF16BigByteOrderMarkPlusCopyrightSymbol()116     public void testUTF16BigByteOrderMarkPlusCopyrightSymbol() throws Exception {
117         byte[] doc = {(byte) 0xFE, (byte) 0xFF, // UTF-16BE BOM
118                 0, '#', 0, ' ', (byte) 0xC2, (byte) 0xA9};
119         ByteArrayInputStream in = new ByteArrayInputStream(doc);
120         AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file");
121         assertSame(PlainAnalyzer.class, fa.getClass(), "despite BOM as precise match,");
122     }
123 
124     @Test
testUTF16LittleByteOrderMarkPlusCopyrightSymbol()125     public void testUTF16LittleByteOrderMarkPlusCopyrightSymbol() throws Exception {
126         byte[] doc = {(byte) 0xFF, (byte) 0xFE, // UTF-16BE BOM
127                 '#', 0, ' ', 0, (byte) 0xA9, (byte) 0xC2};
128         ByteArrayInputStream in = new ByteArrayInputStream(doc);
129         AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file");
130         assertSame(PlainAnalyzer.class, fa.getClass(), "despite BOM as precise match,");
131     }
132 
133     @Test
addExtension()134     public void addExtension() throws Exception {
135         // should not find analyzer for this unlikely extension
136         assertNull(AnalyzerGuru.find("file.unlikely_extension"));
137 
138         AnalyzerFactory
139                 faf = AnalyzerGuru.findFactory(ShAnalyzerFactory.class.getName());
140         // should be the same factory as the built-in analyzer for sh scripts
141         assertSame(AnalyzerGuru.find("myscript.sh"), faf);
142 
143         // add an analyzer for the extension and see that it is picked up
144         AnalyzerGuru.addExtension("UNLIKELY_EXTENSION", faf);
145         assertSame(ShAnalyzerFactory.class, AnalyzerGuru.find("file.unlikely_extension").getClass());
146 
147         // remove the mapping and verify that it is gone
148         AnalyzerGuru.addExtension("UNLIKELY_EXTENSION", null);
149         assertNull(AnalyzerGuru.find("file.unlikely_extension"));
150     }
151 
152     @Test
addPrefix()153     public void addPrefix() throws Exception {
154         // should not find analyzer for this unlikely extension
155         assertNull(AnalyzerGuru.find("unlikely_prefix.foo"));
156 
157         AnalyzerFactory
158                 faf = AnalyzerGuru.findFactory(ShAnalyzerFactory.class.getName());
159         // should be the same factory as the built-in analyzer for sh scripts
160         assertSame(AnalyzerGuru.find("myscript.sh"), faf);
161 
162         // add an analyzer for the prefix and see that it is picked up
163         AnalyzerGuru.addPrefix("UNLIKELY_PREFIX", faf);
164         assertSame(ShAnalyzerFactory.class, AnalyzerGuru.find("unlikely_prefix.foo").getClass());
165 
166         // remove the mapping and verify that it is gone
167         AnalyzerGuru.addPrefix("UNLIKELY_PREFIX", null);
168         assertNull(AnalyzerGuru.find("unlikely_prefix.foo"));
169     }
170 
171     @Test
testZip()172     public void testZip() throws IOException {
173         ByteArrayOutputStream baos = new ByteArrayOutputStream();
174         ZipOutputStream zos = new ZipOutputStream(baos);
175         zos.putNextEntry(new ZipEntry("dummy"));
176         zos.closeEntry();
177         zos.close();
178         InputStream in = new ByteArrayInputStream(baos.toByteArray());
179         AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "dummy");
180         assertSame(ZipAnalyzer.class, fa.getClass());
181     }
182 
183     @Test
testJar()184     public void testJar() throws IOException {
185         ByteArrayOutputStream baos = new ByteArrayOutputStream();
186         JarOutputStream jos = new JarOutputStream(baos);
187         jos.putNextEntry(new JarEntry("dummy"));
188         jos.closeEntry();
189         jos.close();
190         InputStream in = new ByteArrayInputStream(baos.toByteArray());
191         AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "dummy");
192         assertSame(JarAnalyzer.class, fa.getClass());
193     }
194 
195     @Test
testPlainText()196     public void testPlainText() throws IOException {
197         ByteArrayInputStream in = new ByteArrayInputStream(
198                 "This is a plain text file.".getBytes(StandardCharsets.US_ASCII));
199         assertSame(PlainAnalyzer.class, AnalyzerGuru.getAnalyzer(in, "dummy").getClass());
200     }
201 
202     @Test
rfe2969()203     public void rfe2969() {
204         AnalyzerFactory faf = AnalyzerGuru.find("foo.hxx");
205         assertNotNull(faf);
206         assertSame(CxxAnalyzerFactory.class, faf.getClass());
207     }
208 
209     @Test
rfe3401()210     public void rfe3401() {
211         AnalyzerFactory f1 = AnalyzerGuru.find("main.c");
212         assertNotNull(f1);
213         AnalyzerFactory f2 = AnalyzerGuru.find("main.cc");
214         assertNotNull(f2);
215         assertNotSame(f1.getClass(), f2.getClass());
216     }
217 
218     /**
219      * Test that matching of full names works. Bug #859.
220      */
221     @Test
matchesFullName()222     public void matchesFullName() {
223         String s = File.separator;  // so test works on Unix and Windows
224         String path = s + "path" + s + "to" + s + "Makefile";
225         AnalyzerFactory faf = AnalyzerGuru.find(path);
226         assertSame(ShAnalyzerFactory.class, faf.getClass());
227         faf = AnalyzerGuru.find("GNUMakefile");
228         assertSame(ShAnalyzerFactory.class, faf.getClass());
229     }
230 
231     /**
232      * Test for obtaining a language analyzer's factory class.
233      * This should not fail even if package names change.
234      * The only assumptions made is that all the language analyzer
235      * and factory names follow the pattern:
236      * <p>
237      * language + "Analyzer",  and
238      * language + "AnalyzerFactory"
239      */
240     @Test
getAnalyzerFactoryClass()241     public void getAnalyzerFactoryClass() {
242         Class<?> fcForSh = AnalyzerGuru.getFactoryClass("Sh");
243         Class<?> fcForShAnalyzer = AnalyzerGuru.getFactoryClass("ShAnalyzer");
244         Class<?> fcSimpleName = AnalyzerGuru.getFactoryClass("ShAnalyzerFactory");
245         assertEquals(ShAnalyzerFactory.class, fcForSh);
246         assertEquals(ShAnalyzerFactory.class, fcForShAnalyzer);
247         assertEquals(ShAnalyzerFactory.class, fcSimpleName);
248 
249         Class<?> fc = AnalyzerGuru.getFactoryClass("UnknownAnalyzerFactory");
250         assertNull(fc);
251     }
252 
253     @Test
shouldNotThrowGettingCsprojOpening()254     public void shouldNotThrowGettingCsprojOpening() throws IOException {
255         InputStream res = getClass().getClassLoader().getResourceAsStream("analysis/a.csproj");
256         assertNotNull(res, "despite embedded a.csproj,");
257         assertSame(XMLAnalyzer.class, AnalyzerGuru.getAnalyzer(res, "dummy").getClass(), "despite normal a.csproj,");
258     }
259 
260     @Test
shouldMatchPerlHashbang()261     public void shouldMatchPerlHashbang() throws IOException {
262         ByteArrayInputStream in = new ByteArrayInputStream(
263                 "#!/usr/bin/perl -w".getBytes(StandardCharsets.US_ASCII));
264         assertSame(PerlAnalyzer.class, AnalyzerGuru.getAnalyzer(in, "dummy").getClass(), "despite Perl hashbang,");
265     }
266 
267     @Test
shouldMatchPerlHashbangSpaced()268     public void shouldMatchPerlHashbangSpaced() throws IOException {
269         ByteArrayInputStream in = new ByteArrayInputStream(
270                 "\n\t #!  /usr/bin/perl -w".getBytes(StandardCharsets.US_ASCII));
271         assertSame(PerlAnalyzer.class, AnalyzerGuru.getAnalyzer(in, "dummy").getClass(), "despite Perl hashbang,");
272     }
273 
274     @Test
shouldMatchEnvPerlHashbang()275     public void shouldMatchEnvPerlHashbang() throws IOException {
276         ByteArrayInputStream in = new ByteArrayInputStream(
277                 "#!/usr/bin/env perl -w".getBytes(StandardCharsets.US_ASCII));
278         assertSame(PerlAnalyzer.class, AnalyzerGuru.getAnalyzer(in, "dummy").getClass(), "despite env hashbang with perl,");
279     }
280 
281     @Test
shouldMatchEnvPerlHashbangSpaced()282     public void shouldMatchEnvPerlHashbangSpaced() throws IOException {
283         ByteArrayInputStream in = new ByteArrayInputStream(
284                 "\n\t #!  /usr/bin/env\t perl -w".getBytes(StandardCharsets.US_ASCII));
285         assertSame(PerlAnalyzer.class, AnalyzerGuru.getAnalyzer(in, "dummy").getClass(),
286                 "despite env hashbang with perl,");
287     }
288 
289     @Test
shouldNotMatchEnvLFPerlHashbang()290     public void shouldNotMatchEnvLFPerlHashbang() throws IOException {
291         ByteArrayInputStream in = new ByteArrayInputStream(
292                 "#!/usr/bin/env\nperl".getBytes(StandardCharsets.US_ASCII));
293         assertNotSame(PerlAnalyzer.class, AnalyzerGuru.getAnalyzer(in, "dummy").getClass(), "despite env hashbang LF,");
294     }
295 
296     @Test
shouldMatchELFMagic()297     public void shouldMatchELFMagic() throws Exception {
298         byte[] elfmt = {(byte) 0x7F, 'E', 'L', 'F', (byte) 2, (byte) 2, (byte) 1,
299                 (byte) 0x06};
300         ByteArrayInputStream in = new ByteArrayInputStream(elfmt);
301         AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file");
302         assertSame(ELFAnalyzer.class, fa.getClass(), "despite \\177ELF magic,");
303     }
304 
305     @Test
shouldMatchJavaClassMagic()306     public void shouldMatchJavaClassMagic() throws Exception {
307         String oldMagic = "\312\376\272\276";      // cafebabe?
308         String newMagic = new String(new byte[] {(byte) 0xCA, (byte) 0xFE,
309                 (byte) 0xBA, (byte) 0xBE}, StandardCharsets.UTF_8);
310         assertNotEquals(oldMagic, newMagic, "despite octal string, escape it as unicode,");
311 
312         // 0xCAFEBABE (4), minor (2), major (2)
313         byte[] dotclass = {(byte) 0xCA, (byte) 0xFE, (byte) 0xBA, (byte) 0xBE,
314                 (byte) 0, (byte) 1, (byte) 0, (byte) 0x34};
315         ByteArrayInputStream in = new ByteArrayInputStream(dotclass);
316         AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file");
317         assertSame(JavaClassAnalyzer.class, fa.getClass(), "despite 0xCAFEBABE magic,");
318     }
319 
320     @Test
shouldMatchTroffMagic()321     public void shouldMatchTroffMagic() throws Exception {
322         byte[] mandoc = {' ', '\n', '.', '\"', '\n', '.', 'T', 'H', (byte) 0x20, '\n'};
323         ByteArrayInputStream in = new ByteArrayInputStream(mandoc);
324         AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file");
325         assertSame(TroffAnalyzer.class, fa.getClass(), "despite .TH magic,");
326     }
327 
328     @Test
shouldMatchMandocMagic()329     public void shouldMatchMandocMagic() throws Exception {
330         byte[] mandoc = {'\n', ' ', '.', '\"', '\n', '.', 'D', 'd', (byte) 0x20, '\n'};
331         ByteArrayInputStream in = new ByteArrayInputStream(mandoc);
332         AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file");
333         assertSame(MandocAnalyzer.class, fa.getClass(), "despite .Dd magic,");
334     }
335 }
336