1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2010, 2021, Oracle and/or its affiliates. All rights reserved. 22 * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>. 23 */ 24 package org.opengrok.indexer.analysis; 25 26 import java.io.ByteArrayInputStream; 27 import java.io.ByteArrayOutputStream; 28 import java.io.File; 29 import java.io.IOException; 30 import java.io.InputStream; 31 import java.nio.charset.StandardCharsets; 32 import java.util.Map; 33 import java.util.jar.JarEntry; 34 import java.util.jar.JarOutputStream; 35 import java.util.zip.ZipEntry; 36 import java.util.zip.ZipOutputStream; 37 38 import org.junit.jupiter.api.Test; 39 import org.opengrok.indexer.analysis.archive.ZipAnalyzer; 40 import org.opengrok.indexer.analysis.c.CxxAnalyzerFactory; 41 import org.opengrok.indexer.analysis.document.MandocAnalyzer; 42 import org.opengrok.indexer.analysis.document.TroffAnalyzer; 43 import org.opengrok.indexer.analysis.executables.ELFAnalyzer; 44 import org.opengrok.indexer.analysis.executables.JarAnalyzer; 45 import org.opengrok.indexer.analysis.executables.JavaClassAnalyzer; 46 import org.opengrok.indexer.analysis.perl.PerlAnalyzer; 47 import org.opengrok.indexer.analysis.plain.PlainAnalyzer; 48 import org.opengrok.indexer.analysis.plain.XMLAnalyzer; 49 import org.opengrok.indexer.analysis.sh.ShAnalyzer; 50 import org.opengrok.indexer.analysis.sh.ShAnalyzerFactory; 51 52 import static org.junit.jupiter.api.Assertions.assertEquals; 53 import static org.junit.jupiter.api.Assertions.assertNotEquals; 54 import static org.junit.jupiter.api.Assertions.assertNotNull; 55 import static org.junit.jupiter.api.Assertions.assertNotSame; 56 import static org.junit.jupiter.api.Assertions.assertNull; 57 import static org.junit.jupiter.api.Assertions.assertSame; 58 import static org.junit.jupiter.api.Assertions.assertTrue; 59 60 /** 61 * Tests for the functionality provided by the AnalyzerGuru class. 62 */ 63 public class AnalyzerGuruTest { 64 65 @Test testGetFileTypeDescriptions()66 public void testGetFileTypeDescriptions() { 67 Map<String, String> map = AnalyzerGuru.getfileTypeDescriptions(); 68 assertTrue(map.size() > 0); 69 } 70 71 /** 72 * Test that we get the correct analyzer if the file name exactly matches a 73 * known extension. 74 */ 75 @Test testFileNameSameAsExtension()76 public void testFileNameSameAsExtension() throws Exception { 77 ByteArrayInputStream in = new ByteArrayInputStream( 78 "#!/bin/sh\nexec /usr/bin/zip \"$@\"\n".getBytes(StandardCharsets.US_ASCII)); 79 String file = "/dummy/path/to/source/zip"; 80 AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, file); 81 assertSame(ShAnalyzer.class, fa.getClass()); 82 } 83 84 @Test testUTF8ByteOrderMark()85 public void testUTF8ByteOrderMark() throws Exception { 86 byte[] xml = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF, // UTF-8 BOM 87 '<', '?', 'x', 'm', 'l', ' ', 88 'v', 'e', 'r', 's', 'i', 'o', 'n', '=', 89 '"', '1', '.', '0', '"', '?', '>'}; 90 ByteArrayInputStream in = new ByteArrayInputStream(xml); 91 AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file"); 92 assertSame(XMLAnalyzer.class, fa.getClass()); 93 } 94 95 @Test testUTF8ByteOrderMarkPlusCopyrightSymbol()96 public void testUTF8ByteOrderMarkPlusCopyrightSymbol() throws Exception { 97 byte[] doc = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF, // UTF-8 BOM 98 '/', '/', ' ', (byte) 0xC2, (byte) 0xA9}; 99 ByteArrayInputStream in = new ByteArrayInputStream(doc); 100 AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file"); 101 assertSame(PlainAnalyzer.class, fa.getClass(), "despite BOM as precise match,"); 102 } 103 104 @Test testUTF8ByteOrderMarkPlainFile()105 public void testUTF8ByteOrderMarkPlainFile() throws Exception { 106 byte[] bytes = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF, // UTF-8 BOM 107 'h', 'e', 'l', 'l', 'o', ' ', 108 'w', 'o', 'r', 'l', 'd'}; 109 110 ByteArrayInputStream in = new ByteArrayInputStream(bytes); 111 AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file"); 112 assertSame(PlainAnalyzer.class, fa.getClass()); 113 } 114 115 @Test testUTF16BigByteOrderMarkPlusCopyrightSymbol()116 public void testUTF16BigByteOrderMarkPlusCopyrightSymbol() throws Exception { 117 byte[] doc = {(byte) 0xFE, (byte) 0xFF, // UTF-16BE BOM 118 0, '#', 0, ' ', (byte) 0xC2, (byte) 0xA9}; 119 ByteArrayInputStream in = new ByteArrayInputStream(doc); 120 AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file"); 121 assertSame(PlainAnalyzer.class, fa.getClass(), "despite BOM as precise match,"); 122 } 123 124 @Test testUTF16LittleByteOrderMarkPlusCopyrightSymbol()125 public void testUTF16LittleByteOrderMarkPlusCopyrightSymbol() throws Exception { 126 byte[] doc = {(byte) 0xFF, (byte) 0xFE, // UTF-16BE BOM 127 '#', 0, ' ', 0, (byte) 0xA9, (byte) 0xC2}; 128 ByteArrayInputStream in = new ByteArrayInputStream(doc); 129 AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file"); 130 assertSame(PlainAnalyzer.class, fa.getClass(), "despite BOM as precise match,"); 131 } 132 133 @Test addExtension()134 public void addExtension() throws Exception { 135 // should not find analyzer for this unlikely extension 136 assertNull(AnalyzerGuru.find("file.unlikely_extension")); 137 138 AnalyzerFactory 139 faf = AnalyzerGuru.findFactory(ShAnalyzerFactory.class.getName()); 140 // should be the same factory as the built-in analyzer for sh scripts 141 assertSame(AnalyzerGuru.find("myscript.sh"), faf); 142 143 // add an analyzer for the extension and see that it is picked up 144 AnalyzerGuru.addExtension("UNLIKELY_EXTENSION", faf); 145 assertSame(ShAnalyzerFactory.class, AnalyzerGuru.find("file.unlikely_extension").getClass()); 146 147 // remove the mapping and verify that it is gone 148 AnalyzerGuru.addExtension("UNLIKELY_EXTENSION", null); 149 assertNull(AnalyzerGuru.find("file.unlikely_extension")); 150 } 151 152 @Test addPrefix()153 public void addPrefix() throws Exception { 154 // should not find analyzer for this unlikely extension 155 assertNull(AnalyzerGuru.find("unlikely_prefix.foo")); 156 157 AnalyzerFactory 158 faf = AnalyzerGuru.findFactory(ShAnalyzerFactory.class.getName()); 159 // should be the same factory as the built-in analyzer for sh scripts 160 assertSame(AnalyzerGuru.find("myscript.sh"), faf); 161 162 // add an analyzer for the prefix and see that it is picked up 163 AnalyzerGuru.addPrefix("UNLIKELY_PREFIX", faf); 164 assertSame(ShAnalyzerFactory.class, AnalyzerGuru.find("unlikely_prefix.foo").getClass()); 165 166 // remove the mapping and verify that it is gone 167 AnalyzerGuru.addPrefix("UNLIKELY_PREFIX", null); 168 assertNull(AnalyzerGuru.find("unlikely_prefix.foo")); 169 } 170 171 @Test testZip()172 public void testZip() throws IOException { 173 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 174 ZipOutputStream zos = new ZipOutputStream(baos); 175 zos.putNextEntry(new ZipEntry("dummy")); 176 zos.closeEntry(); 177 zos.close(); 178 InputStream in = new ByteArrayInputStream(baos.toByteArray()); 179 AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "dummy"); 180 assertSame(ZipAnalyzer.class, fa.getClass()); 181 } 182 183 @Test testJar()184 public void testJar() throws IOException { 185 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 186 JarOutputStream jos = new JarOutputStream(baos); 187 jos.putNextEntry(new JarEntry("dummy")); 188 jos.closeEntry(); 189 jos.close(); 190 InputStream in = new ByteArrayInputStream(baos.toByteArray()); 191 AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "dummy"); 192 assertSame(JarAnalyzer.class, fa.getClass()); 193 } 194 195 @Test testPlainText()196 public void testPlainText() throws IOException { 197 ByteArrayInputStream in = new ByteArrayInputStream( 198 "This is a plain text file.".getBytes(StandardCharsets.US_ASCII)); 199 assertSame(PlainAnalyzer.class, AnalyzerGuru.getAnalyzer(in, "dummy").getClass()); 200 } 201 202 @Test rfe2969()203 public void rfe2969() { 204 AnalyzerFactory faf = AnalyzerGuru.find("foo.hxx"); 205 assertNotNull(faf); 206 assertSame(CxxAnalyzerFactory.class, faf.getClass()); 207 } 208 209 @Test rfe3401()210 public void rfe3401() { 211 AnalyzerFactory f1 = AnalyzerGuru.find("main.c"); 212 assertNotNull(f1); 213 AnalyzerFactory f2 = AnalyzerGuru.find("main.cc"); 214 assertNotNull(f2); 215 assertNotSame(f1.getClass(), f2.getClass()); 216 } 217 218 /** 219 * Test that matching of full names works. Bug #859. 220 */ 221 @Test matchesFullName()222 public void matchesFullName() { 223 String s = File.separator; // so test works on Unix and Windows 224 String path = s + "path" + s + "to" + s + "Makefile"; 225 AnalyzerFactory faf = AnalyzerGuru.find(path); 226 assertSame(ShAnalyzerFactory.class, faf.getClass()); 227 faf = AnalyzerGuru.find("GNUMakefile"); 228 assertSame(ShAnalyzerFactory.class, faf.getClass()); 229 } 230 231 /** 232 * Test for obtaining a language analyzer's factory class. 233 * This should not fail even if package names change. 234 * The only assumptions made is that all the language analyzer 235 * and factory names follow the pattern: 236 * <p> 237 * language + "Analyzer", and 238 * language + "AnalyzerFactory" 239 */ 240 @Test getAnalyzerFactoryClass()241 public void getAnalyzerFactoryClass() { 242 Class<?> fcForSh = AnalyzerGuru.getFactoryClass("Sh"); 243 Class<?> fcForShAnalyzer = AnalyzerGuru.getFactoryClass("ShAnalyzer"); 244 Class<?> fcSimpleName = AnalyzerGuru.getFactoryClass("ShAnalyzerFactory"); 245 assertEquals(ShAnalyzerFactory.class, fcForSh); 246 assertEquals(ShAnalyzerFactory.class, fcForShAnalyzer); 247 assertEquals(ShAnalyzerFactory.class, fcSimpleName); 248 249 Class<?> fc = AnalyzerGuru.getFactoryClass("UnknownAnalyzerFactory"); 250 assertNull(fc); 251 } 252 253 @Test shouldNotThrowGettingCsprojOpening()254 public void shouldNotThrowGettingCsprojOpening() throws IOException { 255 InputStream res = getClass().getClassLoader().getResourceAsStream("analysis/a.csproj"); 256 assertNotNull(res, "despite embedded a.csproj,"); 257 assertSame(XMLAnalyzer.class, AnalyzerGuru.getAnalyzer(res, "dummy").getClass(), "despite normal a.csproj,"); 258 } 259 260 @Test shouldMatchPerlHashbang()261 public void shouldMatchPerlHashbang() throws IOException { 262 ByteArrayInputStream in = new ByteArrayInputStream( 263 "#!/usr/bin/perl -w".getBytes(StandardCharsets.US_ASCII)); 264 assertSame(PerlAnalyzer.class, AnalyzerGuru.getAnalyzer(in, "dummy").getClass(), "despite Perl hashbang,"); 265 } 266 267 @Test shouldMatchPerlHashbangSpaced()268 public void shouldMatchPerlHashbangSpaced() throws IOException { 269 ByteArrayInputStream in = new ByteArrayInputStream( 270 "\n\t #! /usr/bin/perl -w".getBytes(StandardCharsets.US_ASCII)); 271 assertSame(PerlAnalyzer.class, AnalyzerGuru.getAnalyzer(in, "dummy").getClass(), "despite Perl hashbang,"); 272 } 273 274 @Test shouldMatchEnvPerlHashbang()275 public void shouldMatchEnvPerlHashbang() throws IOException { 276 ByteArrayInputStream in = new ByteArrayInputStream( 277 "#!/usr/bin/env perl -w".getBytes(StandardCharsets.US_ASCII)); 278 assertSame(PerlAnalyzer.class, AnalyzerGuru.getAnalyzer(in, "dummy").getClass(), "despite env hashbang with perl,"); 279 } 280 281 @Test shouldMatchEnvPerlHashbangSpaced()282 public void shouldMatchEnvPerlHashbangSpaced() throws IOException { 283 ByteArrayInputStream in = new ByteArrayInputStream( 284 "\n\t #! /usr/bin/env\t perl -w".getBytes(StandardCharsets.US_ASCII)); 285 assertSame(PerlAnalyzer.class, AnalyzerGuru.getAnalyzer(in, "dummy").getClass(), 286 "despite env hashbang with perl,"); 287 } 288 289 @Test shouldNotMatchEnvLFPerlHashbang()290 public void shouldNotMatchEnvLFPerlHashbang() throws IOException { 291 ByteArrayInputStream in = new ByteArrayInputStream( 292 "#!/usr/bin/env\nperl".getBytes(StandardCharsets.US_ASCII)); 293 assertNotSame(PerlAnalyzer.class, AnalyzerGuru.getAnalyzer(in, "dummy").getClass(), "despite env hashbang LF,"); 294 } 295 296 @Test shouldMatchELFMagic()297 public void shouldMatchELFMagic() throws Exception { 298 byte[] elfmt = {(byte) 0x7F, 'E', 'L', 'F', (byte) 2, (byte) 2, (byte) 1, 299 (byte) 0x06}; 300 ByteArrayInputStream in = new ByteArrayInputStream(elfmt); 301 AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file"); 302 assertSame(ELFAnalyzer.class, fa.getClass(), "despite \\177ELF magic,"); 303 } 304 305 @Test shouldMatchJavaClassMagic()306 public void shouldMatchJavaClassMagic() throws Exception { 307 String oldMagic = "\312\376\272\276"; // cafebabe? 308 String newMagic = new String(new byte[] {(byte) 0xCA, (byte) 0xFE, 309 (byte) 0xBA, (byte) 0xBE}, StandardCharsets.UTF_8); 310 assertNotEquals(oldMagic, newMagic, "despite octal string, escape it as unicode,"); 311 312 // 0xCAFEBABE (4), minor (2), major (2) 313 byte[] dotclass = {(byte) 0xCA, (byte) 0xFE, (byte) 0xBA, (byte) 0xBE, 314 (byte) 0, (byte) 1, (byte) 0, (byte) 0x34}; 315 ByteArrayInputStream in = new ByteArrayInputStream(dotclass); 316 AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file"); 317 assertSame(JavaClassAnalyzer.class, fa.getClass(), "despite 0xCAFEBABE magic,"); 318 } 319 320 @Test shouldMatchTroffMagic()321 public void shouldMatchTroffMagic() throws Exception { 322 byte[] mandoc = {' ', '\n', '.', '\"', '\n', '.', 'T', 'H', (byte) 0x20, '\n'}; 323 ByteArrayInputStream in = new ByteArrayInputStream(mandoc); 324 AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file"); 325 assertSame(TroffAnalyzer.class, fa.getClass(), "despite .TH magic,"); 326 } 327 328 @Test shouldMatchMandocMagic()329 public void shouldMatchMandocMagic() throws Exception { 330 byte[] mandoc = {'\n', ' ', '.', '\"', '\n', '.', 'D', 'd', (byte) 0x20, '\n'}; 331 ByteArrayInputStream in = new ByteArrayInputStream(mandoc); 332 AbstractAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/dummy/file"); 333 assertSame(MandocAnalyzer.class, fa.getClass(), "despite .Dd magic,"); 334 } 335 } 336