xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/util/IOUtils.java (revision d6df19e1b22784c78f567cf74c42f18e3901b900)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved.
22  * Copyright (c) 2011, Trond Norbye.
23  * Portions Copyright (c) 2017, 2021, Chris Fraire <cfraire@me.com>.
24  */
25 package org.opengrok.indexer.util;
26 
27 import java.io.BufferedInputStream;
28 import java.io.Closeable;
29 import java.io.File;
30 import java.io.IOException;
31 import java.io.InputStream;
32 import java.io.InputStreamReader;
33 import java.io.Reader;
34 import java.nio.charset.Charset;
35 import java.nio.charset.StandardCharsets;
36 import java.nio.file.FileVisitResult;
37 import java.nio.file.Files;
38 import java.nio.file.Path;
39 import java.nio.file.SimpleFileVisitor;
40 import java.nio.file.attribute.BasicFileAttributes;
41 import java.util.ArrayList;
42 import java.util.Arrays;
43 import java.util.List;
44 import java.util.Map;
45 import java.util.logging.Level;
46 import java.util.logging.Logger;
47 import org.opengrok.indexer.logger.LoggerFactory;
48 
49 /**
50  * A small utility class to provide common functionality related to
51  * IO so that we don't need to duplicate the logic all over the place.
52  *
53  * @author Trond Norbye &lt;trond.norbye@gmail.com&gt;
54  */
55 public final class IOUtils {
56 
57     private static final Logger LOGGER = LoggerFactory.getLogger(IOUtils.class);
58 
59     // private to enforce static
IOUtils()60     private IOUtils() {
61     }
62 
63     /**
64      * If {@code c} is not null, tries to {@code close}, catching and logging
65      * any {@link IOException}.
66      * @param c null or a defined instance
67      */
close(Closeable c)68     public static void close(Closeable c) {
69         if (c != null) {
70             try {
71                 c.close();
72             } catch (IOException e) {
73                 LOGGER.log(Level.WARNING, "Failed to close resource", e);
74             }
75         }
76     }
77 
78     /**
79      * Delete directory recursively. This method does not follow symlinks.
80      * @param path directory to delete
81      * @throws IOException if any read error
82      */
removeRecursive(Path path)83     public static void removeRecursive(Path path) throws IOException {
84         Files.walkFileTree(path, new SimpleFileVisitor<>() {
85             @Override
86             public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
87                     throws IOException {
88                 Files.delete(file);
89                 return FileVisitResult.CONTINUE;
90             }
91 
92             @Override
93             public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException {
94                 // Try to delete the file anyway.
95                 Files.delete(file);
96                 return FileVisitResult.CONTINUE;
97             }
98 
99             @Override
100             public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
101                 if (exc == null) {
102                     Files.delete(dir);
103                     return FileVisitResult.CONTINUE;
104                 } else {
105                     // Directory traversal failed.
106                     throw exc;
107                 }
108             }
109         });
110     }
111 
112     /**
113      * List files in the directory recursively when looking for files only
114      * ending with suffix.
115      *
116      * @param root starting directory
117      * @param suffix suffix for the files
118      * @return recursively traversed list of files with given suffix
119      */
listFilesRec(File root, String suffix)120     public static List<File> listFilesRec(File root, String suffix) {
121         List<File> results = new ArrayList<>();
122         List<File> files = listFiles(root);
123         for (File f : files) {
124             if (f.isDirectory() && f.canRead() && !f.getName().equals(".") && !f.getName().equals("..")) {
125                 results.addAll(listFilesRec(f, suffix));
126             } else if (suffix != null && !suffix.isEmpty() && f.getName().endsWith(suffix)) {
127                 results.add(f);
128             } else if (suffix == null || suffix.isEmpty()) {
129                 results.add(f);
130             }
131         }
132         return results;
133     }
134 
135     /**
136      * List files in the directory.
137      *
138      * @param root starting directory
139      * @return list of file with suffix
140      */
listFiles(File root)141     public static List<File> listFiles(File root) {
142         return listFiles(root, null);
143     }
144 
145     /**
146      * List files in the directory when looking for files only ending with
147      * suffix.
148      *
149      * @param root starting directory
150      * @param suffix suffix for the files
151      * @return list of file with suffix
152      */
listFiles(File root, String suffix)153     public static List<File> listFiles(File root, String suffix) {
154         File[] files = root.listFiles((dir, name) -> {
155             if (suffix != null && !suffix.isEmpty()) {
156                 return name.endsWith(suffix);
157             } else {
158                 return true;
159             }
160         });
161         if (files == null) {
162             return new ArrayList<>();
163         }
164         return Arrays.asList(files);
165     }
166 
167     /**
168      * Create BOM stripped reader from the stream.
169      * Charset of the reader is set to UTF-8, UTF-16 or system's default.
170      * @param stream input stream
171      * @return reader for the stream without BOM
172      * @throws IOException if I/O exception occurred
173      */
createBOMStrippedReader(InputStream stream)174     public static Reader createBOMStrippedReader(InputStream stream) throws IOException {
175         return createBOMStrippedReader(stream, Charset.defaultCharset().name());
176     }
177 
178     /**
179      * Create BOM stripped reader from the stream.
180      * Charset of the reader is set to UTF-8, UTF-16 or default.
181      * @param stream input stream
182      * @param defaultCharset default charset
183      * @return reader for the stream without BOM
184      * @throws IOException if I/O exception occurred
185      */
createBOMStrippedReader(InputStream stream, String defaultCharset)186     public static Reader createBOMStrippedReader(InputStream stream, String defaultCharset) throws IOException {
187         InputStream in = stream.markSupported() ?
188                 stream : new BufferedInputStream(stream);
189 
190         String charset = null;
191 
192         in.mark(3);
193 
194         byte[] head = new byte[3];
195         int br = in.read(head, 0, 3);
196 
197         if (br >= 2
198                 && (head[0] == (byte) 0xFE && head[1] == (byte) 0xFF)
199                 || (head[0] == (byte) 0xFF && head[1] == (byte) 0xFE)) {
200             charset = StandardCharsets.UTF_16.name();
201             in.reset();
202         } else if (br >= 3 && head[0] == (byte) 0xEF && head[1] == (byte) 0xBB
203                 && head[2] == (byte) 0xBF) {
204             // InputStreamReader does not properly discard BOM on UTF8 streams,
205             // so don't reset the stream.
206             charset = StandardCharsets.UTF_8.name();
207         }
208 
209         if (charset == null) {
210             in.reset();
211             charset = defaultCharset;
212         }
213 
214         return new InputStreamReader(in, charset);
215     }
216 
217     /**
218      * Byte-order markers.
219      */
220     private static final Map<String, byte[]> BOMS = Map.of(
221             StandardCharsets.UTF_8.name(), utf8Bom(),
222             StandardCharsets.UTF_16BE.name(), utf16BeBom(),
223             StandardCharsets.UTF_16LE.name(), utf16LeBom()
224     );
225 
226     /**
227      * Gets a new array containing the UTF-8 BOM.
228      */
utf8Bom()229     public static byte[] utf8Bom() {
230         return new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};
231     }
232 
233     /**
234      * Gets a new array containing the UTF-16BE BOM (Big-Endian).
235      */
utf16BeBom()236     public static byte[] utf16BeBom() {
237         return new byte[]{(byte) 0xFE, (byte) 0xFF};
238     }
239 
240     /**
241      * Gets a new array containing the UTF-16LE BOM (Little-Endian).
242      */
utf16LeBom()243     public static byte[] utf16LeBom() {
244         return new byte[]{(byte) 0xFF, (byte) 0xFE};
245     }
246 
247     /**
248      * Gets a value indicating a UTF encoding if the array starts with a
249      * known byte sequence.
250      *
251      * @param sig a sequence of bytes to inspect for a BOM
252      * @return null if no BOM was identified; otherwise a defined charset name
253      */
findBOMEncoding(byte[] sig)254     public static String findBOMEncoding(byte[] sig) {
255         for (Map.Entry<String, byte[]> entry : BOMS.entrySet()) {
256             String encoding = entry.getKey();
257             byte[] bom = entry.getValue();
258             if (sig.length > bom.length) {
259                 int i = 0;
260                 while (i < bom.length && sig[i] == bom[i]) {
261                     i++;
262                 }
263                 if (i == bom.length) {
264                     return encoding;
265                 }
266             }
267         }
268         return null;
269     }
270 
271     /**
272      * Gets a value indicating the number of UTF BOM bytes at the start of an
273      * array.
274      *
275      * @param sig a sequence of bytes to inspect for a BOM
276      * @return 0 if the array doesn't start with a BOM; otherwise the number of
277      * BOM bytes
278      */
skipForBOM(byte[] sig)279     public static int skipForBOM(byte[] sig) {
280         String encoding = findBOMEncoding(sig);
281         if (encoding != null) {
282             byte[] bom = BOMS.get(encoding);
283             return bom.length;
284         }
285         return 0;
286     }
287 
288     /**
289      * Get the contents of a file or empty string if the file cannot be read.
290      * @param file file object
291      * @return string with the file contents
292      */
getFileContent(File file)293     public static String getFileContent(File file) {
294         if (file == null || !file.canRead()) {
295             return "";
296         }
297         try {
298             return Files.readString(file.toPath(), Charset.defaultCharset());
299         } catch (IOException e) {
300             LOGGER.log(Level.WARNING, "failed to read file: {0}", e.getMessage());
301         }
302         return "";
303     }
304 }
305