xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/util/IOUtils.java (revision d6df19e1b22784c78f567cf74c42f18e3901b900)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, Trond Norbye.
 * Portions Copyright (c) 2017, 2021, Chris Fraire <cfraire@me.com>.
 */
259805b761SAdam Hornáček package org.opengrok.indexer.util;
26b5840353SAdam Hornáček 
27b5840353SAdam Hornáček import java.io.BufferedInputStream;
28b5840353SAdam Hornáček import java.io.Closeable;
29b5840353SAdam Hornáček import java.io.File;
30b5840353SAdam Hornáček import java.io.IOException;
31b5840353SAdam Hornáček import java.io.InputStream;
32b5840353SAdam Hornáček import java.io.InputStreamReader;
33b5840353SAdam Hornáček import java.io.Reader;
34b5840353SAdam Hornáček import java.nio.charset.Charset;
35b5840353SAdam Hornáček import java.nio.charset.StandardCharsets;
36b5840353SAdam Hornáček import java.nio.file.FileVisitResult;
37b5840353SAdam Hornáček import java.nio.file.Files;
38b5840353SAdam Hornáček import java.nio.file.Path;
39b5840353SAdam Hornáček import java.nio.file.SimpleFileVisitor;
40b5840353SAdam Hornáček import java.nio.file.attribute.BasicFileAttributes;
41b5840353SAdam Hornáček import java.util.ArrayList;
42b5840353SAdam Hornáček import java.util.Arrays;
43b5840353SAdam Hornáček import java.util.List;
44b5840353SAdam Hornáček import java.util.Map;
45b5840353SAdam Hornáček import java.util.logging.Level;
46b5840353SAdam Hornáček import java.util.logging.Logger;
479805b761SAdam Hornáček import org.opengrok.indexer.logger.LoggerFactory;
48b5840353SAdam Hornáček 
49b5840353SAdam Hornáček /**
50b5840353SAdam Hornáček  * A small utility class to provide common functionality related to
51b5840353SAdam Hornáček  * IO so that we don't need to duplicate the logic all over the place.
52b5840353SAdam Hornáček  *
53b5840353SAdam Hornáček  * @author Trond Norbye &lt;trond.norbye@gmail.com&gt;
54b5840353SAdam Hornáček  */
55b5840353SAdam Hornáček public final class IOUtils {
56b5840353SAdam Hornáček 
57b5840353SAdam Hornáček     private static final Logger LOGGER = LoggerFactory.getLogger(IOUtils.class);
58b5840353SAdam Hornáček 
59750b3115SChris Fraire     // private to enforce static
IOUtils()60b5840353SAdam Hornáček     private IOUtils() {
61b5840353SAdam Hornáček     }
62b5840353SAdam Hornáček 
63b5840353SAdam Hornáček     /**
64b5840353SAdam Hornáček      * If {@code c} is not null, tries to {@code close}, catching and logging
65b5840353SAdam Hornáček      * any {@link IOException}.
66b5840353SAdam Hornáček      * @param c null or a defined instance
67b5840353SAdam Hornáček      */
close(Closeable c)68b5840353SAdam Hornáček     public static void close(Closeable c) {
69b5840353SAdam Hornáček         if (c != null) {
70b5840353SAdam Hornáček             try {
71b5840353SAdam Hornáček                 c.close();
72b5840353SAdam Hornáček             } catch (IOException e) {
73750b3115SChris Fraire                 LOGGER.log(Level.WARNING, "Failed to close resource", e);
74b5840353SAdam Hornáček             }
75b5840353SAdam Hornáček         }
76b5840353SAdam Hornáček     }
77b5840353SAdam Hornáček 
78b5840353SAdam Hornáček     /**
79b5840353SAdam Hornáček      * Delete directory recursively. This method does not follow symlinks.
80b5840353SAdam Hornáček      * @param path directory to delete
81b5840353SAdam Hornáček      * @throws IOException if any read error
82b5840353SAdam Hornáček      */
removeRecursive(Path path)83ff44f24aSAdam Hornáček     public static void removeRecursive(Path path) throws IOException {
84c6f0939bSAdam Hornacek         Files.walkFileTree(path, new SimpleFileVisitor<>() {
85b5840353SAdam Hornáček             @Override
86b5840353SAdam Hornáček             public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
87b5840353SAdam Hornáček                     throws IOException {
88b5840353SAdam Hornáček                 Files.delete(file);
89b5840353SAdam Hornáček                 return FileVisitResult.CONTINUE;
90b5840353SAdam Hornáček             }
91b5840353SAdam Hornáček 
92b5840353SAdam Hornáček             @Override
93b5840353SAdam Hornáček             public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException {
94b5840353SAdam Hornáček                 // Try to delete the file anyway.
95b5840353SAdam Hornáček                 Files.delete(file);
96b5840353SAdam Hornáček                 return FileVisitResult.CONTINUE;
97b5840353SAdam Hornáček             }
98b5840353SAdam Hornáček 
99b5840353SAdam Hornáček             @Override
100b5840353SAdam Hornáček             public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
101b5840353SAdam Hornáček                 if (exc == null) {
102b5840353SAdam Hornáček                     Files.delete(dir);
103b5840353SAdam Hornáček                     return FileVisitResult.CONTINUE;
104b5840353SAdam Hornáček                 } else {
105b5840353SAdam Hornáček                     // Directory traversal failed.
106b5840353SAdam Hornáček                     throw exc;
107b5840353SAdam Hornáček                 }
108b5840353SAdam Hornáček             }
109b5840353SAdam Hornáček         });
110b5840353SAdam Hornáček     }
111b5840353SAdam Hornáček 
112b5840353SAdam Hornáček     /**
113b5840353SAdam Hornáček      * List files in the directory recursively when looking for files only
114b5840353SAdam Hornáček      * ending with suffix.
115b5840353SAdam Hornáček      *
116b5840353SAdam Hornáček      * @param root starting directory
117b5840353SAdam Hornáček      * @param suffix suffix for the files
118b5840353SAdam Hornáček      * @return recursively traversed list of files with given suffix
119b5840353SAdam Hornáček      */
listFilesRec(File root, String suffix)120b5840353SAdam Hornáček     public static List<File> listFilesRec(File root, String suffix) {
121b5840353SAdam Hornáček         List<File> results = new ArrayList<>();
122b5840353SAdam Hornáček         List<File> files = listFiles(root);
123b5840353SAdam Hornáček         for (File f : files) {
124b5840353SAdam Hornáček             if (f.isDirectory() && f.canRead() && !f.getName().equals(".") && !f.getName().equals("..")) {
125b5840353SAdam Hornáček                 results.addAll(listFilesRec(f, suffix));
126b5840353SAdam Hornáček             } else if (suffix != null && !suffix.isEmpty() && f.getName().endsWith(suffix)) {
127b5840353SAdam Hornáček                 results.add(f);
128b5840353SAdam Hornáček             } else if (suffix == null || suffix.isEmpty()) {
129b5840353SAdam Hornáček                 results.add(f);
130b5840353SAdam Hornáček             }
131b5840353SAdam Hornáček         }
132b5840353SAdam Hornáček         return results;
133b5840353SAdam Hornáček     }
134b5840353SAdam Hornáček 
135b5840353SAdam Hornáček     /**
136b5840353SAdam Hornáček      * List files in the directory.
137b5840353SAdam Hornáček      *
138b5840353SAdam Hornáček      * @param root starting directory
139b5840353SAdam Hornáček      * @return list of file with suffix
140b5840353SAdam Hornáček      */
listFiles(File root)141b5840353SAdam Hornáček     public static List<File> listFiles(File root) {
142b5840353SAdam Hornáček         return listFiles(root, null);
143b5840353SAdam Hornáček     }
144b5840353SAdam Hornáček 
145b5840353SAdam Hornáček     /**
146b5840353SAdam Hornáček      * List files in the directory when looking for files only ending with
147b5840353SAdam Hornáček      * suffix.
148b5840353SAdam Hornáček      *
149b5840353SAdam Hornáček      * @param root starting directory
150b5840353SAdam Hornáček      * @param suffix suffix for the files
151b5840353SAdam Hornáček      * @return list of file with suffix
152b5840353SAdam Hornáček      */
listFiles(File root, String suffix)153b5840353SAdam Hornáček     public static List<File> listFiles(File root, String suffix) {
1541161d3e8SAdam Hornacek         File[] files = root.listFiles((dir, name) -> {
155b5840353SAdam Hornáček             if (suffix != null && !suffix.isEmpty()) {
1564b613dedSAdam Hornacek                 return name.endsWith(suffix);
157b5840353SAdam Hornáček             } else {
158b5840353SAdam Hornáček                 return true;
159b5840353SAdam Hornáček             }
160b5840353SAdam Hornáček         });
161b5840353SAdam Hornáček         if (files == null) {
162b5840353SAdam Hornáček             return new ArrayList<>();
163b5840353SAdam Hornáček         }
164b5840353SAdam Hornáček         return Arrays.asList(files);
165b5840353SAdam Hornáček     }
166b5840353SAdam Hornáček 
167b5840353SAdam Hornáček     /**
168b5840353SAdam Hornáček      * Create BOM stripped reader from the stream.
169b5840353SAdam Hornáček      * Charset of the reader is set to UTF-8, UTF-16 or system's default.
170b5840353SAdam Hornáček      * @param stream input stream
171b5840353SAdam Hornáček      * @return reader for the stream without BOM
172b5840353SAdam Hornáček      * @throws IOException if I/O exception occurred
173b5840353SAdam Hornáček      */
createBOMStrippedReader(InputStream stream)174b5840353SAdam Hornáček     public static Reader createBOMStrippedReader(InputStream stream) throws IOException {
175b5840353SAdam Hornáček         return createBOMStrippedReader(stream, Charset.defaultCharset().name());
176b5840353SAdam Hornáček     }
177b5840353SAdam Hornáček 
178b5840353SAdam Hornáček     /**
179b5840353SAdam Hornáček      * Create BOM stripped reader from the stream.
180b5840353SAdam Hornáček      * Charset of the reader is set to UTF-8, UTF-16 or default.
181b5840353SAdam Hornáček      * @param stream input stream
182b5840353SAdam Hornáček      * @param defaultCharset default charset
183b5840353SAdam Hornáček      * @return reader for the stream without BOM
184b5840353SAdam Hornáček      * @throws IOException if I/O exception occurred
185b5840353SAdam Hornáček      */
createBOMStrippedReader(InputStream stream, String defaultCharset)186b5840353SAdam Hornáček     public static Reader createBOMStrippedReader(InputStream stream, String defaultCharset) throws IOException {
187b5840353SAdam Hornáček         InputStream in = stream.markSupported() ?
188b5840353SAdam Hornáček                 stream : new BufferedInputStream(stream);
189b5840353SAdam Hornáček 
190b5840353SAdam Hornáček         String charset = null;
191b5840353SAdam Hornáček 
192b5840353SAdam Hornáček         in.mark(3);
193b5840353SAdam Hornáček 
194b5840353SAdam Hornáček         byte[] head = new byte[3];
195b5840353SAdam Hornáček         int br = in.read(head, 0, 3);
196b5840353SAdam Hornáček 
197b5840353SAdam Hornáček         if (br >= 2
198b5840353SAdam Hornáček                 && (head[0] == (byte) 0xFE && head[1] == (byte) 0xFF)
199b5840353SAdam Hornáček                 || (head[0] == (byte) 0xFF && head[1] == (byte) 0xFE)) {
200750b3115SChris Fraire             charset = StandardCharsets.UTF_16.name();
201b5840353SAdam Hornáček             in.reset();
202b5840353SAdam Hornáček         } else if (br >= 3 && head[0] == (byte) 0xEF && head[1] == (byte) 0xBB
203b5840353SAdam Hornáček                 && head[2] == (byte) 0xBF) {
204b5840353SAdam Hornáček             // InputStreamReader does not properly discard BOM on UTF8 streams,
205b5840353SAdam Hornáček             // so don't reset the stream.
206b5840353SAdam Hornáček             charset = StandardCharsets.UTF_8.name();
207b5840353SAdam Hornáček         }
208b5840353SAdam Hornáček 
209b5840353SAdam Hornáček         if (charset == null) {
210b5840353SAdam Hornáček             in.reset();
211b5840353SAdam Hornáček             charset = defaultCharset;
212b5840353SAdam Hornáček         }
213b5840353SAdam Hornáček 
214b5840353SAdam Hornáček         return new InputStreamReader(in, charset);
215b5840353SAdam Hornáček     }
216b5840353SAdam Hornáček 
217b5840353SAdam Hornáček     /**
218b5840353SAdam Hornáček      * Byte-order markers.
219b5840353SAdam Hornáček      */
220a3065a28SAdam Hornacek     private static final Map<String, byte[]> BOMS = Map.of(
221*d6df19e1SAdam Hornacek             StandardCharsets.UTF_8.name(), utf8Bom(),
222*d6df19e1SAdam Hornacek             StandardCharsets.UTF_16BE.name(), utf16BeBom(),
223*d6df19e1SAdam Hornacek             StandardCharsets.UTF_16LE.name(), utf16LeBom()
224a3065a28SAdam Hornacek     );
225b5840353SAdam Hornáček 
226b5840353SAdam Hornáček     /**
227750b3115SChris Fraire      * Gets a new array containing the UTF-8 BOM.
228750b3115SChris Fraire      */
utf8Bom()229*d6df19e1SAdam Hornacek     public static byte[] utf8Bom() {
230750b3115SChris Fraire         return new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};
231750b3115SChris Fraire     }
232750b3115SChris Fraire 
233750b3115SChris Fraire     /**
234750b3115SChris Fraire      * Gets a new array containing the UTF-16BE BOM (Big-Endian).
235750b3115SChris Fraire      */
utf16BeBom()236*d6df19e1SAdam Hornacek     public static byte[] utf16BeBom() {
237750b3115SChris Fraire         return new byte[]{(byte) 0xFE, (byte) 0xFF};
238750b3115SChris Fraire     }
239750b3115SChris Fraire 
240750b3115SChris Fraire     /**
241750b3115SChris Fraire      * Gets a new array containing the UTF-16LE BOM (Little-Endian).
242750b3115SChris Fraire      */
utf16LeBom()243*d6df19e1SAdam Hornacek     public static byte[] utf16LeBom() {
244750b3115SChris Fraire         return new byte[]{(byte) 0xFF, (byte) 0xFE};
245750b3115SChris Fraire     }
246750b3115SChris Fraire 
247750b3115SChris Fraire     /**
248b5840353SAdam Hornáček      * Gets a value indicating a UTF encoding if the array starts with a
249b5840353SAdam Hornáček      * known byte sequence.
250b5840353SAdam Hornáček      *
251b5840353SAdam Hornáček      * @param sig a sequence of bytes to inspect for a BOM
252b5840353SAdam Hornáček      * @return null if no BOM was identified; otherwise a defined charset name
253b5840353SAdam Hornáček      */
findBOMEncoding(byte[] sig)254b5840353SAdam Hornáček     public static String findBOMEncoding(byte[] sig) {
255b5840353SAdam Hornáček         for (Map.Entry<String, byte[]> entry : BOMS.entrySet()) {
256b5840353SAdam Hornáček             String encoding = entry.getKey();
257b5840353SAdam Hornáček             byte[] bom = entry.getValue();
258b5840353SAdam Hornáček             if (sig.length > bom.length) {
259b5840353SAdam Hornáček                 int i = 0;
260b5840353SAdam Hornáček                 while (i < bom.length && sig[i] == bom[i]) {
261b5840353SAdam Hornáček                     i++;
262b5840353SAdam Hornáček                 }
263a72324b1SAdam Hornáček                 if (i == bom.length) {
264a72324b1SAdam Hornáček                     return encoding;
265a72324b1SAdam Hornáček                 }
266b5840353SAdam Hornáček             }
267b5840353SAdam Hornáček         }
268b5840353SAdam Hornáček         return null;
269b5840353SAdam Hornáček     }
270b5840353SAdam Hornáček 
271b5840353SAdam Hornáček     /**
272b5840353SAdam Hornáček      * Gets a value indicating the number of UTF BOM bytes at the start of an
273b5840353SAdam Hornáček      * array.
274b5840353SAdam Hornáček      *
275b5840353SAdam Hornáček      * @param sig a sequence of bytes to inspect for a BOM
276b5840353SAdam Hornáček      * @return 0 if the array doesn't start with a BOM; otherwise the number of
277b5840353SAdam Hornáček      * BOM bytes
278b5840353SAdam Hornáček      */
skipForBOM(byte[] sig)279b5840353SAdam Hornáček     public static int skipForBOM(byte[] sig) {
280b5840353SAdam Hornáček         String encoding = findBOMEncoding(sig);
281b5840353SAdam Hornáček         if (encoding != null) {
282b5840353SAdam Hornáček             byte[] bom = BOMS.get(encoding);
283b5840353SAdam Hornáček             return bom.length;
284b5840353SAdam Hornáček         }
285b5840353SAdam Hornáček         return 0;
286b5840353SAdam Hornáček     }
2872ffbb0cfSVladimir Kotal 
2882ffbb0cfSVladimir Kotal     /**
2892ffbb0cfSVladimir Kotal      * Get the contents of a file or empty string if the file cannot be read.
29081b586e6SVladimir Kotal      * @param file file object
29181b586e6SVladimir Kotal      * @return string with the file contents
2922ffbb0cfSVladimir Kotal      */
getFileContent(File file)2932ffbb0cfSVladimir Kotal     public static String getFileContent(File file) {
2942ffbb0cfSVladimir Kotal         if (file == null || !file.canRead()) {
2952ffbb0cfSVladimir Kotal             return "";
2962ffbb0cfSVladimir Kotal         }
2972ffbb0cfSVladimir Kotal         try {
298a3065a28SAdam Hornacek             return Files.readString(file.toPath(), Charset.defaultCharset());
299a3065a28SAdam Hornacek         } catch (IOException e) {
300a3065a28SAdam Hornacek             LOGGER.log(Level.WARNING, "failed to read file: {0}", e.getMessage());
3012ffbb0cfSVladimir Kotal         }
3022ffbb0cfSVladimir Kotal         return "";
3032ffbb0cfSVladimir Kotal     }
304b5840353SAdam Hornáček }
305