xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/util/PathUtils.java (revision 5e19181c54478b4bbe70b9bdf72e94e49df1b1c7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2017, 2019, Chris Fraire <cfraire@me.com>.
22  */
23 package org.opengrok.indexer.util;
24 
25 import org.opengrok.indexer.logger.LoggerFactory;
26 
27 import java.io.IOException;
28 import java.nio.file.FileSystem;
29 import java.nio.file.Files;
30 import java.nio.file.InvalidPathException;
31 import java.nio.file.Path;
32 import java.nio.file.Paths;
33 import java.util.Deque;
34 import java.util.LinkedList;
35 import java.util.Optional;
36 import java.util.Set;
37 import java.util.logging.Level;
38 import java.util.logging.Logger;
39 
40 /**
41  * Represents a container for file system paths-related utility methods.
42  */
43 public class PathUtils {
44     private static final Logger LOGGER =
45         LoggerFactory.getLogger(PathUtils.class);
46 
47     /**
48      * Calls {@link #getRelativeToCanonical(Path, Path, Set, Set)}
49      * with {@code path}, {@code canonical}, {@code allowedSymlinks=null}, and
50      * {@code canonicalRoots=null} (to disable validation of links).
51      * @param path a non-canonical (or canonical) path to compare
52      * @param canonical a canonical path to compare against
53      * @return a relative path determined as described -- or {@code path} if no
54      * canonical relativity is found.
55      * @throws IOException if an error occurs determining canonical paths
56      * for portions of {@code path}
57      */
getRelativeToCanonical(Path path, Path canonical)58     public static String getRelativeToCanonical(Path path, Path canonical)
59         throws IOException {
60         try {
61             return getRelativeToCanonical(path, canonical, null, null);
62         } catch (ForbiddenSymlinkException e) {
63             // should not get here with allowedSymlinks==null
64             return path.toString();
65         }
66     }
67 
68     /**
69      * Determine a relative path comparing {@code path} to {@code canonical},
70      * with an algorithm that can handle the possibility of one or more
71      * symbolic links as components of {@code path}.
72      * <p>
73      * When {@code allowedSymlinks} is not null, any symbolic links as
74      * components of {@code path} (below {@code canonical}) are required to
75      * match an element of {@code allowedSymlinks} or target a canonical child
76      * of an element of {@code allowedSymlinks}.
77      * <p>
78      * E.g., with {@code path="/var/opengrok/src/proj_a"} and
79      * {@code canonical="/private/var/opengrok/src"} where /var is linked to
80      * /private/var and where /var/opengrok/src/proj_a is linked to /proj/a,
81      * the function will return {@code "proj_a"} as a relative path.
82      * <p>
83      * The algorithm will have evaluated canonical paths upward from
84      * (non-canonical) /var/opengrok/src/proj_a (a.k.a. /proj/a) to find a
85      * canonical similarity at /var/opengrok/src (a.k.a.
86      * /private/var/opengrok/src).
87      * @param path a non-canonical (or canonical) path to compare
88      * @param canonical a canonical path to compare against
89      * @param allowedSymlinks optional set of allowed symbolic links, so that
90      * any links encountered within {@code path} and not covered by the set (or
91      * whitelisted in a defined {@code canonicalRoots}) will abort the algorithm
92      * @param canonicalRoots optional set of allowed canonicalRoots, so that
93      * any checks done because of a defined {@code allowedSymlinks} will first
94      * check against the whitelist of canonical roots and possibly short-circuit
95      * the explicit validation against {@code allowedSymlinks}.
96      * @return a relative path determined as described above -- or {@code path}
97      * if no canonical relativity is found
98      * @throws ForbiddenSymlinkException if symbolic-link checking is active
99      * and it encounters an ineligible link
100      * @throws InvalidPathException if path cannot be decoded
101      */
getRelativeToCanonical(Path path, Path canonical, Set<String> allowedSymlinks, Set<String> canonicalRoots)102     public static String getRelativeToCanonical(Path path, Path canonical,
103             Set<String> allowedSymlinks, Set<String> canonicalRoots)
104             throws IOException, ForbiddenSymlinkException, InvalidPathException {
105 
106         if (path.equals(canonical)) {
107             return "";
108         }
109 
110         // The following fixup of \\ is really to allow
111         // IndexDatabaseTest.testGetDefinitions() to succeed on Linux or macOS.
112         // That test has an assertion that operation is the "same for windows
113         // delimiters" and passes a path with backslashes. On Windows, the
114         // following fixup would not be needed, since File and Paths recognize
115         // backslash as a delimiter. On Linux and macOS, any backslash needs to
116         // be normalized.
117         final FileSystem fileSystem = path.getFileSystem();
118         final String separator = fileSystem.getSeparator();
119         String strPath = path.toString();
120         strPath = strPath.replace("\\", separator);
121 
122         String strCanonical = canonical.toString();
123         strCanonical = strCanonical.replace("\\", separator);
124         String normCanonical = strCanonical.endsWith(separator) ?
125             strCanonical : strCanonical + separator;
126         Deque<String> tail = null;
127 
128         Path iterPath = fileSystem.getPath(strPath);
129         while (iterPath != null) {
130             Path iterCanon;
131             try {
132                 iterCanon = iterPath.toRealPath();
133             } catch (IOException e) {
134                 iterCanon = iterPath.normalize().toAbsolutePath();
135             }
136 
137             // optional symbolic-link check
138             if (allowedSymlinks != null) {
139                 if (Files.isSymbolicLink(iterPath) &&
140                     !isWhitelisted(iterCanon.toString(), canonicalRoots) &&
141                     !isAllowedSymlink(iterCanon, allowedSymlinks)) {
142                     String format = String.format("%1$s is prohibited symlink", iterPath);
143                     LOGGER.finest(format);
144                     throw new ForbiddenSymlinkException(format);
145                 }
146             }
147 
148             String rel = null;
149             if (iterCanon.startsWith(normCanonical)) {
150                 rel = fileSystem.getPath(normCanonical).relativize(iterCanon).toString();
151             } else if (normCanonical.equals(iterCanon + separator)) {
152                 rel = "";
153             }
154             if (rel != null) {
155                 if (tail != null) {
156                     while (tail.size() > 0) {
157                         rel = Paths.get(rel, tail.pop()).toString();
158                     }
159                 }
160                 return rel;
161             }
162 
163             if (tail == null) {
164                 tail = new LinkedList<>();
165             }
166             tail.push(Optional.ofNullable(iterPath.getFileName()).map(Path::toString).orElse(""));
167             iterPath = iterPath.getParent();
168         }
169 
170         // `path' is not found to be relative to `canonical', so return as is.
171         return path.toString();
172     }
173 
isAllowedSymlink(Path canonicalFile, Set<String> allowedSymlinks)174     private static boolean isAllowedSymlink(Path canonicalFile,
175         Set<String> allowedSymlinks) {
176         final FileSystem fileSystem = canonicalFile.getFileSystem();
177         String canonicalFileStr = canonicalFile.toString();
178         for (String allowedSymlink : allowedSymlinks) {
179             String canonicalLink;
180             try {
181                 canonicalLink = fileSystem.getPath(allowedSymlink).toRealPath().toString();
182             } catch (IOException e) {
183                 if (LOGGER.isLoggable(Level.FINE)) {
184                     LOGGER.fine(String.format("unresolvable symlink: %s",
185                         allowedSymlink));
186                 }
187                 continue;
188             }
189             if (canonicalFileStr.equals(canonicalLink) ||
190                 canonicalFile.startsWith(canonicalLink + fileSystem.getSeparator())) {
191                 return true;
192             }
193         }
194         return false;
195     }
196 
isWhitelisted(String canonical, Set<String> canonicalRoots)197     private static boolean isWhitelisted(String canonical, Set<String> canonicalRoots) {
198         if (canonicalRoots != null) {
199             for (String canonicalRoot : canonicalRoots) {
200                 if (canonical.startsWith(canonicalRoot)) {
201                     return true;
202                 }
203             }
204         }
205         return false;
206     }
207 
208     /** Private to enforce static. */
PathUtils()209     private PathUtils() {
210     }
211 }
212