xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/web/Laundromat.java (revision 5d9f3aa0ca3da3a714233f987fa732f62c0965f6)
1*d26fc5a1SChris Fraire /*
2*d26fc5a1SChris Fraire  * CDDL HEADER START
3*d26fc5a1SChris Fraire  *
4*d26fc5a1SChris Fraire  * The contents of this file are subject to the terms of the
5*d26fc5a1SChris Fraire  * Common Development and Distribution License (the "License").
6*d26fc5a1SChris Fraire  * You may not use this file except in compliance with the License.
7*d26fc5a1SChris Fraire  *
8*d26fc5a1SChris Fraire  * See LICENSE.txt included in this distribution for the specific
9*d26fc5a1SChris Fraire  * language governing permissions and limitations under the License.
10*d26fc5a1SChris Fraire  *
11*d26fc5a1SChris Fraire  * When distributing Covered Code, include this CDDL HEADER in each
12*d26fc5a1SChris Fraire  * file and include the License file at LICENSE.txt.
13*d26fc5a1SChris Fraire  * If applicable, add the following below this CDDL HEADER, with the
14*d26fc5a1SChris Fraire  * fields enclosed by brackets "[]" replaced with your own identifying
15*d26fc5a1SChris Fraire  * information: Portions Copyright [yyyy] [name of copyright owner]
16*d26fc5a1SChris Fraire  *
17*d26fc5a1SChris Fraire  * CDDL HEADER END
18*d26fc5a1SChris Fraire  */
19*d26fc5a1SChris Fraire 
20*d26fc5a1SChris Fraire /*
21*d26fc5a1SChris Fraire  * Copyright (c) 2020, Chris Fraire <cfraire@me.com>.
22*d26fc5a1SChris Fraire  */
23*d26fc5a1SChris Fraire package org.opengrok.indexer.web;
24*d26fc5a1SChris Fraire 
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
28*d26fc5a1SChris Fraire 
/**
 * Represents a container for sanitizing methods for avoiding classifications as
 * taint bugs.
 * <p>
 * All methods are static and {@code null}-tolerant: a {@code null} argument
 * yields a {@code null} result instead of an exception.
 */
public final class Laundromat {

    /** Matches exactly one "pattern-breaking character": LF, CR, tab, or FF. */
    private static final Pattern ESC_N_R_T_F = Pattern.compile("[\\n\\r\\t\\f]");

    /** Matches a run of one or more contiguous "pattern-breaking characters". */
    private static final Pattern ESC_N_R_T_F_1_N = Pattern.compile("[\\n\\r\\t\\f]+");

    /**
     * Sanitize {@code value} where it will be used in subsequent OpenGrok
     * (non-logging) processing.
     * @param value the possibly tainted text, or {@code null}
     * @return {@code null} if null or else {@code value} with "pattern-breaking
     * characters" (tabs, CR, LF, FF) replaced as underscores (one for one)
     */
    public static String launderInput(String value) {
        return replaceAll(value, ESC_N_R_T_F, "_");
    }

    /**
     * Sanitize {@code value} where it will be used in a Lucene query.
     * @param value the possibly tainted text, or {@code null}
     * @return {@code null} if null or else {@code value} with "pattern-breaking
     * characters" (tabs, CR, LF, FF) replaced as spaces. Contiguous matches are
     * replaced with one space.
     */
    public static String launderQuery(String value) {
        return replaceAll(value, ESC_N_R_T_F_1_N, " ");
    }

    /**
     * Sanitize {@code value} where it will be used in a log message only.
     * @param value the possibly tainted text, or {@code null}
     * @return {@code null} if null or else {@code value} with "pattern-breaking
     * characters" tabs, CR, LF, and FF replaced as {@code "<TAB>"},
     * {@code "<CR>"}, {@code "<LF>"}, and {@code "<FF>"} resp.
     */
    public static String launderLog(String value) {
        if (value == null) {
            return null;
        }

        // Allocated lazily so that a value needing no change is returned as-is.
        StringBuilder safe = null;
        final int length = value.length();
        for (int i = 0; i < length; ++i) {
            char c = value.charAt(i);
            String token;
            switch (c) {
                case '\n':
                    token = "<LF>";
                    break;
                case '\r':
                    token = "<CR>";
                    break;
                case '\t':
                    token = "<TAB>";
                    break;
                case '\f':
                    token = "<FF>";
                    break;
                default:
                    token = null;
                    break;
            }

            if (token != null) {
                if (safe == null) {
                    // First offending character: carry over the clean prefix.
                    safe = new StringBuilder(length + 16).append(value, 0, i);
                }
                safe.append(token);
            } else if (safe != null) {
                safe.append(c);
            }
        }
        return safe == null ? value : safe.toString();
    }

    /**
     * Sanitize {@code map} where it will be used in a log message only.
     * @param map the possibly tainted map, or {@code null}; a {@code null} key
     * and {@code null} value arrays are tolerated
     * @return {@code null} if null or else a new map with keys and values
     * sanitized with {@link #launderLog(String)}. If the sanitizing causes key
     * collisions, the colliding keys' values are combined.
     */
    public static Map<String, String[]> launderLog(Map<String, String[]> map) {
        if (map == null) {
            return null;
        }

        // Entries are visited in key order so that the result of merging
        // colliding keys does not depend on the source map's iteration order.
        // nullsFirst keeps a null key from raising a NullPointerException.
        Comparator<String> keyOrder = Comparator.nullsFirst(Comparator.<String>naturalOrder());

        Map<String, String[]> safes = new HashMap<>();
        for (Map.Entry<String, String[]> entry : map.entrySet().stream().sorted(
                Map.Entry.<String, String[]>comparingByKey(keyOrder)).collect(
                Collectors.toList())) {
            String key = launderLog(entry.getKey());
            String[] safeValues = safes.get(key);
            String[] fullySafe = mergeLogArrays(entry.getValue(), safeValues);
            safes.put(key, fullySafe);
        }
        return safes;
    }

    /**
     * Combines not-yet-sanitized {@code values} with already-sanitized
     * {@code safeValues} into one new array.
     * @param values raw values to be sanitized with {@link #launderLog(String)},
     * or {@code null}
     * @param safeValues previously sanitized values copied verbatim, or
     * {@code null}
     * @return {@code null} if both arguments are null or else a new array
     * holding the sanitized {@code values} followed by {@code safeValues}
     */
    private static String[] mergeLogArrays(String[] values, String[] safeValues) {
        if (values == null && safeValues == null) {
            return null;
        }

        int n = (values != null ? values.length : 0) +
                (safeValues != null ? safeValues.length : 0);
        String[] result = new String[n];

        int i = 0;
        if (values != null) {
            for (; i < values.length; ++i) {
                result[i] = launderLog(values[i]);
            }
        }
        if (safeValues != null) {
            // i is now the count of sanitized values already written.
            System.arraycopy(safeValues, 0, result, i, safeValues.length);
        }
        return result;
    }

    /**
     * Null-tolerant replacement of every match of {@code pattern}.
     * @param value the text to process, or {@code null}
     * @param pattern a precompiled pattern
     * @param replacement the replacement, interpreted per
     * {@link java.util.regex.Matcher#replaceAll(String)}
     * @return {@code null} if {@code value} is null or else the replaced text
     */
    private static String replaceAll(String value, Pattern pattern, String replacement) {
        if (value == null) {
            return null;
        }
        return pattern.matcher(value).replaceAll(replacement);
    }

    /* private to enforce static */
    private Laundromat() {
    }
}
128