View Javadoc
1   /*
2    * Copyright (C) 2008, 2019 Google Inc. and others
3    *
4    * This program and the accompanying materials are made available under the
5    * terms of the Eclipse Distribution License v. 1.0 which is available at
6    * https://www.eclipse.org/org/documents/edl-v10.php.
7    *
8    * SPDX-License-Identifier: BSD-3-Clause
9    */
10  
11  package org.eclipse.jgit.util;
12  
13  import static java.nio.charset.StandardCharsets.UTF_8;
14  
15  import java.util.Arrays;
16  
17  import org.eclipse.jgit.lib.Constants;
18  
19  /**
20   * Utility functions related to quoted string handling.
21   */
22  public abstract class QuotedString {
23  	/** Quoting style that obeys the rules Git applies to file names */
24  	public static final GitPathStyle GIT_PATH = new GitPathStyle(true);
25  
26  	/**
27  	 * Quoting style that obeys the rules Git applies to file names when
28  	 * {@code core.quotePath = false}.
29  	 *
30  	 * @since 5.6
31  	 */
32  	public static final QuotedString GIT_PATH_MINIMAL = new GitPathStyle(false);
33  
34  	/**
35  	 * Quoting style used by the Bourne shell.
36  	 * <p>
37  	 * Quotes are unconditionally inserted during {@link #quote(String)}. This
38  	 * protects shell meta-characters like <code>$</code> or <code>~</code> from
39  	 * being recognized as special.
40  	 */
41  	public static final BourneStyle BOURNE = new BourneStyle();
42  
43  	/** Bourne style, but permits <code>~user</code> at the start of the string. */
44  	public static final BourneUserPathStyle BOURNE_USER_PATH = new BourneUserPathStyle();
45  
46  	/**
47  	 * Quote an input string by the quoting rules.
48  	 * <p>
49  	 * If the input string does not require any quoting, the same String
50  	 * reference is returned to the caller.
51  	 * <p>
52  	 * Otherwise a quoted string is returned, including the opening and closing
53  	 * quotation marks at the start and end of the string. If the style does not
54  	 * permit raw Unicode characters then the string will first be encoded in
55  	 * UTF-8, with unprintable sequences possibly escaped by the rules.
56  	 *
57  	 * @param in
58  	 *            any non-null Unicode string.
59  	 * @return a quoted string. See above for details.
60  	 */
61  	public abstract String quote(String in);
62  
63  	/**
64  	 * Clean a previously quoted input, decoding the result via UTF-8.
65  	 * <p>
66  	 * This method must match quote such that:
67  	 *
68  	 * <pre>
69  	 * a.equals(dequote(quote(a)));
70  	 * </pre>
71  	 *
72  	 * is true for any <code>a</code>.
73  	 *
74  	 * @param in
75  	 *            a Unicode string to remove quoting from.
76  	 * @return the cleaned string.
77  	 * @see #dequote(byte[], int, int)
78  	 */
79  	public String dequote(String in) {
80  		final byte[] b = Constants.encode(in);
81  		return dequote(b, 0, b.length);
82  	}
83  
84  	/**
85  	 * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
86  	 * <p>
87  	 * This method must match quote such that:
88  	 *
89  	 * <pre>
90  	 * a.equals(dequote(Constants.encode(quote(a))));
91  	 * </pre>
92  	 *
93  	 * is true for any <code>a</code>.
94  	 * <p>
95  	 * This method removes any opening/closing quotation marks added by
96  	 * {@link #quote(String)}.
97  	 *
98  	 * @param in
99  	 *            the input buffer to parse.
100 	 * @param offset
101 	 *            first position within <code>in</code> to scan.
102 	 * @param end
103 	 *            one position past in <code>in</code> to scan.
104 	 * @return the cleaned string.
105 	 */
106 	public abstract String dequote(byte[] in, int offset, int end);
107 
108 	/**
109 	 * Quoting style used by the Bourne shell.
110 	 * <p>
111 	 * Quotes are unconditionally inserted during {@link #quote(String)}. This
112 	 * protects shell meta-characters like <code>$</code> or <code>~</code> from
113 	 * being recognized as special.
114 	 */
115 	public static class BourneStyle extends QuotedString {
116 		@Override
117 		public String quote(String in) {
118 			final StringBuilder r = new StringBuilder();
119 			r.append('\'');
120 			int start = 0, i = 0;
121 			for (; i < in.length(); i++) {
122 				switch (in.charAt(i)) {
123 				case '\'':
124 				case '!':
125 					r.append(in, start, i);
126 					r.append('\'');
127 					r.append('\\');
128 					r.append(in.charAt(i));
129 					r.append('\'');
130 					start = i + 1;
131 					break;
132 				}
133 			}
134 			r.append(in, start, i);
135 			r.append('\'');
136 			return r.toString();
137 		}
138 
139 		@Override
140 		public String dequote(byte[] in, int ip, int ie) {
141 			boolean inquote = false;
142 			final byte[] r = new byte[ie - ip];
143 			int rPtr = 0;
144 			while (ip < ie) {
145 				final byte b = in[ip++];
146 				switch (b) {
147 				case '\'':
148 					inquote = !inquote;
149 					continue;
150 				case '\\':
151 					if (inquote || ip == ie)
152 						r[rPtr++] = b; // literal within a quote
153 					else
154 						r[rPtr++] = in[ip++];
155 					continue;
156 				default:
157 					r[rPtr++] = b;
158 					continue;
159 				}
160 			}
161 			return RawParseUtils.decode(UTF_8, r, 0, rPtr);
162 		}
163 	}
164 
165 	/** Bourne style, but permits <code>~user</code> at the start of the string. */
166 	public static class BourneUserPathStyle extends BourneStyle {
167 		@Override
168 		public String quote(String in) {
169 			if (in.matches("^~[A-Za-z0-9_-]+$")) { //$NON-NLS-1$
170 				// If the string is just "~user" we can assume they
171 				// mean "~user/".
172 				//
173 				return in + "/"; //$NON-NLS-1$
174 			}
175 
176 			if (in.matches("^~[A-Za-z0-9_-]*/.*$")) { //$NON-NLS-1$
177 				// If the string is of "~/path" or "~user/path"
178 				// we must not escape ~/ or ~user/ from the shell.
179 				//
180 				final int i = in.indexOf('/') + 1;
181 				if (i == in.length())
182 					return in;
183 				return in.substring(0, i) + super.quote(in.substring(i));
184 			}
185 
186 			return super.quote(in);
187 		}
188 	}
189 
190 	/** Quoting style that obeys the rules Git applies to file names */
191 	public static final class GitPathStyle extends QuotedString {
192 		private static final byte[] quote;
193 		static {
194 			quote = new byte[128];
195 			Arrays.fill(quote, (byte) -1);
196 
197 			for (int i = '0'; i <= '9'; i++)
198 				quote[i] = 0;
199 			for (int i = 'a'; i <= 'z'; i++)
200 				quote[i] = 0;
201 			for (int i = 'A'; i <= 'Z'; i++)
202 				quote[i] = 0;
203 			quote[' '] = 0;
204 			quote['$'] = 0;
205 			quote['%'] = 0;
206 			quote['&'] = 0;
207 			quote['*'] = 0;
208 			quote['+'] = 0;
209 			quote[','] = 0;
210 			quote['-'] = 0;
211 			quote['.'] = 0;
212 			quote['/'] = 0;
213 			quote[':'] = 0;
214 			quote[';'] = 0;
215 			quote['='] = 0;
216 			quote['?'] = 0;
217 			quote['@'] = 0;
218 			quote['_'] = 0;
219 			quote['^'] = 0;
220 			quote['|'] = 0;
221 			quote['~'] = 0;
222 
223 			quote['\u0007'] = 'a';
224 			quote['\b'] = 'b';
225 			quote['\f'] = 'f';
226 			quote['\n'] = 'n';
227 			quote['\r'] = 'r';
228 			quote['\t'] = 't';
229 			quote['\u000B'] = 'v';
230 			quote['\\'] = '\\';
231 			quote['"'] = '"';
232 		}
233 
234 		private final boolean quoteHigh;
235 
236 		@Override
237 		public String quote(String instr) {
238 			if (instr.isEmpty()) {
239 				return "\"\""; //$NON-NLS-1$
240 			}
241 			boolean reuse = true;
242 			final byte[] in = Constants.encode(instr);
243 			final byte[] out = new byte[4 * in.length + 2];
244 			int o = 0;
245 			out[o++] = '"';
246 			for (byte element : in) {
247 				final int c = element & 0xff;
248 				if (c < quote.length) {
249 					final byte style = quote[c];
250 					if (style == 0) {
251 						out[o++] = (byte) c;
252 						continue;
253 					}
254 					if (style > 0) {
255 						reuse = false;
256 						out[o++] = '\\';
257 						out[o++] = style;
258 						continue;
259 					}
260 				} else if (!quoteHigh) {
261 					out[o++] = (byte) c;
262 					continue;
263 				}
264 
265 				reuse = false;
266 				out[o++] = '\\';
267 				out[o++] = (byte) (((c >> 6) & 03) + '0');
268 				out[o++] = (byte) (((c >> 3) & 07) + '0');
269 				out[o++] = (byte) (((c >> 0) & 07) + '0');
270 			}
271 			if (reuse) {
272 				return instr;
273 			}
274 			out[o++] = '"';
275 			return new String(out, 0, o, UTF_8);
276 		}
277 
278 		@Override
279 		public String dequote(byte[] in, int inPtr, int inEnd) {
280 			if (2 <= inEnd - inPtr && in[inPtr] == '"' && in[inEnd - 1] == '"')
281 				return dq(in, inPtr + 1, inEnd - 1);
282 			return RawParseUtils.decode(UTF_8, in, inPtr, inEnd);
283 		}
284 
285 		private static String dq(byte[] in, int inPtr, int inEnd) {
286 			final byte[] r = new byte[inEnd - inPtr];
287 			int rPtr = 0;
288 			while (inPtr < inEnd) {
289 				final byte b = in[inPtr++];
290 				if (b != '\\') {
291 					r[rPtr++] = b;
292 					continue;
293 				}
294 
295 				if (inPtr == inEnd) {
296 					// Lone trailing backslash. Treat it as a literal.
297 					//
298 					r[rPtr++] = '\\';
299 					break;
300 				}
301 
302 				switch (in[inPtr++]) {
303 				case 'a':
304 					r[rPtr++] = 0x07 /* \a = BEL */;
305 					continue;
306 				case 'b':
307 					r[rPtr++] = '\b';
308 					continue;
309 				case 'f':
310 					r[rPtr++] = '\f';
311 					continue;
312 				case 'n':
313 					r[rPtr++] = '\n';
314 					continue;
315 				case 'r':
316 					r[rPtr++] = '\r';
317 					continue;
318 				case 't':
319 					r[rPtr++] = '\t';
320 					continue;
321 				case 'v':
322 					r[rPtr++] = 0x0B/* \v = VT */;
323 					continue;
324 
325 				case '\\':
326 				case '"':
327 					r[rPtr++] = in[inPtr - 1];
328 					continue;
329 
330 				case '0':
331 				case '1':
332 				case '2':
333 				case '3': {
334 					int cp = in[inPtr - 1] - '0';
335 					for (int n = 1; n < 3 && inPtr < inEnd; n++) {
336 						final byte c = in[inPtr];
337 						if ('0' <= c && c <= '7') {
338 							cp <<= 3;
339 							cp |= c - '0';
340 							inPtr++;
341 						} else {
342 							break;
343 						}
344 					}
345 					r[rPtr++] = (byte) cp;
346 					continue;
347 				}
348 
349 				default:
350 					// Any other code is taken literally.
351 					//
352 					r[rPtr++] = '\\';
353 					r[rPtr++] = in[inPtr - 1];
354 					continue;
355 				}
356 			}
357 
358 			return RawParseUtils.decode(UTF_8, r, 0, rPtr);
359 		}
360 
361 		private GitPathStyle(boolean doQuote) {
362 			quoteHigh = doQuote;
363 		}
364 	}
365 }