/*
 * Copyright (C) 2008, 2019 Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.util;

import static java.nio.charset.StandardCharsets.UTF_8;

import java.util.Arrays;
import java.util.regex.Pattern;

import org.eclipse.jgit.lib.Constants;

52  /**
53   * Utility functions related to quoted string handling.
54   */
55  public abstract class QuotedString {
56  	/** Quoting style that obeys the rules Git applies to file names */
57  	public static final GitPathStyle GIT_PATH = new GitPathStyle(true);
58  
59  	/**
60  	 * Quoting style that obeys the rules Git applies to file names when
61  	 * {@code core.quotePath = false}.
62  	 *
63  	 * @since 5.6
64  	 */
65  	public static final QuotedString GIT_PATH_MINIMAL = new GitPathStyle(false);
66  
67  	/**
68  	 * Quoting style used by the Bourne shell.
69  	 * <p>
70  	 * Quotes are unconditionally inserted during {@link #quote(String)}. This
71  	 * protects shell meta-characters like <code>$</code> or <code>~</code> from
72  	 * being recognized as special.
73  	 */
74  	public static final BourneStyle BOURNE = new BourneStyle();
75  
76  	/** Bourne style, but permits <code>~user</code> at the start of the string. */
77  	public static final BourneUserPathStyle BOURNE_USER_PATH = new BourneUserPathStyle();
78  
79  	/**
80  	 * Quote an input string by the quoting rules.
81  	 * <p>
82  	 * If the input string does not require any quoting, the same String
83  	 * reference is returned to the caller.
84  	 * <p>
85  	 * Otherwise a quoted string is returned, including the opening and closing
86  	 * quotation marks at the start and end of the string. If the style does not
87  	 * permit raw Unicode characters then the string will first be encoded in
88  	 * UTF-8, with unprintable sequences possibly escaped by the rules.
89  	 *
90  	 * @param in
91  	 *            any non-null Unicode string.
92  	 * @return a quoted string. See above for details.
93  	 */
94  	public abstract String quote(String in);
95  
96  	/**
97  	 * Clean a previously quoted input, decoding the result via UTF-8.
98  	 * <p>
99  	 * This method must match quote such that:
100 	 *
101 	 * <pre>
102 	 * a.equals(dequote(quote(a)));
103 	 * </pre>
104 	 *
105 	 * is true for any <code>a</code>.
106 	 *
107 	 * @param in
108 	 *            a Unicode string to remove quoting from.
109 	 * @return the cleaned string.
110 	 * @see #dequote(byte[], int, int)
111 	 */
112 	public String dequote(String in) {
113 		final byte[] b = Constants.encode(in);
114 		return dequote(b, 0, b.length);
115 	}
116 
117 	/**
118 	 * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
119 	 * <p>
120 	 * This method must match quote such that:
121 	 *
122 	 * <pre>
123 	 * a.equals(dequote(Constants.encode(quote(a))));
124 	 * </pre>
125 	 *
126 	 * is true for any <code>a</code>.
127 	 * <p>
128 	 * This method removes any opening/closing quotation marks added by
129 	 * {@link #quote(String)}.
130 	 *
131 	 * @param in
132 	 *            the input buffer to parse.
133 	 * @param offset
134 	 *            first position within <code>in</code> to scan.
135 	 * @param end
136 	 *            one position past in <code>in</code> to scan.
137 	 * @return the cleaned string.
138 	 */
139 	public abstract String dequote(byte[] in, int offset, int end);
140 
141 	/**
142 	 * Quoting style used by the Bourne shell.
143 	 * <p>
144 	 * Quotes are unconditionally inserted during {@link #quote(String)}. This
145 	 * protects shell meta-characters like <code>$</code> or <code>~</code> from
146 	 * being recognized as special.
147 	 */
148 	public static class BourneStyle extends QuotedString {
149 		@Override
150 		public String quote(String in) {
151 			final StringBuilder r = new StringBuilder();
152 			r.append('\'');
153 			int start = 0, i = 0;
154 			for (; i < in.length(); i++) {
155 				switch (in.charAt(i)) {
156 				case '\'':
157 				case '!':
158 					r.append(in, start, i);
159 					r.append('\'');
160 					r.append('\\');
161 					r.append(in.charAt(i));
162 					r.append('\'');
163 					start = i + 1;
164 					break;
165 				}
166 			}
167 			r.append(in, start, i);
168 			r.append('\'');
169 			return r.toString();
170 		}
171 
172 		@Override
173 		public String dequote(byte[] in, int ip, int ie) {
174 			boolean inquote = false;
175 			final byte[] r = new byte[ie - ip];
176 			int rPtr = 0;
177 			while (ip < ie) {
178 				final byte b = in[ip++];
179 				switch (b) {
180 				case '\'':
181 					inquote = !inquote;
182 					continue;
183 				case '\\':
184 					if (inquote || ip == ie)
185 						r[rPtr++] = b; // literal within a quote
186 					else
187 						r[rPtr++] = in[ip++];
188 					continue;
189 				default:
190 					r[rPtr++] = b;
191 					continue;
192 				}
193 			}
194 			return RawParseUtils.decode(UTF_8, r, 0, rPtr);
195 		}
196 	}
197 
198 	/** Bourne style, but permits <code>~user</code> at the start of the string. */
199 	public static class BourneUserPathStyle extends BourneStyle {
200 		@Override
201 		public String quote(String in) {
202 			if (in.matches("^~[A-Za-z0-9_-]+$")) { //$NON-NLS-1$
203 				// If the string is just "~user" we can assume they
204 				// mean "~user/".
205 				//
206 				return in + "/"; //$NON-NLS-1$
207 			}
208 
209 			if (in.matches("^~[A-Za-z0-9_-]*/.*$")) { //$NON-NLS-1$
210 				// If the string is of "~/path" or "~user/path"
211 				// we must not escape ~/ or ~user/ from the shell.
212 				//
213 				final int i = in.indexOf('/') + 1;
214 				if (i == in.length())
215 					return in;
216 				return in.substring(0, i) + super.quote(in.substring(i));
217 			}
218 
219 			return super.quote(in);
220 		}
221 	}
222 
223 	/** Quoting style that obeys the rules Git applies to file names */
224 	public static final class GitPathStyle extends QuotedString {
225 		private static final byte[] quote;
226 		static {
227 			quote = new byte[128];
228 			Arrays.fill(quote, (byte) -1);
229 
230 			for (int i = '0'; i <= '9'; i++)
231 				quote[i] = 0;
232 			for (int i = 'a'; i <= 'z'; i++)
233 				quote[i] = 0;
234 			for (int i = 'A'; i <= 'Z'; i++)
235 				quote[i] = 0;
236 			quote[' '] = 0;
237 			quote['$'] = 0;
238 			quote['%'] = 0;
239 			quote['&'] = 0;
240 			quote['*'] = 0;
241 			quote['+'] = 0;
242 			quote[','] = 0;
243 			quote['-'] = 0;
244 			quote['.'] = 0;
245 			quote['/'] = 0;
246 			quote[':'] = 0;
247 			quote[';'] = 0;
248 			quote['='] = 0;
249 			quote['?'] = 0;
250 			quote['@'] = 0;
251 			quote['_'] = 0;
252 			quote['^'] = 0;
253 			quote['|'] = 0;
254 			quote['~'] = 0;
255 
256 			quote['\u0007'] = 'a';
257 			quote['\b'] = 'b';
258 			quote['\f'] = 'f';
259 			quote['\n'] = 'n';
260 			quote['\r'] = 'r';
261 			quote['\t'] = 't';
262 			quote['\u000B'] = 'v';
263 			quote['\\'] = '\\';
264 			quote['"'] = '"';
265 		}
266 
267 		private final boolean quoteHigh;
268 
269 		@Override
270 		public String quote(String instr) {
271 			if (instr.isEmpty()) {
272 				return "\"\""; //$NON-NLS-1$
273 			}
274 			boolean reuse = true;
275 			final byte[] in = Constants.encode(instr);
276 			final byte[] out = new byte[4 * in.length + 2];
277 			int o = 0;
278 			out[o++] = '"';
279 			for (int i = 0; i < in.length; i++) {
280 				final int c = in[i] & 0xff;
281 				if (c < quote.length) {
282 					final byte style = quote[c];
283 					if (style == 0) {
284 						out[o++] = (byte) c;
285 						continue;
286 					}
287 					if (style > 0) {
288 						reuse = false;
289 						out[o++] = '\\';
290 						out[o++] = style;
291 						continue;
292 					}
293 				} else if (!quoteHigh) {
294 					out[o++] = (byte) c;
295 					continue;
296 				}
297 
298 				reuse = false;
299 				out[o++] = '\\';
300 				out[o++] = (byte) (((c >> 6) & 03) + '0');
301 				out[o++] = (byte) (((c >> 3) & 07) + '0');
302 				out[o++] = (byte) (((c >> 0) & 07) + '0');
303 			}
304 			if (reuse) {
305 				return instr;
306 			}
307 			out[o++] = '"';
308 			return new String(out, 0, o, UTF_8);
309 		}
310 
311 		@Override
312 		public String dequote(byte[] in, int inPtr, int inEnd) {
313 			if (2 <= inEnd - inPtr && in[inPtr] == '"' && in[inEnd - 1] == '"')
314 				return dq(in, inPtr + 1, inEnd - 1);
315 			return RawParseUtils.decode(UTF_8, in, inPtr, inEnd);
316 		}
317 
318 		private static String dq(byte[] in, int inPtr, int inEnd) {
319 			final byte[] r = new byte[inEnd - inPtr];
320 			int rPtr = 0;
321 			while (inPtr < inEnd) {
322 				final byte b = in[inPtr++];
323 				if (b != '\\') {
324 					r[rPtr++] = b;
325 					continue;
326 				}
327 
328 				if (inPtr == inEnd) {
329 					// Lone trailing backslash. Treat it as a literal.
330 					//
331 					r[rPtr++] = '\\';
332 					break;
333 				}
334 
335 				switch (in[inPtr++]) {
336 				case 'a':
337 					r[rPtr++] = 0x07 /* \a = BEL */;
338 					continue;
339 				case 'b':
340 					r[rPtr++] = '\b';
341 					continue;
342 				case 'f':
343 					r[rPtr++] = '\f';
344 					continue;
345 				case 'n':
346 					r[rPtr++] = '\n';
347 					continue;
348 				case 'r':
349 					r[rPtr++] = '\r';
350 					continue;
351 				case 't':
352 					r[rPtr++] = '\t';
353 					continue;
354 				case 'v':
355 					r[rPtr++] = 0x0B/* \v = VT */;
356 					continue;
357 
358 				case '\\':
359 				case '"':
360 					r[rPtr++] = in[inPtr - 1];
361 					continue;
362 
363 				case '0':
364 				case '1':
365 				case '2':
366 				case '3': {
367 					int cp = in[inPtr - 1] - '0';
368 					for (int n = 1; n < 3 && inPtr < inEnd; n++) {
369 						final byte c = in[inPtr];
370 						if ('0' <= c && c <= '7') {
371 							cp <<= 3;
372 							cp |= c - '0';
373 							inPtr++;
374 						} else {
375 							break;
376 						}
377 					}
378 					r[rPtr++] = (byte) cp;
379 					continue;
380 				}
381 
382 				default:
383 					// Any other code is taken literally.
384 					//
385 					r[rPtr++] = '\\';
386 					r[rPtr++] = in[inPtr - 1];
387 					continue;
388 				}
389 			}
390 
391 			return RawParseUtils.decode(UTF_8, r, 0, rPtr);
392 		}
393 
394 		private GitPathStyle(boolean doQuote) {
395 			quoteHigh = doQuote;
396 		}
397 	}
398 }