View Javadoc
1   /*
2    * Copyright (C) 2008, Google Inc.
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  
44  package org.eclipse.jgit.util;
45  
import static java.nio.charset.StandardCharsets.UTF_8;

import java.util.Arrays;
import java.util.regex.Pattern;

import org.eclipse.jgit.lib.Constants;
51  
52  /**
53   * Utility functions related to quoted string handling.
54   */
55  public abstract class QuotedString {
56  	/** Quoting style that obeys the rules Git applies to file names */
57  	public static final GitPathStyle GIT_PATH = new GitPathStyle();
58  
59  	/**
60  	 * Quoting style used by the Bourne shell.
61  	 * <p>
62  	 * Quotes are unconditionally inserted during {@link #quote(String)}. This
63  	 * protects shell meta-characters like <code>$</code> or <code>~</code> from
64  	 * being recognized as special.
65  	 */
66  	public static final BourneStyle BOURNE = new BourneStyle();
67  
68  	/** Bourne style, but permits <code>~user</code> at the start of the string. */
69  	public static final BourneUserPathStyle BOURNE_USER_PATH = new BourneUserPathStyle();
70  
71  	/**
72  	 * Quote an input string by the quoting rules.
73  	 * <p>
74  	 * If the input string does not require any quoting, the same String
75  	 * reference is returned to the caller.
76  	 * <p>
77  	 * Otherwise a quoted string is returned, including the opening and closing
78  	 * quotation marks at the start and end of the string. If the style does not
79  	 * permit raw Unicode characters then the string will first be encoded in
80  	 * UTF-8, with unprintable sequences possibly escaped by the rules.
81  	 *
82  	 * @param in
83  	 *            any non-null Unicode string.
84  	 * @return a quoted string. See above for details.
85  	 */
86  	public abstract String quote(String in);
87  
88  	/**
89  	 * Clean a previously quoted input, decoding the result via UTF-8.
90  	 * <p>
91  	 * This method must match quote such that:
92  	 *
93  	 * <pre>
94  	 * a.equals(dequote(quote(a)));
95  	 * </pre>
96  	 *
97  	 * is true for any <code>a</code>.
98  	 *
99  	 * @param in
100 	 *            a Unicode string to remove quoting from.
101 	 * @return the cleaned string.
102 	 * @see #dequote(byte[], int, int)
103 	 */
104 	public String dequote(String in) {
105 		final byte[] b = Constants.encode(in);
106 		return dequote(b, 0, b.length);
107 	}
108 
109 	/**
110 	 * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
111 	 * <p>
112 	 * This method must match quote such that:
113 	 *
114 	 * <pre>
115 	 * a.equals(dequote(Constants.encode(quote(a))));
116 	 * </pre>
117 	 *
118 	 * is true for any <code>a</code>.
119 	 * <p>
120 	 * This method removes any opening/closing quotation marks added by
121 	 * {@link #quote(String)}.
122 	 *
123 	 * @param in
124 	 *            the input buffer to parse.
125 	 * @param offset
126 	 *            first position within <code>in</code> to scan.
127 	 * @param end
128 	 *            one position past the last byte in <code>in</code> to scan.
129 	 * @return the cleaned string.
130 	 */
131 	public abstract String dequote(byte[] in, int offset, int end);
132 
133 	/**
134 	 * Quoting style used by the Bourne shell.
135 	 * <p>
136 	 * Quotes are unconditionally inserted during {@link #quote(String)}. This
137 	 * protects shell meta-characters like <code>$</code> or <code>~</code> from
138 	 * being recognized as special.
139 	 */
140 	public static class BourneStyle extends QuotedString {
141 		@Override
142 		public String quote(String in) {
143 			final StringBuilder r = new StringBuilder();
144 			r.append('\'');
145 			int start = 0, i = 0;
146 			for (; i < in.length(); i++) {
147 				switch (in.charAt(i)) {
148 				case '\'':
149 				case '!':
150 					r.append(in, start, i);
151 					r.append('\'');
152 					r.append('\\');
153 					r.append(in.charAt(i));
154 					r.append('\'');
155 					start = i + 1;
156 					break;
157 				}
158 			}
159 			r.append(in, start, i);
160 			r.append('\'');
161 			return r.toString();
162 		}
163 
164 		@Override
165 		public String dequote(byte[] in, int ip, int ie) {
166 			boolean inquote = false;
167 			final byte[] r = new byte[ie - ip];
168 			int rPtr = 0;
169 			while (ip < ie) {
170 				final byte b = in[ip++];
171 				switch (b) {
172 				case '\'':
173 					inquote = !inquote;
174 					continue;
175 				case '\\':
176 					if (inquote || ip == ie)
177 						r[rPtr++] = b; // literal within a quote
178 					else
179 						r[rPtr++] = in[ip++];
180 					continue;
181 				default:
182 					r[rPtr++] = b;
183 					continue;
184 				}
185 			}
186 			return RawParseUtils.decode(UTF_8, r, 0, rPtr);
187 		}
188 	}
189 
190 	/** Bourne style, but permits <code>~user</code> at the start of the string. */
191 	public static class BourneUserPathStyle extends BourneStyle {
192 		@Override
193 		public String quote(String in) {
194 			if (in.matches("^~[A-Za-z0-9_-]+$")) { //$NON-NLS-1$
195 				// If the string is just "~user" we can assume they
196 				// mean "~user/".
197 				//
198 				return in + "/"; //$NON-NLS-1$
199 			}
200 
201 			if (in.matches("^~[A-Za-z0-9_-]*/.*$")) { //$NON-NLS-1$
202 				// If the string is of "~/path" or "~user/path"
203 				// we must not escape ~/ or ~user/ from the shell.
204 				//
205 				final int i = in.indexOf('/') + 1;
206 				if (i == in.length())
207 					return in;
208 				return in.substring(0, i) + super.quote(in.substring(i));
209 			}
210 
211 			return super.quote(in);
212 		}
213 	}
214 
215 	/** Quoting style that obeys the rules Git applies to file names */
216 	public static final class GitPathStyle extends QuotedString {
217 		private static final byte[] quote;
218 		static {
219 			quote = new byte[128];
220 			Arrays.fill(quote, (byte) -1);
221 
222 			for (int i = '0'; i <= '9'; i++)
223 				quote[i] = 0;
224 			for (int i = 'a'; i <= 'z'; i++)
225 				quote[i] = 0;
226 			for (int i = 'A'; i <= 'Z'; i++)
227 				quote[i] = 0;
228 			quote[' '] = 0;
229 			quote['$'] = 0;
230 			quote['%'] = 0;
231 			quote['&'] = 0;
232 			quote['*'] = 0;
233 			quote['+'] = 0;
234 			quote[','] = 0;
235 			quote['-'] = 0;
236 			quote['.'] = 0;
237 			quote['/'] = 0;
238 			quote[':'] = 0;
239 			quote[';'] = 0;
240 			quote['='] = 0;
241 			quote['?'] = 0;
242 			quote['@'] = 0;
243 			quote['_'] = 0;
244 			quote['^'] = 0;
245 			quote['|'] = 0;
246 			quote['~'] = 0;
247 
248 			quote['\u0007'] = 'a';
249 			quote['\b'] = 'b';
250 			quote['\f'] = 'f';
251 			quote['\n'] = 'n';
252 			quote['\r'] = 'r';
253 			quote['\t'] = 't';
254 			quote['\u000B'] = 'v';
255 			quote['\\'] = '\\';
256 			quote['"'] = '"';
257 		}
258 
259 		@Override
260 		public String quote(String instr) {
261 			if (instr.length() == 0)
262 				return "\"\""; //$NON-NLS-1$
263 			boolean reuse = true;
264 			final byte[] in = Constants.encode(instr);
265 			final StringBuilder r = new StringBuilder(2 + in.length);
266 			r.append('"');
267 			for (int i = 0; i < in.length; i++) {
268 				final int c = in[i] & 0xff;
269 				if (c < quote.length) {
270 					final byte style = quote[c];
271 					if (style == 0) {
272 						r.append((char) c);
273 						continue;
274 					}
275 					if (style > 0) {
276 						reuse = false;
277 						r.append('\\');
278 						r.append((char) style);
279 						continue;
280 					}
281 				}
282 
283 				reuse = false;
284 				r.append('\\');
285 				r.append((char) (((c >> 6) & 03) + '0'));
286 				r.append((char) (((c >> 3) & 07) + '0'));
287 				r.append((char) (((c >> 0) & 07) + '0'));
288 			}
289 			if (reuse)
290 				return instr;
291 			r.append('"');
292 			return r.toString();
293 		}
294 
295 		@Override
296 		public String dequote(byte[] in, int inPtr, int inEnd) {
297 			if (2 <= inEnd - inPtr && in[inPtr] == '"' && in[inEnd - 1] == '"')
298 				return dq(in, inPtr + 1, inEnd - 1);
299 			return RawParseUtils.decode(UTF_8, in, inPtr, inEnd);
300 		}
301 
302 		private static String dq(byte[] in, int inPtr, int inEnd) {
303 			final byte[] r = new byte[inEnd - inPtr];
304 			int rPtr = 0;
305 			while (inPtr < inEnd) {
306 				final byte b = in[inPtr++];
307 				if (b != '\\') {
308 					r[rPtr++] = b;
309 					continue;
310 				}
311 
312 				if (inPtr == inEnd) {
313 					// Lone trailing backslash. Treat it as a literal.
314 					//
315 					r[rPtr++] = '\\';
316 					break;
317 				}
318 
319 				switch (in[inPtr++]) {
320 				case 'a':
321 					r[rPtr++] = 0x07 /* \a = BEL */;
322 					continue;
323 				case 'b':
324 					r[rPtr++] = '\b';
325 					continue;
326 				case 'f':
327 					r[rPtr++] = '\f';
328 					continue;
329 				case 'n':
330 					r[rPtr++] = '\n';
331 					continue;
332 				case 'r':
333 					r[rPtr++] = '\r';
334 					continue;
335 				case 't':
336 					r[rPtr++] = '\t';
337 					continue;
338 				case 'v':
339 					r[rPtr++] = 0x0B/* \v = VT */;
340 					continue;
341 
342 				case '\\':
343 				case '"':
344 					r[rPtr++] = in[inPtr - 1];
345 					continue;
346 
347 				case '0':
348 				case '1':
349 				case '2':
350 				case '3': {
351 					int cp = in[inPtr - 1] - '0';
352 					for (int n = 1; n < 3 && inPtr < inEnd; n++) {
353 						final byte c = in[inPtr];
354 						if ('0' <= c && c <= '7') {
355 							cp <<= 3;
356 							cp |= c - '0';
357 							inPtr++;
358 						} else {
359 							break;
360 						}
361 					}
362 					r[rPtr++] = (byte) cp;
363 					continue;
364 				}
365 
366 				default:
367 					// Any other code is taken literally.
368 					//
369 					r[rPtr++] = '\\';
370 					r[rPtr++] = in[inPtr - 1];
371 					continue;
372 				}
373 			}
374 
375 			return RawParseUtils.decode(UTF_8, r, 0, rPtr);
376 		}
377 
378 		private GitPathStyle() {
379 			// Singleton
380 		}
381 	}
382 }