View Javadoc
1   /*
2    * Copyright (C) 2008, Google Inc.
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  
44  package org.eclipse.jgit.util;
45  
46  import java.util.Arrays;
47  
48  import org.eclipse.jgit.lib.Constants;
49  
50  /**
51   * Utility functions related to quoted string handling.
52   */
53  public abstract class QuotedString {
54  	/** Quoting style that obeys the rules Git applies to file names */
55  	public static final GitPathStyle GIT_PATH = new GitPathStyle();
56  
57  	/**
58  	 * Quoting style used by the Bourne shell.
59  	 * <p>
60  	 * Quotes are unconditionally inserted during {@link #quote(String)}. This
61  	 * protects shell meta-characters like <code>$</code> or <code>~</code> from
62  	 * being recognized as special.
63  	 */
64  	public static final BourneStyle BOURNE = new BourneStyle();
65  
66  	/** Bourne style, but permits <code>~user</code> at the start of the string. */
67  	public static final BourneUserPathStyle BOURNE_USER_PATH = new BourneUserPathStyle();
68  
69  	/**
70  	 * Quote an input string by the quoting rules.
71  	 * <p>
72  	 * If the input string does not require any quoting, the same String
73  	 * reference is returned to the caller.
74  	 * <p>
75  	 * Otherwise a quoted string is returned, including the opening and closing
76  	 * quotation marks at the start and end of the string. If the style does not
77  	 * permit raw Unicode characters then the string will first be encoded in
78  	 * UTF-8, with unprintable sequences possibly escaped by the rules.
79  	 *
80  	 * @param in
81  	 *            any non-null Unicode string.
82  	 * @return a quoted string. See above for details.
83  	 */
84  	public abstract String quote(String in);
85  
86  	/**
87  	 * Clean a previously quoted input, decoding the result via UTF-8.
88  	 * <p>
89  	 * This method must match quote such that:
90  	 *
91  	 * <pre>
92  	 * a.equals(dequote(quote(a)));
93  	 * </pre>
94  	 *
95  	 * is true for any <code>a</code>.
96  	 *
97  	 * @param in
98  	 *            a Unicode string to remove quoting from.
99  	 * @return the cleaned string.
100 	 * @see #dequote(byte[], int, int)
101 	 */
102 	public String dequote(String in) {
103 		final byte[] b = Constants.encode(in);
104 		return dequote(b, 0, b.length);
105 	}
106 
107 	/**
108 	 * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
109 	 * <p>
110 	 * This method must match quote such that:
111 	 *
112 	 * <pre>
113 	 * a.equals(dequote(Constants.encode(quote(a))));
114 	 * </pre>
115 	 *
116 	 * is true for any <code>a</code>.
117 	 * <p>
118 	 * This method removes any opening/closing quotation marks added by
119 	 * {@link #quote(String)}.
120 	 *
121 	 * @param in
122 	 *            the input buffer to parse.
123 	 * @param offset
124 	 *            first position within <code>in</code> to scan.
125 	 * @param end
126 	 *            one position past the last byte of <code>in</code> to scan.
127 	 * @return the cleaned string.
128 	 */
129 	public abstract String dequote(byte[] in, int offset, int end);
130 
131 	/**
132 	 * Quoting style used by the Bourne shell.
133 	 * <p>
134 	 * Quotes are unconditionally inserted during {@link #quote(String)}. This
135 	 * protects shell meta-characters like <code>$</code> or <code>~</code> from
136 	 * being recognized as special.
137 	 */
138 	public static class BourneStyle extends QuotedString {
139 		@Override
140 		public String quote(String in) {
141 			final StringBuilder r = new StringBuilder();
142 			r.append('\'');
143 			int start = 0, i = 0;
144 			for (; i < in.length(); i++) {
145 				switch (in.charAt(i)) {
146 				case '\'':
147 				case '!':
148 					r.append(in, start, i);
149 					r.append('\'');
150 					r.append('\\');
151 					r.append(in.charAt(i));
152 					r.append('\'');
153 					start = i + 1;
154 					break;
155 				}
156 			}
157 			r.append(in, start, i);
158 			r.append('\'');
159 			return r.toString();
160 		}
161 
162 		@Override
163 		public String dequote(byte[] in, int ip, int ie) {
164 			boolean inquote = false;
165 			final byte[] r = new byte[ie - ip];
166 			int rPtr = 0;
167 			while (ip < ie) {
168 				final byte b = in[ip++];
169 				switch (b) {
170 				case '\'':
171 					inquote = !inquote;
172 					continue;
173 				case '\\':
174 					if (inquote || ip == ie)
175 						r[rPtr++] = b; // literal within a quote
176 					else
177 						r[rPtr++] = in[ip++];
178 					continue;
179 				default:
180 					r[rPtr++] = b;
181 					continue;
182 				}
183 			}
184 			return RawParseUtils.decode(Constants.CHARSET, r, 0, rPtr);
185 		}
186 	}
187 
188 	/** Bourne style, but permits <code>~user</code> at the start of the string. */
189 	public static class BourneUserPathStyle extends BourneStyle {
190 		@Override
191 		public String quote(String in) {
192 			if (in.matches("^~[A-Za-z0-9_-]+$")) { //$NON-NLS-1$
193 				// If the string is just "~user" we can assume they
194 				// mean "~user/".
195 				//
196 				return in + "/"; //$NON-NLS-1$
197 			}
198 
199 			if (in.matches("^~[A-Za-z0-9_-]*/.*$")) { //$NON-NLS-1$
200 				// If the string is of "~/path" or "~user/path"
201 				// we must not escape ~/ or ~user/ from the shell.
202 				//
203 				final int i = in.indexOf('/') + 1;
204 				if (i == in.length())
205 					return in;
206 				return in.substring(0, i) + super.quote(in.substring(i));
207 			}
208 
209 			return super.quote(in);
210 		}
211 	}
212 
213 	/** Quoting style that obeys the rules Git applies to file names */
214 	public static final class GitPathStyle extends QuotedString {
215 		private static final byte[] quote;
216 		static {
217 			quote = new byte[128];
218 			Arrays.fill(quote, (byte) -1);
219 
220 			for (int i = '0'; i <= '9'; i++)
221 				quote[i] = 0;
222 			for (int i = 'a'; i <= 'z'; i++)
223 				quote[i] = 0;
224 			for (int i = 'A'; i <= 'Z'; i++)
225 				quote[i] = 0;
226 			quote[' '] = 0;
227 			quote['$'] = 0;
228 			quote['%'] = 0;
229 			quote['&'] = 0;
230 			quote['*'] = 0;
231 			quote['+'] = 0;
232 			quote[','] = 0;
233 			quote['-'] = 0;
234 			quote['.'] = 0;
235 			quote['/'] = 0;
236 			quote[':'] = 0;
237 			quote[';'] = 0;
238 			quote['='] = 0;
239 			quote['?'] = 0;
240 			quote['@'] = 0;
241 			quote['_'] = 0;
242 			quote['^'] = 0;
243 			quote['|'] = 0;
244 			quote['~'] = 0;
245 
246 			quote['\u0007'] = 'a';
247 			quote['\b'] = 'b';
248 			quote['\f'] = 'f';
249 			quote['\n'] = 'n';
250 			quote['\r'] = 'r';
251 			quote['\t'] = 't';
252 			quote['\u000B'] = 'v';
253 			quote['\\'] = '\\';
254 			quote['"'] = '"';
255 		}
256 
257 		@Override
258 		public String quote(String instr) {
259 			if (instr.length() == 0)
260 				return "\"\""; //$NON-NLS-1$
261 			boolean reuse = true;
262 			final byte[] in = Constants.encode(instr);
263 			final StringBuilder r = new StringBuilder(2 + in.length);
264 			r.append('"');
265 			for (int i = 0; i < in.length; i++) {
266 				final int c = in[i] & 0xff;
267 				if (c < quote.length) {
268 					final byte style = quote[c];
269 					if (style == 0) {
270 						r.append((char) c);
271 						continue;
272 					}
273 					if (style > 0) {
274 						reuse = false;
275 						r.append('\\');
276 						r.append((char) style);
277 						continue;
278 					}
279 				}
280 
281 				reuse = false;
282 				r.append('\\');
283 				r.append((char) (((c >> 6) & 03) + '0'));
284 				r.append((char) (((c >> 3) & 07) + '0'));
285 				r.append((char) (((c >> 0) & 07) + '0'));
286 			}
287 			if (reuse)
288 				return instr;
289 			r.append('"');
290 			return r.toString();
291 		}
292 
293 		@Override
294 		public String dequote(byte[] in, int inPtr, int inEnd) {
295 			if (2 <= inEnd - inPtr && in[inPtr] == '"' && in[inEnd - 1] == '"')
296 				return dq(in, inPtr + 1, inEnd - 1);
297 			return RawParseUtils.decode(Constants.CHARSET, in, inPtr, inEnd);
298 		}
299 
300 		private static String dq(byte[] in, int inPtr, int inEnd) {
301 			final byte[] r = new byte[inEnd - inPtr];
302 			int rPtr = 0;
303 			while (inPtr < inEnd) {
304 				final byte b = in[inPtr++];
305 				if (b != '\\') {
306 					r[rPtr++] = b;
307 					continue;
308 				}
309 
310 				if (inPtr == inEnd) {
311 					// Lone trailing backslash. Treat it as a literal.
312 					//
313 					r[rPtr++] = '\\';
314 					break;
315 				}
316 
317 				switch (in[inPtr++]) {
318 				case 'a':
319 					r[rPtr++] = 0x07 /* \a = BEL */;
320 					continue;
321 				case 'b':
322 					r[rPtr++] = '\b';
323 					continue;
324 				case 'f':
325 					r[rPtr++] = '\f';
326 					continue;
327 				case 'n':
328 					r[rPtr++] = '\n';
329 					continue;
330 				case 'r':
331 					r[rPtr++] = '\r';
332 					continue;
333 				case 't':
334 					r[rPtr++] = '\t';
335 					continue;
336 				case 'v':
337 					r[rPtr++] = 0x0B/* \v = VT */;
338 					continue;
339 
340 				case '\\':
341 				case '"':
342 					r[rPtr++] = in[inPtr - 1];
343 					continue;
344 
345 				case '0':
346 				case '1':
347 				case '2':
348 				case '3': {
349 					int cp = in[inPtr - 1] - '0';
350 					for (int n = 1; n < 3 && inPtr < inEnd; n++) {
351 						final byte c = in[inPtr];
352 						if ('0' <= c && c <= '7') {
353 							cp <<= 3;
354 							cp |= c - '0';
355 							inPtr++;
356 						} else {
357 							break;
358 						}
359 					}
360 					r[rPtr++] = (byte) cp;
361 					continue;
362 				}
363 
364 				default:
365 					// Any other code is taken literally.
366 					//
367 					r[rPtr++] = '\\';
368 					r[rPtr++] = in[inPtr - 1];
369 					continue;
370 				}
371 			}
372 
373 			return RawParseUtils.decode(Constants.CHARSET, r, 0, rPtr);
374 		}
375 
376 		private GitPathStyle() {
377 			// Singleton
378 		}
379 	}
380 }