View Javadoc
1   /*
2    * Copyright (C) 2008, Google Inc.
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  
44  package org.eclipse.jgit.util;
45  
import java.util.Arrays;
import java.util.regex.Pattern;

import org.eclipse.jgit.lib.Constants;
49  
50  /** Utility functions related to quoted string handling. */
51  public abstract class QuotedString {
52  	/** Shared instance of the quoting style that obeys the rules Git applies to file names. */
53  	public static final GitPathStyle GIT_PATH = new GitPathStyle();
54  
55  	/**
56  	 * Shared instance of the quoting style used by the Bourne shell.
57  	 * <p>
58  	 * Quotes are unconditionally inserted during {@link #quote(String)}. This
59  	 * protects shell meta-characters like <code>$</code> or <code>~</code> from
60  	 * being recognized as special.
61  	 */
62  	public static final BourneStyle BOURNE = new BourneStyle();
63  
64  	/** Shared instance of the Bourne style that leaves a leading <code>~user/</code> or <code>~/</code> prefix unquoted. */
65  	public static final BourneUserPathStyle BOURNE_USER_PATH = new BourneUserPathStyle();
66  
67  	/**
68  	 * Quote an input string according to the rules of this quoting style.
69  	 * <p>
70  	 * A style may return the same String reference when the input does not
71  	 * require any quoting; other styles always add quotation marks.
72  	 * <p>
73  	 * Otherwise a quoted string is returned, including the opening and closing
74  	 * quotation marks at the start and end of the string. If the style does not
75  	 * permit raw Unicode characters then the string will first be encoded in
76  	 * UTF-8, with unprintable sequences possibly escaped by the rules.
77  	 *
78  	 * @param in
79  	 *            any non-null Unicode string to quote.
80  	 * @return the quoted string, or <code>in</code> itself. See above for details.
81  	 */
82  	public abstract String quote(String in);
83  
84  	/**
85  	 * Clean a previously quoted input, decoding the result via UTF-8.
86  	 * <p>
87  	 * This method must match quote such that:
88  	 *
89  	 * <pre>
90  	 * a.equals(dequote(quote(a)));
91  	 * </pre>
92  	 *
93  	 * is true for any <code>a</code>.
94  	 *
95  	 * @param in
96  	 *            a Unicode string to remove quoting from.
97  	 * @return the cleaned string.
98  	 * @see #dequote(byte[], int, int)
99  	 */
100 	public String dequote(final String in) {
101 		final byte[] b = Constants.encode(in);
102 		return dequote(b, 0, b.length);
103 	}
104 
105 	/**
106 	 * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
107 	 * <p>
108 	 * This method must match quote such that:
109 	 *
110 	 * <pre>
111 	 * a.equals(dequote(b, 0, b.length)); // b = Constants.encode(quote(a))
112 	 * </pre>
113 	 *
114 	 * is true for any <code>a</code>.
115 	 * <p>
116 	 * This method removes any opening/closing quotation marks added by
117 	 * {@link #quote(String)}.
118 	 *
119 	 * @param in
120 	 *            the input buffer to parse.
121 	 * @param offset
122 	 *            first position within <code>in</code> to scan.
123 	 * @param end
124 	 *            one position past the last byte of <code>in</code> to scan.
125 	 * @return the cleaned string.
126 	 */
127 	public abstract String dequote(byte[] in, int offset, int end);
128 
129 	/**
130 	 * Quoting style used by the Bourne shell.
131 	 * <p>
132 	 * Quotes are unconditionally inserted during {@link #quote(String)}. This
133 	 * protects shell meta-characters like <code>$</code> or <code>~</code> from
134 	 * being recognized as special.
135 	 */
136 	public static class BourneStyle extends QuotedString {
137 		@Override
138 		public String quote(final String in) {
139 			final StringBuilder r = new StringBuilder();
140 			r.append('\'');
141 			int start = 0, i = 0;
142 			for (; i < in.length(); i++) {
143 				switch (in.charAt(i)) {
144 				case '\'':
145 				case '!':
146 					r.append(in, start, i);
147 					r.append('\'');
148 					r.append('\\');
149 					r.append(in.charAt(i));
150 					r.append('\'');
151 					start = i + 1;
152 					break;
153 				}
154 			}
155 			r.append(in, start, i);
156 			r.append('\'');
157 			return r.toString();
158 		}
159 
160 		@Override
161 		public String dequote(final byte[] in, int ip, final int ie) {
162 			boolean inquote = false;
163 			final byte[] r = new byte[ie - ip];
164 			int rPtr = 0;
165 			while (ip < ie) {
166 				final byte b = in[ip++];
167 				switch (b) {
168 				case '\'':
169 					inquote = !inquote;
170 					continue;
171 				case '\\':
172 					if (inquote || ip == ie)
173 						r[rPtr++] = b; // literal within a quote
174 					else
175 						r[rPtr++] = in[ip++];
176 					continue;
177 				default:
178 					r[rPtr++] = b;
179 					continue;
180 				}
181 			}
182 			return RawParseUtils.decode(Constants.CHARSET, r, 0, rPtr);
183 		}
184 	}
185 
186 	/** Bourne style, but permits <code>~user</code> at the start of the string. */
187 	public static class BourneUserPathStyle extends BourneStyle {
188 		@Override
189 		public String quote(final String in) {
190 			if (in.matches("^~[A-Za-z0-9_-]+$")) { //$NON-NLS-1$
191 				// If the string is just "~user" we can assume they
192 				// mean "~user/".
193 				//
194 				return in + "/"; //$NON-NLS-1$
195 			}
196 
197 			if (in.matches("^~[A-Za-z0-9_-]*/.*$")) { //$NON-NLS-1$
198 				// If the string is of "~/path" or "~user/path"
199 				// we must not escape ~/ or ~user/ from the shell.
200 				//
201 				final int i = in.indexOf('/') + 1;
202 				if (i == in.length())
203 					return in;
204 				return in.substring(0, i) + super.quote(in.substring(i));
205 			}
206 
207 			return super.quote(in);
208 		}
209 	}
210 
211 	/** Quoting style that obeys the rules Git applies to file names */
212 	public static final class GitPathStyle extends QuotedString {
213 		private static final byte[] quote;
214 		static {
215 			quote = new byte[128];
216 			Arrays.fill(quote, (byte) -1);
217 
218 			for (int i = '0'; i <= '9'; i++)
219 				quote[i] = 0;
220 			for (int i = 'a'; i <= 'z'; i++)
221 				quote[i] = 0;
222 			for (int i = 'A'; i <= 'Z'; i++)
223 				quote[i] = 0;
224 			quote[' '] = 0;
225 			quote['$'] = 0;
226 			quote['%'] = 0;
227 			quote['&'] = 0;
228 			quote['*'] = 0;
229 			quote['+'] = 0;
230 			quote[','] = 0;
231 			quote['-'] = 0;
232 			quote['.'] = 0;
233 			quote['/'] = 0;
234 			quote[':'] = 0;
235 			quote[';'] = 0;
236 			quote['='] = 0;
237 			quote['?'] = 0;
238 			quote['@'] = 0;
239 			quote['_'] = 0;
240 			quote['^'] = 0;
241 			quote['|'] = 0;
242 			quote['~'] = 0;
243 
244 			quote['\u0007'] = 'a';
245 			quote['\b'] = 'b';
246 			quote['\f'] = 'f';
247 			quote['\n'] = 'n';
248 			quote['\r'] = 'r';
249 			quote['\t'] = 't';
250 			quote['\u000B'] = 'v';
251 			quote['\\'] = '\\';
252 			quote['"'] = '"';
253 		}
254 
255 		@Override
256 		public String quote(final String instr) {
257 			if (instr.length() == 0)
258 				return "\"\""; //$NON-NLS-1$
259 			boolean reuse = true;
260 			final byte[] in = Constants.encode(instr);
261 			final StringBuilder r = new StringBuilder(2 + in.length);
262 			r.append('"');
263 			for (int i = 0; i < in.length; i++) {
264 				final int c = in[i] & 0xff;
265 				if (c < quote.length) {
266 					final byte style = quote[c];
267 					if (style == 0) {
268 						r.append((char) c);
269 						continue;
270 					}
271 					if (style > 0) {
272 						reuse = false;
273 						r.append('\\');
274 						r.append((char) style);
275 						continue;
276 					}
277 				}
278 
279 				reuse = false;
280 				r.append('\\');
281 				r.append((char) (((c >> 6) & 03) + '0'));
282 				r.append((char) (((c >> 3) & 07) + '0'));
283 				r.append((char) (((c >> 0) & 07) + '0'));
284 			}
285 			if (reuse)
286 				return instr;
287 			r.append('"');
288 			return r.toString();
289 		}
290 
291 		@Override
292 		public String dequote(final byte[] in, final int inPtr, final int inEnd) {
293 			if (2 <= inEnd - inPtr && in[inPtr] == '"' && in[inEnd - 1] == '"')
294 				return dq(in, inPtr + 1, inEnd - 1);
295 			return RawParseUtils.decode(Constants.CHARSET, in, inPtr, inEnd);
296 		}
297 
298 		private static String dq(final byte[] in, int inPtr, final int inEnd) {
299 			final byte[] r = new byte[inEnd - inPtr];
300 			int rPtr = 0;
301 			while (inPtr < inEnd) {
302 				final byte b = in[inPtr++];
303 				if (b != '\\') {
304 					r[rPtr++] = b;
305 					continue;
306 				}
307 
308 				if (inPtr == inEnd) {
309 					// Lone trailing backslash. Treat it as a literal.
310 					//
311 					r[rPtr++] = '\\';
312 					break;
313 				}
314 
315 				switch (in[inPtr++]) {
316 				case 'a':
317 					r[rPtr++] = 0x07 /* \a = BEL */;
318 					continue;
319 				case 'b':
320 					r[rPtr++] = '\b';
321 					continue;
322 				case 'f':
323 					r[rPtr++] = '\f';
324 					continue;
325 				case 'n':
326 					r[rPtr++] = '\n';
327 					continue;
328 				case 'r':
329 					r[rPtr++] = '\r';
330 					continue;
331 				case 't':
332 					r[rPtr++] = '\t';
333 					continue;
334 				case 'v':
335 					r[rPtr++] = 0x0B/* \v = VT */;
336 					continue;
337 
338 				case '\\':
339 				case '"':
340 					r[rPtr++] = in[inPtr - 1];
341 					continue;
342 
343 				case '0':
344 				case '1':
345 				case '2':
346 				case '3': {
347 					int cp = in[inPtr - 1] - '0';
348 					for (int n = 1; n < 3 && inPtr < inEnd; n++) {
349 						final byte c = in[inPtr];
350 						if ('0' <= c && c <= '7') {
351 							cp <<= 3;
352 							cp |= c - '0';
353 							inPtr++;
354 						} else {
355 							break;
356 						}
357 					}
358 					r[rPtr++] = (byte) cp;
359 					continue;
360 				}
361 
362 				default:
363 					// Any other code is taken literally.
364 					//
365 					r[rPtr++] = '\\';
366 					r[rPtr++] = in[inPtr - 1];
367 					continue;
368 				}
369 			}
370 
371 			return RawParseUtils.decode(Constants.CHARSET, r, 0, rPtr);
372 		}
373 
374 		private GitPathStyle() {
375 			// Singleton
376 		}
377 	}
378 }