View Javadoc
1   /*
2    * Copyright (C) 2008-2010, Google Inc.
3    * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4    * and other copyright owners as documented in the project's IP log.
5    *
6    * This program and the accompanying materials are made available
7    * under the terms of the Eclipse Distribution License v1.0 which
8    * accompanies this distribution, is reproduced below, and is
9    * available at http://www.eclipse.org/org/documents/edl-v10.php
10   *
11   * All rights reserved.
12   *
13   * Redistribution and use in source and binary forms, with or
14   * without modification, are permitted provided that the following
15   * conditions are met:
16   *
17   * - Redistributions of source code must retain the above copyright
18   *   notice, this list of conditions and the following disclaimer.
19   *
20   * - Redistributions in binary form must reproduce the above
21   *   copyright notice, this list of conditions and the following
22   *   disclaimer in the documentation and/or other materials provided
23   *   with the distribution.
24   *
25   * - Neither the name of the Eclipse Foundation, Inc. nor the
26   *   names of its contributors may be used to endorse or promote
27   *   products derived from this software without specific prior
28   *   written permission.
29   *
30   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   */
44  
45  package org.eclipse.jgit.lib;
46  
47  import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
48  import static org.eclipse.jgit.lib.Constants.OBJECT_ID_STRING_LENGTH;
49  import static org.eclipse.jgit.lib.Constants.OBJ_BAD;
50  import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
51  import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT;
52  import static org.eclipse.jgit.lib.Constants.OBJ_TAG;
53  import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
54  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_DATE;
55  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_EMAIL;
56  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_OBJECT_SHA1;
57  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_PARENT_SHA1;
58  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_TIMEZONE;
59  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_TREE_SHA1;
60  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_UTF8;
61  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.DUPLICATE_ENTRIES;
62  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.EMPTY_NAME;
63  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.FULL_PATHNAME;
64  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOT;
65  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOTDOT;
66  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOTGIT;
67  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_AUTHOR;
68  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_COMMITTER;
69  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_EMAIL;
70  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_OBJECT;
71  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_SPACE_BEFORE_DATE;
72  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TAG_ENTRY;
73  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TREE;
74  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TYPE_ENTRY;
75  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.NULL_SHA1;
76  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.TREE_NOT_SORTED;
77  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.UNKNOWN_TYPE;
78  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.WIN32_BAD_NAME;
79  import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.ZERO_PADDED_FILEMODE;
80  import static org.eclipse.jgit.util.Paths.compare;
81  import static org.eclipse.jgit.util.Paths.compareSameName;
82  import static org.eclipse.jgit.util.RawParseUtils.nextLF;
83  import static org.eclipse.jgit.util.RawParseUtils.parseBase10;
84  
85  import java.text.MessageFormat;
86  import java.text.Normalizer;
87  import java.util.EnumSet;
88  import java.util.HashSet;
89  import java.util.Locale;
90  import java.util.Set;
91  
92  import org.eclipse.jgit.annotations.NonNull;
93  import org.eclipse.jgit.annotations.Nullable;
94  import org.eclipse.jgit.errors.CorruptObjectException;
95  import org.eclipse.jgit.internal.JGitText;
96  import org.eclipse.jgit.util.MutableInteger;
97  import org.eclipse.jgit.util.RawParseUtils;
98  import org.eclipse.jgit.util.StringUtils;
99  
100 /**
101  * Verifies that an object is formatted correctly.
102  * <p>
103  * Verifications made by this class only check that the fields of an object are
104  * formatted correctly. The ObjectId checksum of the object is not verified, and
105  * connectivity links between objects are also not verified. It's assumed that
106  * the caller can provide both of these validations on its own.
107  * <p>
108  * Instances of this class are not thread safe, but they may be reused to
109  * perform multiple object validations.
110  */
111 public class ObjectChecker {
112 	/** Header "tree " */
113 	public static final byte[] tree = Constants.encodeASCII("tree "); //$NON-NLS-1$
114 
115 	/** Header "parent " */
116 	public static final byte[] parent = Constants.encodeASCII("parent "); //$NON-NLS-1$
117 
118 	/** Header "author " */
119 	public static final byte[] author = Constants.encodeASCII("author "); //$NON-NLS-1$
120 
121 	/** Header "committer " */
122 	public static final byte[] committer = Constants.encodeASCII("committer "); //$NON-NLS-1$
123 
124 	/** Header "encoding " */
125 	public static final byte[] encoding = Constants.encodeASCII("encoding "); //$NON-NLS-1$
126 
127 	/** Header "object " */
128 	public static final byte[] object = Constants.encodeASCII("object "); //$NON-NLS-1$
129 
130 	/** Header "type " */
131 	public static final byte[] type = Constants.encodeASCII("type "); //$NON-NLS-1$
132 
133 	/** Header "tag " */
134 	public static final byte[] tag = Constants.encodeASCII("tag "); //$NON-NLS-1$
135 
136 	/** Header "tagger " */
137 	public static final byte[] tagger = Constants.encodeASCII("tagger "); //$NON-NLS-1$
138 
139 	/**
140 	 * Potential issues identified by the checker.
141 	 *
142 	 * @since 4.2
143 	 */
144 	public enum ErrorType {
145 		// @formatter:off
146 		// These names match git-core so that fsck section keys also match.
147 		/***/ NULL_SHA1,
148 		/***/ DUPLICATE_ENTRIES,
149 		/***/ TREE_NOT_SORTED,
150 		/***/ ZERO_PADDED_FILEMODE,
151 		/***/ EMPTY_NAME,
152 		/***/ FULL_PATHNAME,
153 		/***/ HAS_DOT,
154 		/***/ HAS_DOTDOT,
155 		/***/ HAS_DOTGIT,
156 		/***/ BAD_OBJECT_SHA1,
157 		/***/ BAD_PARENT_SHA1,
158 		/***/ BAD_TREE_SHA1,
159 		/***/ MISSING_AUTHOR,
160 		/***/ MISSING_COMMITTER,
161 		/***/ MISSING_OBJECT,
162 		/***/ MISSING_TREE,
163 		/***/ MISSING_TYPE_ENTRY,
164 		/***/ MISSING_TAG_ENTRY,
165 		/***/ BAD_DATE,
166 		/***/ BAD_EMAIL,
167 		/***/ BAD_TIMEZONE,
168 		/***/ MISSING_EMAIL,
169 		/***/ MISSING_SPACE_BEFORE_DATE,
170 		/***/ UNKNOWN_TYPE,
171 
172 		// These are unique to JGit.
173 		/***/ WIN32_BAD_NAME,
174 		/***/ BAD_UTF8;
175 		// @formatter:on
176 
177 		/** @return camelCaseVersion of the name. */
178 		public String getMessageId() {
179 			String n = name();
180 			StringBuilder r = new StringBuilder(n.length());
181 			for (int i = 0; i < n.length(); i++) {
182 				char c = n.charAt(i);
183 				if (c != '_') {
184 					r.append(StringUtils.toLowerCase(c));
185 				} else {
186 					r.append(n.charAt(++i));
187 				}
188 			}
189 			return r.toString();
190 		}
191 	}
192 
193 	private final MutableObjectId tempId = new MutableObjectId();
194 	private final MutableInteger bufPtr = new MutableInteger();
195 
196 	private EnumSet<ErrorType> errors = EnumSet.allOf(ErrorType.class);
197 	private ObjectIdSet skipList;
198 	private boolean allowInvalidPersonIdent;
199 	private boolean windows;
200 	private boolean macosx;
201 
202 	/**
203 	 * Enable accepting specific malformed (but not horribly broken) objects.
204 	 *
205 	 * @param objects
206 	 *            collection of object names known to be broken in a non-fatal
207 	 *            way that should be ignored by the checker.
208 	 * @return {@code this}
209 	 * @since 4.2
210 	 */
211 	public ObjectChecker setSkipList(@Nullable ObjectIdSet objects) {
212 		skipList = objects;
213 		return this;
214 	}
215 
216 	/**
217 	 * Configure error types to be ignored across all objects.
218 	 *
219 	 * @param ids
220 	 *            error types to ignore. The caller's set is copied.
221 	 * @return {@code this}
222 	 * @since 4.2
223 	 */
224 	public ObjectChecker setIgnore(@Nullable Set<ErrorType> ids) {
225 		errors = EnumSet.allOf(ErrorType.class);
226 		if (ids != null) {
227 			errors.removeAll(ids);
228 		}
229 		return this;
230 	}
231 
232 	/**
233 	 * Add message type to be ignored across all objects.
234 	 *
235 	 * @param id
236 	 *            error type to ignore.
237 	 * @param ignore
238 	 *            true to ignore this error; false to treat the error as an
239 	 *            error and throw.
240 	 * @return {@code this}
241 	 * @since 4.2
242 	 */
243 	public ObjectChecker setIgnore(ErrorType id, boolean ignore) {
244 		if (ignore) {
245 			errors.remove(id);
246 		} else {
247 			errors.add(id);
248 		}
249 		return this;
250 	}
251 
252 	/**
253 	 * Enable accepting leading zero mode in tree entries.
254 	 * <p>
255 	 * Some broken Git libraries generated leading zeros in the mode part of
256 	 * tree entries. This is technically incorrect but gracefully allowed by
257 	 * git-core. JGit rejects such trees by default, but may need to accept
258 	 * them on broken histories.
259 	 * <p>
260 	 * Same as {@code setIgnore(ZERO_PADDED_FILEMODE, allow)}.
261 	 *
262 	 * @param allow allow leading zero mode.
263 	 * @return {@code this}.
264 	 * @since 3.4
265 	 */
266 	public ObjectChecker setAllowLeadingZeroFileMode(boolean allow) {
267 		return setIgnore(ZERO_PADDED_FILEMODE, allow);
268 	}
269 
270 	/**
271 	 * Enable accepting invalid author, committer and tagger identities.
272 	 * <p>
273 	 * Some broken Git versions/libraries allowed users to create commits and
274 	 * tags with invalid formatting between the name, email and timestamp.
275 	 *
276 	 * @param allow
277 	 *            if true accept invalid person identity strings.
278 	 * @return {@code this}.
279 	 * @since 4.0
280 	 */
281 	public ObjectChecker setAllowInvalidPersonIdent(boolean allow) {
282 		allowInvalidPersonIdent = allow;
283 		return this;
284 	}
285 
286 	/**
287 	 * Restrict trees to only names legal on Windows platforms.
288 	 * <p>
289 	 * Also rejects any mixed case forms of reserved names ({@code .git}).
290 	 *
291 	 * @param win true if Windows name checking should be performed.
292 	 * @return {@code this}.
293 	 * @since 3.4
294 	 */
295 	public ObjectChecker setSafeForWindows(boolean win) {
296 		windows = win;
297 		return this;
298 	}
299 
300 	/**
301 	 * Restrict trees to only names legal on Mac OS X platforms.
302 	 * <p>
303 	 * Rejects any mixed case forms of reserved names ({@code .git})
304 	 * for users working on HFS+ in case-insensitive (default) mode.
305 	 *
306 	 * @param mac true if Mac OS X name checking should be performed.
307 	 * @return {@code this}.
308 	 * @since 3.4
309 	 */
310 	public ObjectChecker setSafeForMacOS(boolean mac) {
311 		macosx = mac;
312 		return this;
313 	}
314 
315 	/**
316 	 * Check an object for parsing errors.
317 	 *
318 	 * @param objType
319 	 *            type of the object. Must be a valid object type code in
320 	 *            {@link org.eclipse.jgit.lib.Constants}.
321 	 * @param raw
322 	 *            the raw data which comprises the object. This should be in the
323 	 *            canonical format (that is the format used to generate the
324 	 *            ObjectId of the object). The array is never modified.
325 	 * @throws org.eclipse.jgit.errors.CorruptObjectException
326 	 *             if an error is identified.
327 	 */
328 	public void check(int objType, byte[] raw)
329 			throws CorruptObjectException {
330 		check(idFor(objType, raw), objType, raw);
331 	}
332 
333 	/**
334 	 * Check an object for parsing errors.
335 	 *
336 	 * @param id
337 	 *            identify of the object being checked.
338 	 * @param objType
339 	 *            type of the object. Must be a valid object type code in
340 	 *            {@link org.eclipse.jgit.lib.Constants}.
341 	 * @param raw
342 	 *            the raw data which comprises the object. This should be in the
343 	 *            canonical format (that is the format used to generate the
344 	 *            ObjectId of the object). The array is never modified.
345 	 * @throws org.eclipse.jgit.errors.CorruptObjectException
346 	 *             if an error is identified.
347 	 * @since 4.2
348 	 */
349 	public void check(@Nullable AnyObjectId id, int objType, byte[] raw)
350 			throws CorruptObjectException {
351 		switch (objType) {
352 		case OBJ_COMMIT:
353 			checkCommit(id, raw);
354 			break;
355 		case OBJ_TAG:
356 			checkTag(id, raw);
357 			break;
358 		case OBJ_TREE:
359 			checkTree(id, raw);
360 			break;
361 		case OBJ_BLOB:
362 			BlobObjectChecker checker = newBlobObjectChecker();
363 			if (checker == null) {
364 				checkBlob(raw);
365 			} else {
366 				checker.update(raw, 0, raw.length);
367 				checker.endBlob(id);
368 			}
369 			break;
370 		default:
371 			report(UNKNOWN_TYPE, id, MessageFormat.format(
372 					JGitText.get().corruptObjectInvalidType2,
373 					Integer.valueOf(objType)));
374 		}
375 	}
376 
377 	private boolean checkId(byte[] raw) {
378 		int p = bufPtr.value;
379 		try {
380 			tempId.fromString(raw, p);
381 		} catch (IllegalArgumentException e) {
382 			bufPtr.value = nextLF(raw, p);
383 			return false;
384 		}
385 
386 		p += OBJECT_ID_STRING_LENGTH;
387 		if (raw[p] == '\n') {
388 			bufPtr.value = p + 1;
389 			return true;
390 		}
391 		bufPtr.value = nextLF(raw, p);
392 		return false;
393 	}
394 
395 	private void checkPersonIdent(byte[] raw, @Nullable AnyObjectId id)
396 			throws CorruptObjectException {
397 		if (allowInvalidPersonIdent) {
398 			bufPtr.value = nextLF(raw, bufPtr.value);
399 			return;
400 		}
401 
402 		final int emailB = nextLF(raw, bufPtr.value, '<');
403 		if (emailB == bufPtr.value || raw[emailB - 1] != '<') {
404 			report(MISSING_EMAIL, id, JGitText.get().corruptObjectMissingEmail);
405 			bufPtr.value = nextLF(raw, bufPtr.value);
406 			return;
407 		}
408 
409 		final int emailE = nextLF(raw, emailB, '>');
410 		if (emailE == emailB || raw[emailE - 1] != '>') {
411 			report(BAD_EMAIL, id, JGitText.get().corruptObjectBadEmail);
412 			bufPtr.value = nextLF(raw, bufPtr.value);
413 			return;
414 		}
415 		if (emailE == raw.length || raw[emailE] != ' ') {
416 			report(MISSING_SPACE_BEFORE_DATE, id,
417 					JGitText.get().corruptObjectBadDate);
418 			bufPtr.value = nextLF(raw, bufPtr.value);
419 			return;
420 		}
421 
422 		parseBase10(raw, emailE + 1, bufPtr); // when
423 		if (emailE + 1 == bufPtr.value || bufPtr.value == raw.length
424 				|| raw[bufPtr.value] != ' ') {
425 			report(BAD_DATE, id, JGitText.get().corruptObjectBadDate);
426 			bufPtr.value = nextLF(raw, bufPtr.value);
427 			return;
428 		}
429 
430 		int p = bufPtr.value + 1;
431 		parseBase10(raw, p, bufPtr); // tz offset
432 		if (p == bufPtr.value) {
433 			report(BAD_TIMEZONE, id, JGitText.get().corruptObjectBadTimezone);
434 			bufPtr.value = nextLF(raw, bufPtr.value);
435 			return;
436 		}
437 
438 		p = bufPtr.value;
439 		if (raw[p] == '\n') {
440 			bufPtr.value = p + 1;
441 		} else {
442 			report(BAD_TIMEZONE, id, JGitText.get().corruptObjectBadTimezone);
443 			bufPtr.value = nextLF(raw, p);
444 		}
445 	}
446 
447 	/**
448 	 * Check a commit for errors.
449 	 *
450 	 * @param raw
451 	 *            the commit data. The array is never modified.
452 	 * @throws org.eclipse.jgit.errors.CorruptObjectException
453 	 *             if any error was detected.
454 	 */
455 	public void checkCommit(byte[] raw) throws CorruptObjectException {
456 		checkCommit(idFor(OBJ_COMMIT, raw), raw);
457 	}
458 
459 	/**
460 	 * Check a commit for errors.
461 	 *
462 	 * @param id
463 	 *            identity of the object being checked.
464 	 * @param raw
465 	 *            the commit data. The array is never modified.
466 	 * @throws org.eclipse.jgit.errors.CorruptObjectException
467 	 *             if any error was detected.
468 	 * @since 4.2
469 	 */
470 	public void checkCommit(@Nullable AnyObjectId id, byte[] raw)
471 			throws CorruptObjectException {
472 		bufPtr.value = 0;
473 
474 		if (!match(raw, tree)) {
475 			report(MISSING_TREE, id, JGitText.get().corruptObjectNotreeHeader);
476 		} else if (!checkId(raw)) {
477 			report(BAD_TREE_SHA1, id, JGitText.get().corruptObjectInvalidTree);
478 		}
479 
480 		while (match(raw, parent)) {
481 			if (!checkId(raw)) {
482 				report(BAD_PARENT_SHA1, id,
483 						JGitText.get().corruptObjectInvalidParent);
484 			}
485 		}
486 
487 		if (match(raw, author)) {
488 			checkPersonIdent(raw, id);
489 		} else {
490 			report(MISSING_AUTHOR, id, JGitText.get().corruptObjectNoAuthor);
491 		}
492 
493 		if (match(raw, committer)) {
494 			checkPersonIdent(raw, id);
495 		} else {
496 			report(MISSING_COMMITTER, id,
497 					JGitText.get().corruptObjectNoCommitter);
498 		}
499 	}
500 
501 	/**
502 	 * Check an annotated tag for errors.
503 	 *
504 	 * @param raw
505 	 *            the tag data. The array is never modified.
506 	 * @throws org.eclipse.jgit.errors.CorruptObjectException
507 	 *             if any error was detected.
508 	 */
509 	public void checkTag(byte[] raw) throws CorruptObjectException {
510 		checkTag(idFor(OBJ_TAG, raw), raw);
511 	}
512 
513 	/**
514 	 * Check an annotated tag for errors.
515 	 *
516 	 * @param id
517 	 *            identity of the object being checked.
518 	 * @param raw
519 	 *            the tag data. The array is never modified.
520 	 * @throws org.eclipse.jgit.errors.CorruptObjectException
521 	 *             if any error was detected.
522 	 * @since 4.2
523 	 */
524 	public void checkTag(@Nullable AnyObjectId id, byte[] raw)
525 			throws CorruptObjectException {
526 		bufPtr.value = 0;
527 		if (!match(raw, object)) {
528 			report(MISSING_OBJECT, id,
529 					JGitText.get().corruptObjectNoObjectHeader);
530 		} else if (!checkId(raw)) {
531 			report(BAD_OBJECT_SHA1, id,
532 					JGitText.get().corruptObjectInvalidObject);
533 		}
534 
535 		if (!match(raw, type)) {
536 			report(MISSING_TYPE_ENTRY, id,
537 					JGitText.get().corruptObjectNoTypeHeader);
538 		}
539 		bufPtr.value = nextLF(raw, bufPtr.value);
540 
541 		if (!match(raw, tag)) {
542 			report(MISSING_TAG_ENTRY, id,
543 					JGitText.get().corruptObjectNoTagHeader);
544 		}
545 		bufPtr.value = nextLF(raw, bufPtr.value);
546 
547 		if (match(raw, tagger)) {
548 			checkPersonIdent(raw, id);
549 		}
550 	}
551 
552 	private static boolean duplicateName(final byte[] raw,
553 			final int thisNamePos, final int thisNameEnd) {
554 		final int sz = raw.length;
555 		int nextPtr = thisNameEnd + 1 + Constants.OBJECT_ID_LENGTH;
556 		for (;;) {
557 			int nextMode = 0;
558 			for (;;) {
559 				if (nextPtr >= sz)
560 					return false;
561 				final byte c = raw[nextPtr++];
562 				if (' ' == c)
563 					break;
564 				nextMode <<= 3;
565 				nextMode += c - '0';
566 			}
567 
568 			final int nextNamePos = nextPtr;
569 			for (;;) {
570 				if (nextPtr == sz)
571 					return false;
572 				final byte c = raw[nextPtr++];
573 				if (c == 0)
574 					break;
575 			}
576 			if (nextNamePos + 1 == nextPtr)
577 				return false;
578 
579 			int cmp = compareSameName(
580 					raw, thisNamePos, thisNameEnd,
581 					raw, nextNamePos, nextPtr - 1, nextMode);
582 			if (cmp < 0)
583 				return false;
584 			else if (cmp == 0)
585 				return true;
586 
587 			nextPtr += Constants.OBJECT_ID_LENGTH;
588 		}
589 	}
590 
591 	/**
592 	 * Check a canonical formatted tree for errors.
593 	 *
594 	 * @param raw
595 	 *            the raw tree data. The array is never modified.
596 	 * @throws org.eclipse.jgit.errors.CorruptObjectException
597 	 *             if any error was detected.
598 	 */
599 	public void checkTree(byte[] raw) throws CorruptObjectException {
600 		checkTree(idFor(OBJ_TREE, raw), raw);
601 	}
602 
603 	/**
604 	 * Check a canonical formatted tree for errors.
605 	 *
606 	 * @param id
607 	 *            identity of the object being checked.
608 	 * @param raw
609 	 *            the raw tree data. The array is never modified.
610 	 * @throws org.eclipse.jgit.errors.CorruptObjectException
611 	 *             if any error was detected.
612 	 * @since 4.2
613 	 */
614 	public void checkTree(@Nullable AnyObjectId id, byte[] raw)
615 			throws CorruptObjectException {
616 		final int sz = raw.length;
617 		int ptr = 0;
618 		int lastNameB = 0, lastNameE = 0, lastMode = 0;
619 		Set<String> normalized = windows || macosx
620 				? new HashSet<>()
621 				: null;
622 
623 		while (ptr < sz) {
624 			int thisMode = 0;
625 			for (;;) {
626 				if (ptr == sz) {
627 					throw new CorruptObjectException(
628 							JGitText.get().corruptObjectTruncatedInMode);
629 				}
630 				final byte c = raw[ptr++];
631 				if (' ' == c)
632 					break;
633 				if (c < '0' || c > '7') {
634 					throw new CorruptObjectException(
635 							JGitText.get().corruptObjectInvalidModeChar);
636 				}
637 				if (thisMode == 0 && c == '0') {
638 					report(ZERO_PADDED_FILEMODE, id,
639 							JGitText.get().corruptObjectInvalidModeStartsZero);
640 				}
641 				thisMode <<= 3;
642 				thisMode += c - '0';
643 			}
644 
645 			if (FileMode.fromBits(thisMode).getObjectType() == OBJ_BAD) {
646 				throw new CorruptObjectException(MessageFormat.format(
647 						JGitText.get().corruptObjectInvalidMode2,
648 						Integer.valueOf(thisMode)));
649 			}
650 
651 			final int thisNameB = ptr;
652 			ptr = scanPathSegment(raw, ptr, sz, id);
653 			if (ptr == sz || raw[ptr] != 0) {
654 				throw new CorruptObjectException(
655 						JGitText.get().corruptObjectTruncatedInName);
656 			}
657 			checkPathSegment2(raw, thisNameB, ptr, id);
658 			if (normalized != null) {
659 				if (!normalized.add(normalize(raw, thisNameB, ptr))) {
660 					report(DUPLICATE_ENTRIES, id,
661 							JGitText.get().corruptObjectDuplicateEntryNames);
662 				}
663 			} else if (duplicateName(raw, thisNameB, ptr)) {
664 				report(DUPLICATE_ENTRIES, id,
665 						JGitText.get().corruptObjectDuplicateEntryNames);
666 			}
667 
668 			if (lastNameB != 0) {
669 				int cmp = compare(
670 						raw, lastNameB, lastNameE, lastMode,
671 						raw, thisNameB, ptr, thisMode);
672 				if (cmp > 0) {
673 					report(TREE_NOT_SORTED, id,
674 							JGitText.get().corruptObjectIncorrectSorting);
675 				}
676 			}
677 
678 			lastNameB = thisNameB;
679 			lastNameE = ptr;
680 			lastMode = thisMode;
681 
682 			ptr += 1 + OBJECT_ID_LENGTH;
683 			if (ptr > sz) {
684 				throw new CorruptObjectException(
685 						JGitText.get().corruptObjectTruncatedInObjectId);
686 			}
687 			if (ObjectId.zeroId().compareTo(raw, ptr - OBJECT_ID_LENGTH) == 0) {
688 				report(NULL_SHA1, id, JGitText.get().corruptObjectZeroId);
689 			}
690 		}
691 	}
692 
693 	private int scanPathSegment(byte[] raw, int ptr, int end,
694 			@Nullable AnyObjectId id) throws CorruptObjectException {
695 		for (; ptr < end; ptr++) {
696 			byte c = raw[ptr];
697 			if (c == 0) {
698 				return ptr;
699 			}
700 			if (c == '/') {
701 				report(FULL_PATHNAME, id,
702 						JGitText.get().corruptObjectNameContainsSlash);
703 			}
704 			if (windows && isInvalidOnWindows(c)) {
705 				if (c > 31) {
706 					throw new CorruptObjectException(String.format(
707 							JGitText.get().corruptObjectNameContainsChar,
708 							Byte.valueOf(c)));
709 				}
710 				throw new CorruptObjectException(String.format(
711 						JGitText.get().corruptObjectNameContainsByte,
712 						Integer.valueOf(c & 0xff)));
713 			}
714 		}
715 		return ptr;
716 	}
717 
718 	@Nullable
719 	private ObjectId idFor(int objType, byte[] raw) {
720 		if (skipList != null) {
721 			try (ObjectInserter.Formatter fmt = new ObjectInserter.Formatter()) {
722 				return fmt.idFor(objType, raw);
723 			}
724 		}
725 		return null;
726 	}
727 
728 	private void report(@NonNull ErrorType err, @Nullable AnyObjectId id,
729 			String why) throws CorruptObjectException {
730 		if (errors.contains(err)
731 				&& (id == null || skipList == null || !skipList.contains(id))) {
732 			if (id != null) {
733 				throw new CorruptObjectException(err, id, why);
734 			}
735 			throw new CorruptObjectException(why);
736 		}
737 	}
738 
739 	/**
740 	 * Check tree path entry for validity.
741 	 * <p>
742 	 * Unlike {@link #checkPathSegment(byte[], int, int)}, this version scans a
743 	 * multi-directory path string such as {@code "src/main.c"}.
744 	 *
745 	 * @param path
746 	 *            path string to scan.
747 	 * @throws org.eclipse.jgit.errors.CorruptObjectException
748 	 *             path is invalid.
749 	 * @since 3.6
750 	 */
751 	public void checkPath(String path) throws CorruptObjectException {
752 		byte[] buf = Constants.encode(path);
753 		checkPath(buf, 0, buf.length);
754 	}
755 
756 	/**
757 	 * Check tree path entry for validity.
758 	 * <p>
759 	 * Unlike {@link #checkPathSegment(byte[], int, int)}, this version scans a
760 	 * multi-directory path string such as {@code "src/main.c"}.
761 	 *
762 	 * @param raw
763 	 *            buffer to scan.
764 	 * @param ptr
765 	 *            offset to first byte of the name.
766 	 * @param end
767 	 *            offset to one past last byte of name.
768 	 * @throws org.eclipse.jgit.errors.CorruptObjectException
769 	 *             path is invalid.
770 	 * @since 3.6
771 	 */
772 	public void checkPath(byte[] raw, int ptr, int end)
773 			throws CorruptObjectException {
774 		int start = ptr;
775 		for (; ptr < end; ptr++) {
776 			if (raw[ptr] == '/') {
777 				checkPathSegment(raw, start, ptr);
778 				start = ptr + 1;
779 			}
780 		}
781 		checkPathSegment(raw, start, end);
782 	}
783 
784 	/**
785 	 * Check tree path entry for validity.
786 	 *
787 	 * @param raw
788 	 *            buffer to scan.
789 	 * @param ptr
790 	 *            offset to first byte of the name.
791 	 * @param end
792 	 *            offset to one past last byte of name.
793 	 * @throws org.eclipse.jgit.errors.CorruptObjectException
794 	 *             name is invalid.
795 	 * @since 3.4
796 	 */
797 	public void checkPathSegment(byte[] raw, int ptr, int end)
798 			throws CorruptObjectException {
799 		int e = scanPathSegment(raw, ptr, end, null);
800 		if (e < end && raw[e] == 0)
801 			throw new CorruptObjectException(
802 					JGitText.get().corruptObjectNameContainsNullByte);
803 		checkPathSegment2(raw, ptr, end, null);
804 	}
805 
806 	private void checkPathSegment2(byte[] raw, int ptr, int end,
807 			@Nullable AnyObjectId id) throws CorruptObjectException {
808 		if (ptr == end) {
809 			report(EMPTY_NAME, id, JGitText.get().corruptObjectNameZeroLength);
810 			return;
811 		}
812 
813 		if (raw[ptr] == '.') {
814 			switch (end - ptr) {
815 			case 1:
816 				report(HAS_DOT, id, JGitText.get().corruptObjectNameDot);
817 				break;
818 			case 2:
819 				if (raw[ptr + 1] == '.') {
820 					report(HAS_DOTDOT, id,
821 							JGitText.get().corruptObjectNameDotDot);
822 				}
823 				break;
824 			case 4:
825 				if (isGit(raw, ptr + 1)) {
826 					report(HAS_DOTGIT, id, String.format(
827 							JGitText.get().corruptObjectInvalidName,
828 							RawParseUtils.decode(raw, ptr, end)));
829 				}
830 				break;
831 			default:
832 				if (end - ptr > 4 && isNormalizedGit(raw, ptr + 1, end)) {
833 					report(HAS_DOTGIT, id, String.format(
834 							JGitText.get().corruptObjectInvalidName,
835 							RawParseUtils.decode(raw, ptr, end)));
836 				}
837 			}
838 		} else if (isGitTilde1(raw, ptr, end)) {
839 			report(HAS_DOTGIT, id, String.format(
840 					JGitText.get().corruptObjectInvalidName,
841 					RawParseUtils.decode(raw, ptr, end)));
842 		}
843 		if (macosx && isMacHFSGit(raw, ptr, end, id)) {
844 			report(HAS_DOTGIT, id, String.format(
845 					JGitText.get().corruptObjectInvalidNameIgnorableUnicode,
846 					RawParseUtils.decode(raw, ptr, end)));
847 		}
848 
849 		if (windows) {
850 			// Windows ignores space and dot at end of file name.
851 			if (raw[end - 1] == ' ' || raw[end - 1] == '.') {
852 				report(WIN32_BAD_NAME, id, String.format(
853 						JGitText.get().corruptObjectInvalidNameEnd,
854 						Character.valueOf(((char) raw[end - 1]))));
855 			}
856 			if (end - ptr >= 3) {
857 				checkNotWindowsDevice(raw, ptr, end, id);
858 			}
859 		}
860 	}
861 
862 	// Mac's HFS+ folds permutations of ".git" and Unicode ignorable characters
863 	// to ".git" therefore we should prevent such names
864 	private boolean isMacHFSGit(byte[] raw, int ptr, int end,
865 			@Nullable AnyObjectId id) throws CorruptObjectException {
866 		boolean ignorable = false;
867 		byte[] git = new byte[] { '.', 'g', 'i', 't' };
868 		int g = 0;
869 		while (ptr < end) {
870 			switch (raw[ptr]) {
871 			case (byte) 0xe2: // http://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192
872 				if (!checkTruncatedIgnorableUTF8(raw, ptr, end, id)) {
873 					return false;
874 				}
875 				switch (raw[ptr + 1]) {
876 				case (byte) 0x80:
877 					switch (raw[ptr + 2]) {
878 					case (byte) 0x8c:	// U+200C 0xe2808c ZERO WIDTH NON-JOINER
879 					case (byte) 0x8d:	// U+200D 0xe2808d ZERO WIDTH JOINER
880 					case (byte) 0x8e:	// U+200E 0xe2808e LEFT-TO-RIGHT MARK
881 					case (byte) 0x8f:	// U+200F 0xe2808f RIGHT-TO-LEFT MARK
882 					case (byte) 0xaa:	// U+202A 0xe280aa LEFT-TO-RIGHT EMBEDDING
883 					case (byte) 0xab:	// U+202B 0xe280ab RIGHT-TO-LEFT EMBEDDING
884 					case (byte) 0xac:	// U+202C 0xe280ac POP DIRECTIONAL FORMATTING
885 					case (byte) 0xad:	// U+202D 0xe280ad LEFT-TO-RIGHT OVERRIDE
886 					case (byte) 0xae:	// U+202E 0xe280ae RIGHT-TO-LEFT OVERRIDE
887 						ignorable = true;
888 						ptr += 3;
889 						continue;
890 					default:
891 						return false;
892 					}
893 				case (byte) 0x81:
894 					switch (raw[ptr + 2]) {
895 					case (byte) 0xaa:	// U+206A 0xe281aa INHIBIT SYMMETRIC SWAPPING
896 					case (byte) 0xab:	// U+206B 0xe281ab ACTIVATE SYMMETRIC SWAPPING
897 					case (byte) 0xac:	// U+206C 0xe281ac INHIBIT ARABIC FORM SHAPING
898 					case (byte) 0xad:	// U+206D 0xe281ad ACTIVATE ARABIC FORM SHAPING
899 					case (byte) 0xae:	// U+206E 0xe281ae NATIONAL DIGIT SHAPES
900 					case (byte) 0xaf:	// U+206F 0xe281af NOMINAL DIGIT SHAPES
901 						ignorable = true;
902 						ptr += 3;
903 						continue;
904 					default:
905 						return false;
906 					}
907 				default:
908 					return false;
909 				}
910 			case (byte) 0xef: // http://www.utf8-chartable.de/unicode-utf8-table.pl?start=65024
911 				if (!checkTruncatedIgnorableUTF8(raw, ptr, end, id)) {
912 					return false;
913 				}
914 				// U+FEFF 0xefbbbf ZERO WIDTH NO-BREAK SPACE
915 				if ((raw[ptr + 1] == (byte) 0xbb)
916 						&& (raw[ptr + 2] == (byte) 0xbf)) {
917 					ignorable = true;
918 					ptr += 3;
919 					continue;
920 				}
921 				return false;
922 			default:
923 				if (g == 4)
924 					return false;
925 				if (raw[ptr++] != git[g++])
926 					return false;
927 			}
928 		}
929 		if (g == 4 && ignorable)
930 			return true;
931 		return false;
932 	}
933 
934 	private boolean checkTruncatedIgnorableUTF8(byte[] raw, int ptr, int end,
935 			@Nullable AnyObjectId id) throws CorruptObjectException {
936 		if ((ptr + 2) >= end) {
937 			report(BAD_UTF8, id, MessageFormat.format(
938 					JGitText.get().corruptObjectInvalidNameInvalidUtf8,
939 					toHexString(raw, ptr, end)));
940 			return false;
941 		}
942 		return true;
943 	}
944 
945 	private static String toHexString(byte[] raw, int ptr, int end) {
946 		StringBuilder b = new StringBuilder("0x"); //$NON-NLS-1$
947 		for (int i = ptr; i < end; i++)
948 			b.append(String.format("%02x", Byte.valueOf(raw[i]))); //$NON-NLS-1$
949 		return b.toString();
950 	}
951 
952 	private void checkNotWindowsDevice(byte[] raw, int ptr, int end,
953 			@Nullable AnyObjectId id) throws CorruptObjectException {
954 		switch (toLower(raw[ptr])) {
955 		case 'a': // AUX
956 			if (end - ptr >= 3
957 					&& toLower(raw[ptr + 1]) == 'u'
958 					&& toLower(raw[ptr + 2]) == 'x'
959 					&& (end - ptr == 3 || raw[ptr + 3] == '.')) {
960 				report(WIN32_BAD_NAME, id,
961 						JGitText.get().corruptObjectInvalidNameAux);
962 			}
963 			break;
964 
965 		case 'c': // CON, COM[1-9]
966 			if (end - ptr >= 3
967 					&& toLower(raw[ptr + 2]) == 'n'
968 					&& toLower(raw[ptr + 1]) == 'o'
969 					&& (end - ptr == 3 || raw[ptr + 3] == '.')) {
970 				report(WIN32_BAD_NAME, id,
971 						JGitText.get().corruptObjectInvalidNameCon);
972 			}
973 			if (end - ptr >= 4
974 					&& toLower(raw[ptr + 2]) == 'm'
975 					&& toLower(raw[ptr + 1]) == 'o'
976 					&& isPositiveDigit(raw[ptr + 3])
977 					&& (end - ptr == 4 || raw[ptr + 4] == '.')) {
978 				report(WIN32_BAD_NAME, id, String.format(
979 						JGitText.get().corruptObjectInvalidNameCom,
980 						Character.valueOf(((char) raw[ptr + 3]))));
981 			}
982 			break;
983 
984 		case 'l': // LPT[1-9]
985 			if (end - ptr >= 4
986 					&& toLower(raw[ptr + 1]) == 'p'
987 					&& toLower(raw[ptr + 2]) == 't'
988 					&& isPositiveDigit(raw[ptr + 3])
989 					&& (end - ptr == 4 || raw[ptr + 4] == '.')) {
990 				report(WIN32_BAD_NAME, id, String.format(
991 						JGitText.get().corruptObjectInvalidNameLpt,
992 						Character.valueOf(((char) raw[ptr + 3]))));
993 			}
994 			break;
995 
996 		case 'n': // NUL
997 			if (end - ptr >= 3
998 					&& toLower(raw[ptr + 1]) == 'u'
999 					&& toLower(raw[ptr + 2]) == 'l'
1000 					&& (end - ptr == 3 || raw[ptr + 3] == '.')) {
1001 				report(WIN32_BAD_NAME, id,
1002 						JGitText.get().corruptObjectInvalidNameNul);
1003 			}
1004 			break;
1005 
1006 		case 'p': // PRN
1007 			if (end - ptr >= 3
1008 					&& toLower(raw[ptr + 1]) == 'r'
1009 					&& toLower(raw[ptr + 2]) == 'n'
1010 					&& (end - ptr == 3 || raw[ptr + 3] == '.')) {
1011 				report(WIN32_BAD_NAME, id,
1012 						JGitText.get().corruptObjectInvalidNamePrn);
1013 			}
1014 			break;
1015 		}
1016 	}
1017 
1018 	private static boolean isInvalidOnWindows(byte c) {
1019 		// Windows disallows "special" characters in a path component.
1020 		switch (c) {
1021 		case '"':
1022 		case '*':
1023 		case ':':
1024 		case '<':
1025 		case '>':
1026 		case '?':
1027 		case '\\':
1028 		case '|':
1029 			return true;
1030 		}
1031 		return 1 <= c && c <= 31;
1032 	}
1033 
1034 	private static boolean isGit(byte[] buf, int p) {
1035 		return toLower(buf[p]) == 'g'
1036 				&& toLower(buf[p + 1]) == 'i'
1037 				&& toLower(buf[p + 2]) == 't';
1038 	}
1039 
1040 	private static boolean isGitTilde1(byte[] buf, int p, int end) {
1041 		if (end - p != 5)
1042 			return false;
1043 		return toLower(buf[p]) == 'g' && toLower(buf[p + 1]) == 'i'
1044 				&& toLower(buf[p + 2]) == 't' && buf[p + 3] == '~'
1045 				&& buf[p + 4] == '1';
1046 	}
1047 
1048 	private static boolean isNormalizedGit(byte[] raw, int ptr, int end) {
1049 		if (isGit(raw, ptr)) {
1050 			int dots = 0;
1051 			boolean space = false;
1052 			int p = end - 1;
1053 			for (; (ptr + 2) < p; p--) {
1054 				if (raw[p] == '.')
1055 					dots++;
1056 				else if (raw[p] == ' ')
1057 					space = true;
1058 				else
1059 					break;
1060 			}
1061 			return p == ptr + 2 && (dots == 1 || space);
1062 		}
1063 		return false;
1064 	}
1065 
1066 	private boolean match(byte[] b, byte[] src) {
1067 		int r = RawParseUtils.match(b, bufPtr.value, src);
1068 		if (r < 0) {
1069 			return false;
1070 		}
1071 		bufPtr.value = r;
1072 		return true;
1073 	}
1074 
1075 	private static char toLower(byte b) {
1076 		if ('A' <= b && b <= 'Z')
1077 			return (char) (b + ('a' - 'A'));
1078 		return (char) b;
1079 	}
1080 
1081 	private static boolean isPositiveDigit(byte b) {
1082 		return '1' <= b && b <= '9';
1083 	}
1084 
1085 	/**
1086 	 * Create a new {@link org.eclipse.jgit.lib.BlobObjectChecker}.
1087 	 *
1088 	 * @return new BlobObjectChecker or null if it's not provided.
1089 	 * @since 4.9
1090 	 */
1091 	@Nullable
1092 	public BlobObjectChecker newBlobObjectChecker() {
1093 		return null;
1094 	}
1095 
1096 	/**
1097 	 * Check a blob for errors.
1098 	 *
1099 	 * <p>
1100 	 * This may not be called from PackParser in some cases. Use
1101 	 * {@link #newBlobObjectChecker} instead.
1102 	 *
1103 	 * @param raw
1104 	 *            the blob data. The array is never modified.
1105 	 * @throws org.eclipse.jgit.errors.CorruptObjectException
1106 	 *             if any error was detected.
1107 	 */
1108 	public void checkBlob(byte[] raw) throws CorruptObjectException {
1109 		// We can always assume the blob is valid.
1110 	}
1111 
1112 	private String normalize(byte[] raw, int ptr, int end) {
1113 		String n = RawParseUtils.decode(raw, ptr, end).toLowerCase(Locale.US);
1114 		return macosx ? Normalizer.normalize(n, Normalizer.Form.NFC) : n;
1115 	}
1116 }