View Javadoc
1   /*
2    * Copyright (C) 2008-2011, Google Inc.
3    * Copyright (C) 2007, Robin Rosenberg <robin.rosenberg@dewire.com>
4    * Copyright (C) 2006-2008, Shawn O. Pearce <spearce@spearce.org>
5    * and other copyright owners as documented in the project's IP log.
6    *
7    * This program and the accompanying materials are made available
8    * under the terms of the Eclipse Distribution License v1.0 which
9    * accompanies this distribution, is reproduced below, and is
10   * available at http://www.eclipse.org/org/documents/edl-v10.php
11   *
12   * All rights reserved.
13   *
14   * Redistribution and use in source and binary forms, with or
15   * without modification, are permitted provided that the following
16   * conditions are met:
17   *
18   * - Redistributions of source code must retain the above copyright
19   *   notice, this list of conditions and the following disclaimer.
20   *
21   * - Redistributions in binary form must reproduce the above
22   *   copyright notice, this list of conditions and the following
23   *   disclaimer in the documentation and/or other materials provided
24   *   with the distribution.
25   *
26   * - Neither the name of the Eclipse Foundation, Inc. nor the
27   *   names of its contributors may be used to endorse or promote
28   *   products derived from this software without specific prior
29   *   written permission.
30   *
31   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
32   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
33   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
34   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
36   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
37   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
40   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
43   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44   */
45  
46  package org.eclipse.jgit.internal.storage.dfs;
47  
48  import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
49  import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
50  import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
51  import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
52  
53  import java.io.BufferedInputStream;
54  import java.io.EOFException;
55  import java.io.IOException;
56  import java.io.InputStream;
57  import java.nio.ByteBuffer;
58  import java.nio.channels.Channels;
59  import java.text.MessageFormat;
60  import java.util.Set;
61  import java.util.zip.CRC32;
62  import java.util.zip.DataFormatException;
63  import java.util.zip.Inflater;
64  
65  import org.eclipse.jgit.errors.CorruptObjectException;
66  import org.eclipse.jgit.errors.LargeObjectException;
67  import org.eclipse.jgit.errors.MissingObjectException;
68  import org.eclipse.jgit.errors.PackInvalidException;
69  import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException;
70  import org.eclipse.jgit.internal.JGitText;
71  import org.eclipse.jgit.internal.storage.file.PackBitmapIndex;
72  import org.eclipse.jgit.internal.storage.file.PackIndex;
73  import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
74  import org.eclipse.jgit.internal.storage.pack.BinaryDelta;
75  import org.eclipse.jgit.internal.storage.pack.PackExt;
76  import org.eclipse.jgit.internal.storage.pack.PackOutputStream;
77  import org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation;
78  import org.eclipse.jgit.lib.AbbreviatedObjectId;
79  import org.eclipse.jgit.lib.AnyObjectId;
80  import org.eclipse.jgit.lib.Constants;
81  import org.eclipse.jgit.lib.ObjectId;
82  import org.eclipse.jgit.lib.ObjectLoader;
83  import org.eclipse.jgit.lib.Repository;
84  import org.eclipse.jgit.util.LongList;
85  
/**
 * A Git version 2 pack file representation. A pack file contains Git objects in
 * delta packed format yielding high compression of lots of object where some
 * objects are similar.
 */
public final class DfsPackFile {
	/**
	 * File offset used to cache {@link #index} in {@link DfsBlockCache}.
	 * <p>
	 * To better manage memory, the forward index is stored as a single block in
	 * the block cache under this file position. A negative value is used
	 * because it cannot occur in a normal pack file, and it is less likely to
	 * collide with a valid data block from the file as the high bits will all
	 * be set when treated as an unsigned long by the cache code.
	 */
	private static final long POS_INDEX = -1;

	/** Offset used to cache {@link #reverseIndex}. See {@link #POS_INDEX}. */
	private static final long POS_REVERSE_INDEX = -2;

	/** Offset used to cache {@link #bitmapIndex}. See {@link #POS_INDEX}. */
	private static final long POS_BITMAP_INDEX = -3;

	/** Cache that owns this pack file and its data. */
	private final DfsBlockCache cache;

	/** Description of the pack file's storage. */
	private final DfsPackDescription packDesc;

	/** Unique identity of this pack while in-memory. */
	final DfsPackKey key;

	/**
	 * Total number of bytes in this pack file.
	 * <p>
	 * This field initializes to -1 and gets populated when a block is loaded.
	 */
	volatile long length;

	/**
	 * Preferred alignment for loading blocks from the backing file.
	 * <p>
	 * It is initialized to 0 and filled in on the first read made from the
	 * file. Block sizes may be odd, e.g. 4091, caused by the underlying DFS
	 * storing 4091 user bytes and 5 bytes block metadata into a lower level
	 * 4096 byte block on disk.
	 */
	private volatile int blockSize;

	/** True once corruption has been detected that cannot be worked around. */
	private volatile boolean invalid;

	/**
	 * Lock for initialization of {@link #index} and {@link #corruptObjects}.
	 * <p>
	 * This lock ensures only one thread can perform the initialization work.
	 */
	private final Object initLock = new Object();

	/** Index mapping {@link ObjectId} to position within the pack stream. */
	private volatile DfsBlockCache.Ref<PackIndex> index;

	/** Reverse version of {@link #index} mapping position to {@link ObjectId}. */
	private volatile DfsBlockCache.Ref<PackReverseIndex> reverseIndex;

	/** Index of compressed bitmap mapping entire object graph. */
	private volatile DfsBlockCache.Ref<PackBitmapIndex> bitmapIndex;

	/**
	 * Objects we have tried to read, and discovered to be corrupt.
	 * <p>
	 * The list is allocated after the first corruption is found, and filled in
	 * as more entries are discovered. Typically this list is never used, as
	 * pack files do not usually contain corrupt objects.
	 */
	private volatile LongList corruptObjects;
162 
163 	/**
164 	 * Construct a reader for an existing, packfile.
165 	 *
166 	 * @param cache
167 	 *            cache that owns the pack data.
168 	 * @param desc
169 	 *            description of the pack within the DFS.
170 	 * @param key
171 	 *            interned key used to identify blocks in the block cache.
172 	 */
173 	DfsPackFile(DfsBlockCache cache, DfsPackDescription desc, DfsPackKey key) {
174 		this.cache = cache;
175 		this.packDesc = desc;
176 		this.key = key;
177 
178 		length = desc.getFileSize(PACK);
179 		if (length <= 0)
180 			length = -1;
181 	}
182 
	/**
	 * @return description that was originally used to configure this pack
	 *         file within the DFS.
	 */
	public DfsPackDescription getPackDescription() {
		return packDesc;
	}
187 
188 	/**
189 	 * @return whether the pack index file is loaded and cached in memory.
190 	 * @since 2.2
191 	 */
192 	public boolean isIndexLoaded() {
193 		DfsBlockCache.Ref<PackIndex> idxref = index;
194 		return idxref != null && idxref.has();
195 	}
196 
	/**
	 * @return number of bytes of this pack's data currently held in the block
	 *         cache, excluding the index.
	 */
	public long getCachedSize() {
		return key.cachedSize.get();
	}
201 
	/** @return file name of the pack stream (the PACK extension file). */
	String getPackName() {
		return packDesc.getFileName(PACK);
	}
205 
	/**
	 * Set the preferred block alignment used when reading from the backing
	 * file. See {@link #blockSize}.
	 *
	 * @param newSize
	 *            block size in bytes.
	 */
	void setBlockSize(int newSize) {
		blockSize = newSize;
	}
209 
210 	void setPackIndex(PackIndex idx) {
211 		long objCnt = idx.getObjectCount();
212 		int recSize = Constants.OBJECT_ID_LENGTH + 8;
213 		int sz = (int) Math.min(objCnt * recSize, Integer.MAX_VALUE);
214 		index = cache.put(key, POS_INDEX, sz, idx);
215 	}
216 
	/**
	 * Get the PackIndex for this PackFile.
	 *
	 * @param ctx
	 *            reader context to support reading from the backing store if
	 *            the index is not already loaded in memory.
	 * @return the PackIndex.
	 * @throws IOException
	 *             the pack index is not available, or is corrupt.
	 */
	public PackIndex getPackIndex(DfsReader ctx) throws IOException {
		// Delegate to the private lazily-loading accessor.
		return idx(ctx);
	}
230 
	/**
	 * Get the forward index, loading it from the DFS on first use.
	 * <p>
	 * Uses double-checked locking on {@link #initLock} so only one thread
	 * performs the load; the result is published via the volatile block
	 * cache reference {@link #index}. A failed read permanently marks this
	 * pack {@link #invalid}.
	 *
	 * @param ctx
	 *            reader context used to open the index file if needed.
	 * @return the PackIndex.
	 * @throws IOException
	 *             the pack index is not available, or is corrupt.
	 */
	private PackIndex idx(DfsReader ctx) throws IOException {
		// Fast path: index still cached in memory.
		DfsBlockCache.Ref<PackIndex> idxref = index;
		if (idxref != null) {
			PackIndex idx = idxref.get();
			if (idx != null)
				return idx;
		}

		if (invalid)
			throw new PackInvalidException(getPackName());

		// Let global listeners observe the (possibly expensive) load.
		Repository.getGlobalListenerList()
				.dispatch(new BeforeDfsPackIndexLoadedEvent(this));

		synchronized (initLock) {
			// Re-check: another thread may have loaded the index while this
			// one waited on initLock.
			idxref = index;
			if (idxref != null) {
				PackIndex idx = idxref.get();
				if (idx != null)
					return idx;
			}

			PackIndex idx;
			try {
				ReadableChannel rc = ctx.db.openFile(packDesc, INDEX);
				try {
					InputStream in = Channels.newInputStream(rc);
					int wantSize = 8192;
					int bs = rc.blockSize();
					// Buffer in multiples of the channel's block size when
					// known; otherwise fall back to the 8 KiB default.
					if (0 < bs && bs < wantSize)
						bs = (wantSize / bs) * bs;
					else if (bs <= 0)
						bs = wantSize;
					in = new BufferedInputStream(in, bs);
					idx = PackIndex.read(in);
				} finally {
					rc.close();
				}
			} catch (EOFException e) {
				// Truncated index: poison this pack so callers stop retrying.
				invalid = true;
				IOException e2 = new IOException(MessageFormat.format(
						DfsText.get().shortReadOfIndex,
						packDesc.getFileName(INDEX)));
				e2.initCause(e);
				throw e2;
			} catch (IOException e) {
				invalid = true;
				IOException e2 = new IOException(MessageFormat.format(
						DfsText.get().cannotReadIndex,
						packDesc.getFileName(INDEX)));
				e2.initCause(e);
				throw e2;
			}

			setPackIndex(idx);
			return idx;
		}
	}
289 
	/** @return true if this pack stores unreachable garbage objects. */
	final boolean isGarbage() {
		return packDesc.getPackSource() == UNREACHABLE_GARBAGE;
	}
293 
	/**
	 * Get the bitmap index for this pack, loading it from the DFS if needed.
	 *
	 * @param ctx
	 *            reader context used to open the bitmap file if needed.
	 * @return the bitmap index; null if this pack is invalid, holds
	 *         unreachable garbage, or has no bitmap index file.
	 * @throws IOException
	 *             the bitmap index is not available, or is corrupt.
	 */
	PackBitmapIndex getBitmapIndex(DfsReader ctx) throws IOException {
		if (invalid || isGarbage())
			return null;
		// Fast path: bitmap still cached in memory.
		DfsBlockCache.Ref<PackBitmapIndex> idxref = bitmapIndex;
		if (idxref != null) {
			PackBitmapIndex idx = idxref.get();
			if (idx != null)
				return idx;
		}

		if (!packDesc.hasFileExt(PackExt.BITMAP_INDEX))
			return null;

		synchronized (initLock) {
			// Re-check: another thread may have loaded the bitmap while
			// this one waited on initLock.
			idxref = bitmapIndex;
			if (idxref != null) {
				PackBitmapIndex idx = idxref.get();
				if (idx != null)
					return idx;
			}

			long size;
			PackBitmapIndex idx;
			try {
				ReadableChannel rc = ctx.db.openFile(packDesc, BITMAP_INDEX);
				try {
					InputStream in = Channels.newInputStream(rc);
					int wantSize = 8192;
					int bs = rc.blockSize();
					// Buffer in multiples of the channel's block size when
					// known; otherwise fall back to the 8 KiB default.
					if (0 < bs && bs < wantSize)
						bs = (wantSize / bs) * bs;
					else if (bs <= 0)
						bs = wantSize;
					in = new BufferedInputStream(in, bs);
					idx = PackBitmapIndex.read(
							in, idx(ctx), getReverseIdx(ctx));
				} finally {
					// Channel position approximates bytes consumed; used
					// below as the cached size estimate.
					size = rc.position();
					rc.close();
				}
			} catch (EOFException e) {
				IOException e2 = new IOException(MessageFormat.format(
						DfsText.get().shortReadOfIndex,
						packDesc.getFileName(BITMAP_INDEX)));
				e2.initCause(e);
				throw e2;
			} catch (IOException e) {
				IOException e2 = new IOException(MessageFormat.format(
						DfsText.get().cannotReadIndex,
						packDesc.getFileName(BITMAP_INDEX)));
				e2.initCause(e);
				throw e2;
			}

			bitmapIndex = cache.put(key, POS_BITMAP_INDEX,
					(int) Math.min(size, Integer.MAX_VALUE), idx);
			return idx;
		}
	}
353 
	/**
	 * Get the reverse index (offset to ObjectId), building it from the
	 * forward index if not already cached.
	 *
	 * @param ctx
	 *            reader context used to load the forward index if needed.
	 * @return the reverse index.
	 * @throws IOException
	 *             the forward index is not available, or is corrupt.
	 */
	PackReverseIndex getReverseIdx(DfsReader ctx) throws IOException {
		// Fast path: reverse index still cached in memory.
		DfsBlockCache.Ref<PackReverseIndex> revref = reverseIndex;
		if (revref != null) {
			PackReverseIndex revidx = revref.get();
			if (revidx != null)
				return revidx;
		}

		synchronized (initLock) {
			// Re-check: another thread may have built the reverse index
			// while this one waited on initLock.
			revref = reverseIndex;
			if (revref != null) {
				PackReverseIndex revidx = revref.get();
				if (revidx != null)
					return revidx;
			}

			PackIndex idx = idx(ctx);
			PackReverseIndex revidx = new PackReverseIndex(idx);
			// Estimate 8 bytes per object for the cache's accounting.
			int sz = (int) Math.min(
					idx.getObjectCount() * 8, Integer.MAX_VALUE);
			reverseIndex = cache.put(key, POS_REVERSE_INDEX, sz, revidx);
			return revidx;
		}
	}
378 
379 	/**
380 	 * Check if an object is stored within this pack.
381 	 *
382 	 * @param ctx
383 	 *            reader context to support reading from the backing store if
384 	 *            the index is not already loaded in memory.
385 	 * @param id
386 	 *            object to be located.
387 	 * @return true if the object exists in this pack; false if it does not.
388 	 * @throws IOException
389 	 *             the pack index is not available, or is corrupt.
390 	 */
391 	public boolean hasObject(DfsReader ctx, AnyObjectId id) throws IOException {
392 		final long offset = idx(ctx).findOffset(id);
393 		return 0 < offset && !isCorrupt(offset);
394 	}
395 
396 	/**
397 	 * Get an object from this pack.
398 	 *
399 	 * @param ctx
400 	 *            temporary working space associated with the calling thread.
401 	 * @param id
402 	 *            the object to obtain from the pack. Must not be null.
403 	 * @return the object loader for the requested object if it is contained in
404 	 *         this pack; null if the object was not found.
405 	 * @throws IOException
406 	 *             the pack file or the index could not be read.
407 	 */
408 	ObjectLoader get(DfsReader ctx, AnyObjectId id)
409 			throws IOException {
410 		long offset = idx(ctx).findOffset(id);
411 		return 0 < offset && !isCorrupt(offset) ? load(ctx, offset) : null;
412 	}
413 
414 	long findOffset(DfsReader ctx, AnyObjectId id) throws IOException {
415 		return idx(ctx).findOffset(id);
416 	}
417 
418 	void resolve(DfsReader ctx, Set<ObjectId> matches, AbbreviatedObjectId id,
419 			int matchLimit) throws IOException {
420 		idx(ctx).resolve(matches, id, matchLimit);
421 	}
422 
423 	/** Release all memory used by this DfsPackFile instance. */
424 	public void close() {
425 		cache.remove(this);
426 		index = null;
427 		reverseIndex = null;
428 	}
429 
430 	/**
431 	 * Obtain the total number of objects available in this pack. This method
432 	 * relies on pack index, giving number of effectively available objects.
433 	 *
434 	 * @param ctx
435 	 *            current reader for the calling thread.
436 	 * @return number of objects in index of this pack, likewise in this pack
437 	 * @throws IOException
438 	 *             the index file cannot be loaded into memory.
439 	 */
440 	long getObjectCount(DfsReader ctx) throws IOException {
441 		return idx(ctx).getObjectCount();
442 	}
443 
444 	private byte[] decompress(long position, int sz, DfsReader ctx)
445 			throws IOException, DataFormatException {
446 		byte[] dstbuf;
447 		try {
448 			dstbuf = new byte[sz];
449 		} catch (OutOfMemoryError noMemory) {
450 			// The size may be larger than our heap allows, return null to
451 			// let the caller know allocation isn't possible and it should
452 			// use the large object streaming approach instead.
453 			//
454 			// For example, this can occur when sz is 640 MB, and JRE
455 			// maximum heap size is only 256 MB. Even if the JRE has
456 			// 200 MB free, it cannot allocate a 640 MB byte array.
457 			return null;
458 		}
459 
460 		if (ctx.inflate(this, position, dstbuf, false) != sz)
461 			throw new EOFException(MessageFormat.format(
462 					JGitText.get().shortCompressedStreamAt,
463 					Long.valueOf(position)));
464 		return dstbuf;
465 	}
466 
467 	void copyPackAsIs(PackOutputStream out, DfsReader ctx)
468 			throws IOException {
469 		// If the length hasn't been determined yet, pin to set it.
470 		if (length == -1) {
471 			ctx.pin(this, 0);
472 			ctx.unpin();
473 		}
474 		if (cache.shouldCopyThroughCache(length))
475 			copyPackThroughCache(out, ctx);
476 		else
477 			copyPackBypassCache(out, ctx);
478 	}
479 
480 	private void copyPackThroughCache(PackOutputStream out, DfsReader ctx)
481 			throws IOException {
482 		long position = 12;
483 		long remaining = length - (12 + 20);
484 		while (0 < remaining) {
485 			DfsBlock b = cache.getOrLoad(this, position, ctx);
486 			int ptr = (int) (position - b.start);
487 			int n = (int) Math.min(b.size() - ptr, remaining);
488 			b.write(out, position, n);
489 			position += n;
490 			remaining -= n;
491 		}
492 	}
493 
	/**
	 * Stream the pack body directly from the DFS channel, consulting the
	 * block cache only for blocks that already happen to be in memory.
	 *
	 * @param out
	 *            destination stream.
	 * @param ctx
	 *            current reader for the calling thread.
	 * @return the position reached after copying (end of the pack body).
	 * @throws IOException
	 *             the pack is truncated, or cannot be read or written.
	 */
	private long copyPackBypassCache(PackOutputStream out, DfsReader ctx)
			throws IOException {
		try (ReadableChannel rc = ctx.db.openFile(packDesc, PACK)) {
			ByteBuffer buf = newCopyBuffer(out, rc);
			if (ctx.getOptions().getStreamPackBufferSize() > 0)
				rc.setReadAheadBytes(ctx.getOptions().getStreamPackBufferSize());
			// Skip the 12 byte pack header; stop before the 20 byte trailer.
			long position = 12;
			long remaining = length - (12 + 20);
			while (0 < remaining) {
				// Prefer a block already in memory over hitting the DFS.
				DfsBlock b = cache.get(key, alignToBlock(position));
				if (b != null) {
					int ptr = (int) (position - b.start);
					int n = (int) Math.min(b.size() - ptr, remaining);
					b.write(out, position, n);
					position += n;
					remaining -= n;
					// Keep the channel in sync for the next direct read.
					rc.position(position);
					continue;
				}

				buf.position(0);
				int n = read(rc, buf);
				if (n <= 0)
					throw packfileIsTruncated();
				else if (n > remaining)
					// Never emit bytes of the trailing checksum.
					n = (int) remaining;
				out.write(buf.array(), 0, n);
				position += n;
				remaining -= n;
			}
			return position;
		}
	}
527 
528 	private ByteBuffer newCopyBuffer(PackOutputStream out, ReadableChannel rc) {
529 		int bs = blockSize(rc);
530 		byte[] copyBuf = out.getCopyBuffer();
531 		if (bs > copyBuf.length)
532 			copyBuf = new byte[bs];
533 		return ByteBuffer.wrap(copyBuf, 0, bs);
534 	}
535 
	/**
	 * Copy one object from this pack to the output stream in pack format,
	 * without inflating it, optionally validating its integrity first and
	 * while streaming.
	 *
	 * @param out
	 *            stream to receive the raw (still deflated) object.
	 * @param src
	 *            object within this pack to copy.
	 * @param validate
	 *            if true, verify the stored CRC32 (or inflate the stream
	 *            when no CRC is indexed) before sending, and re-check the
	 *            CRC32 while spooling large objects out.
	 * @param ctx
	 *            current reader for the calling thread.
	 * @throws IOException
	 *             the pack cannot be read, or the object is corrupt.
	 * @throws StoredObjectRepresentationNotAvailableException
	 *             the object data is unreadable or corrupt; the caller
	 *             should obtain the object from another source.
	 */
	void copyAsIs(PackOutputStream out, DfsObjectToPack src,
			boolean validate, DfsReader ctx) throws IOException,
			StoredObjectRepresentationNotAvailableException {
		final CRC32 crc1 = validate ? new CRC32() : null;
		final CRC32 crc2 = validate ? new CRC32() : null;
		final byte[] buf = out.getCopyBuffer();

		// Rip apart the header so we can discover the size.
		//
		try {
			readFully(src.offset, buf, 0, 20, ctx);
		} catch (IOException ioError) {
			StoredObjectRepresentationNotAvailableException gone;
			gone = new StoredObjectRepresentationNotAvailableException(src);
			gone.initCause(ioError);
			throw gone;
		}
		int c = buf[0] & 0xff;
		final int typeCode = (c >> 4) & 7;
		long inflatedLength = c & 15;
		int shift = 4;
		int headerCnt = 1;
		// Remaining size bytes are a 7-bit varint; high bit continues.
		while ((c & 0x80) != 0) {
			c = buf[headerCnt++] & 0xff;
			inflatedLength += ((long) (c & 0x7f)) << shift;
			shift += 7;
		}

		if (typeCode == Constants.OBJ_OFS_DELTA) {
			// Skip past the base offset varint; it is part of the copied
			// header and feeds the CRC below.
			do {
				c = buf[headerCnt++] & 0xff;
			} while ((c & 128) != 0);
			if (validate) {
				assert(crc1 != null && crc2 != null);
				crc1.update(buf, 0, headerCnt);
				crc2.update(buf, 0, headerCnt);
			}
		} else if (typeCode == Constants.OBJ_REF_DELTA) {
			if (validate) {
				assert(crc1 != null && crc2 != null);
				crc1.update(buf, 0, headerCnt);
				crc2.update(buf, 0, headerCnt);
			}

			// A 20 byte base ObjectId follows the size header.
			readFully(src.offset + headerCnt, buf, 0, 20, ctx);
			if (validate) {
				assert(crc1 != null && crc2 != null);
				crc1.update(buf, 0, 20);
				crc2.update(buf, 0, 20);
			}
			headerCnt += 20;
		} else if (validate) {
			assert(crc1 != null && crc2 != null);
			crc1.update(buf, 0, headerCnt);
			crc2.update(buf, 0, headerCnt);
		}

		final long dataOffset = src.offset + headerCnt;
		final long dataLength = src.length;
		final long expectedCRC;
		final DfsBlock quickCopy;

		// Verify the object isn't corrupt before sending. If it is,
		// we report it missing instead.
		//
		try {
			// quickCopy is non-null only when the whole object sits inside
			// a single pinned block.
			quickCopy = ctx.quickCopy(this, dataOffset, dataLength);

			if (validate && idx(ctx).hasCRC32Support()) {
				assert(crc1 != null);
				// Index has the CRC32 code cached, validate the object.
				//
				expectedCRC = idx(ctx).findCRC32(src);
				if (quickCopy != null) {
					quickCopy.crc32(crc1, dataOffset, (int) dataLength);
				} else {
					long pos = dataOffset;
					long cnt = dataLength;
					while (cnt > 0) {
						final int n = (int) Math.min(cnt, buf.length);
						readFully(pos, buf, 0, n, ctx);
						crc1.update(buf, 0, n);
						pos += n;
						cnt -= n;
					}
				}
				if (crc1.getValue() != expectedCRC) {
					setCorrupt(src.offset);
					throw new CorruptObjectException(MessageFormat.format(
							JGitText.get().objectAtHasBadZlibStream,
							Long.valueOf(src.offset), getPackName()));
				}
			} else if (validate) {
				assert(crc1 != null);
				// We don't have a CRC32 code in the index, so compute it
				// now while inflating the raw data to get zlib to tell us
				// whether or not the data is safe.
				//
				Inflater inf = ctx.inflater();
				byte[] tmp = new byte[1024];
				if (quickCopy != null) {
					quickCopy.check(inf, tmp, dataOffset, (int) dataLength);
				} else {
					long pos = dataOffset;
					long cnt = dataLength;
					while (cnt > 0) {
						final int n = (int) Math.min(cnt, buf.length);
						readFully(pos, buf, 0, n, ctx);
						crc1.update(buf, 0, n);
						inf.setInput(buf, 0, n);
						// Inflate only to validate; output is discarded.
						while (inf.inflate(tmp, 0, tmp.length) > 0)
							continue;
						pos += n;
						cnt -= n;
					}
				}
				if (!inf.finished() || inf.getBytesRead() != dataLength) {
					setCorrupt(src.offset);
					throw new EOFException(MessageFormat.format(
							JGitText.get().shortCompressedStreamAt,
							Long.valueOf(src.offset)));
				}
				expectedCRC = crc1.getValue();
			} else {
				expectedCRC = -1;
			}
		} catch (DataFormatException dataFormat) {
			setCorrupt(src.offset);

			CorruptObjectException corruptObject = new CorruptObjectException(
					MessageFormat.format(
							JGitText.get().objectAtHasBadZlibStream,
							Long.valueOf(src.offset), getPackName()));
			corruptObject.initCause(dataFormat);

			StoredObjectRepresentationNotAvailableException gone;
			gone = new StoredObjectRepresentationNotAvailableException(src);
			gone.initCause(corruptObject);
			throw gone;

		} catch (IOException ioError) {
			StoredObjectRepresentationNotAvailableException gone;
			gone = new StoredObjectRepresentationNotAvailableException(src);
			gone.initCause(ioError);
			throw gone;
		}

		if (quickCopy != null) {
			// The entire object fits into a single byte array window slice,
			// and we have it pinned.  Write this out without copying.
			//
			out.writeHeader(src, inflatedLength);
			quickCopy.write(out, dataOffset, (int) dataLength);

		} else if (dataLength <= buf.length) {
			// Tiny optimization: Lots of objects are very small deltas or
			// deflated commits that are likely to fit in the copy buffer.
			//
			if (!validate) {
				// When not validating, the data was never read above; fill
				// the copy buffer now.
				long pos = dataOffset;
				long cnt = dataLength;
				while (cnt > 0) {
					final int n = (int) Math.min(cnt, buf.length);
					readFully(pos, buf, 0, n, ctx);
					pos += n;
					cnt -= n;
				}
			}
			out.writeHeader(src, inflatedLength);
			out.write(buf, 0, (int) dataLength);
		} else {
			// Now we are committed to sending the object. As we spool it out,
			// check its CRC32 code to make sure there wasn't corruption between
			// the verification we did above, and us actually outputting it.
			//
			out.writeHeader(src, inflatedLength);
			long pos = dataOffset;
			long cnt = dataLength;
			while (cnt > 0) {
				final int n = (int) Math.min(cnt, buf.length);
				readFully(pos, buf, 0, n, ctx);
				if (validate) {
					assert(crc2 != null);
					crc2.update(buf, 0, n);
				}
				out.write(buf, 0, n);
				pos += n;
				cnt -= n;
			}
			if (validate) {
				assert(crc2 != null);
				if (crc2.getValue() != expectedCRC) {
					throw new CorruptObjectException(MessageFormat.format(
							JGitText.get().objectAtHasBadZlibStream,
							Long.valueOf(src.offset), getPackName()));
				}
			}
		}
	}
735 
	/** @return true once corruption has permanently disabled this pack. */
	boolean invalid() {
		return invalid;
	}
739 
	/** Permanently mark this pack as unusable due to detected corruption. */
	void setInvalid() {
		invalid = true;
	}
743 
744 	private IOException packfileIsTruncated() {
745 		invalid = true;
746 		return new IOException(MessageFormat.format(
747 				JGitText.get().packfileIsTruncated, getPackName()));
748 	}
749 
750 	private void readFully(long position, byte[] dstbuf, int dstoff, int cnt,
751 			DfsReader ctx) throws IOException {
752 		if (ctx.copy(this, position, dstbuf, dstoff, cnt) != cnt)
753 			throw new EOFException();
754 	}
755 
756 	long alignToBlock(long pos) {
757 		int size = blockSize;
758 		if (size == 0)
759 			size = cache.getBlockSize();
760 		return (pos / size) * size;
761 	}
762 
	/**
	 * Get the block containing the position, loading it through the cache
	 * if necessary.
	 *
	 * @param pos
	 *            any byte position inside the desired block.
	 * @param ctx
	 *            current reader for the calling thread.
	 * @return the block.
	 * @throws IOException
	 *             the block could not be read.
	 */
	DfsBlock getOrLoadBlock(long pos, DfsReader ctx) throws IOException {
		return cache.getOrLoad(this, pos, ctx);
	}
766 
	/**
	 * Read a single block directly from the DFS, bypassing the cache.
	 * <p>
	 * The position is rounded down to the discovered block alignment. As a
	 * side effect this lazily discovers and records {@link #length} when
	 * the channel can report its size.
	 *
	 * @param pos
	 *            any byte position inside the desired block.
	 * @param ctx
	 *            current reader for the calling thread.
	 * @return the loaded block.
	 * @throws IOException
	 *             the pack is invalid, truncated, or cannot be read.
	 */
	DfsBlock readOneBlock(long pos, DfsReader ctx)
			throws IOException {
		if (invalid)
			throw new PackInvalidException(getPackName());

		ReadableChannel rc = ctx.db.openFile(packDesc, PACK);
		try {
			int size = blockSize(rc);
			pos = (pos / size) * size;

			// If the size of the file is not yet known, try to discover it.
			// Channels may choose to return -1 to indicate they don't
			// know the length yet, in this case read up to the size unit
			// given by the caller, then recheck the length.
			long len = length;
			if (len < 0) {
				len = rc.size();
				if (0 <= len)
					length = len;
			}

			// Clip the final block to the end of the file.
			if (0 <= len && len < pos + size)
				size = (int) (len - pos);
			if (size <= 0)
				throw new EOFException(MessageFormat.format(
						DfsText.get().shortReadOfBlock, Long.valueOf(pos),
						getPackName(), Long.valueOf(0), Long.valueOf(0)));

			byte[] buf = new byte[size];
			rc.position(pos);
			int cnt = read(rc, ByteBuffer.wrap(buf, 0, size));
			if (cnt != size) {
				if (0 <= len) {
					// The length was known; a short read means truncation.
					throw new EOFException(MessageFormat.format(
					    DfsText.get().shortReadOfBlock,
					    Long.valueOf(pos),
					    getPackName(),
					    Integer.valueOf(size),
					    Integer.valueOf(cnt)));
				}

				// Assume the entire thing was read in a single shot, compact
				// the buffer to only the space required.
				byte[] n = new byte[cnt];
				System.arraycopy(buf, 0, n, 0, n.length);
				buf = n;
			} else if (len < 0) {
				// With no length at the start of the read, the channel should
				// have the length available at the end.
				length = len = rc.size();
			}

			DfsBlock v = new DfsBlock(key, pos, buf);
			return v;
		} finally {
			rc.close();
		}
	}
825 
826 	private int blockSize(ReadableChannel rc) {
827 		// If the block alignment is not yet known, discover it. Prefer the
828 		// larger size from either the cache or the file itself.
829 		int size = blockSize;
830 		if (size == 0) {
831 			size = rc.blockSize();
832 			if (size <= 0)
833 				size = cache.getBlockSize();
834 			else if (size < cache.getBlockSize())
835 				size = (cache.getBlockSize() / size) * size;
836 			blockSize = size;
837 		}
838 		return size;
839 	}
840 
841 	private static int read(ReadableChannel rc, ByteBuffer buf)
842 			throws IOException {
843 		int n;
844 		do {
845 			n = rc.read(buf);
846 		} while (0 < n && buf.hasRemaining());
847 		return buf.position();
848 	}
849 
	/**
	 * Load and inflate the object stored at the given pack position,
	 * resolving any chain of OFS/REF deltas along the way.
	 * <p>
	 * Whole objects at or above {@code ctx.getStreamFileThreshold()} are
	 * returned as a streaming {@code LargePackedWholeObject}; delta results
	 * that cannot fit in a byte array raise {@link LargeObjectException}.
	 *
	 * @param ctx
	 *            current reader for the calling thread.
	 * @param pos
	 *            offset of the object's header within the pack.
	 * @return loader for the requested object.
	 * @throws IOException
	 *             the object or a delta base cannot be read, or its zlib
	 *             stream is corrupt.
	 */
	ObjectLoader load(DfsReader ctx, long pos)
			throws IOException {
		try {
			final byte[] ib = ctx.tempId;
			Delta delta = null;
			byte[] data = null;
			int type = Constants.OBJ_BAD;
			boolean cached = false;

			// Walk backwards through the delta chain until a whole object
			// or an already-inflated cached base is found, recording each
			// delta visited along the way.
			SEARCH: for (;;) {
				readFully(pos, ib, 0, 20, ctx);
				int c = ib[0] & 0xff;
				final int typeCode = (c >> 4) & 7;
				// Inflated size is a 7-bit varint after the type nibble.
				long sz = c & 15;
				int shift = 4;
				int p = 1;
				while ((c & 0x80) != 0) {
					c = ib[p++] & 0xff;
					sz += ((long) (c & 0x7f)) << shift;
					shift += 7;
				}

				switch (typeCode) {
				case Constants.OBJ_COMMIT:
				case Constants.OBJ_TREE:
				case Constants.OBJ_BLOB:
				case Constants.OBJ_TAG: {
					if (delta != null) {
						// This whole object is the base of the chain.
						data = decompress(pos + p, (int) sz, ctx);
						type = typeCode;
						break SEARCH;
					}

					if (sz < ctx.getStreamFileThreshold()) {
						data = decompress(pos + p, (int) sz, ctx);
						if (data != null)
							return new ObjectLoader.SmallObject(typeCode, data);
					}
					// Too large to hold in memory; stream it instead.
					return new LargePackedWholeObject(typeCode, sz, pos, p, this, ctx.db);
				}

				case Constants.OBJ_OFS_DELTA: {
					// Base position is a varint offset backwards from this
					// object's own position.
					c = ib[p++] & 0xff;
					long base = c & 127;
					while ((c & 128) != 0) {
						base += 1;
						c = ib[p++] & 0xff;
						base <<= 7;
						base += (c & 127);
					}
					base = pos - base;
					delta = new Delta(delta, pos, (int) sz, p, base);
					// (int) cast truncated sz: too large for a byte array;
					// data stays null and LargeObjectException is thrown below.
					if (sz != delta.deltaSize)
						break SEARCH;

					DeltaBaseCache.Entry e = ctx.getDeltaBaseCache().get(key, base);
					if (e != null) {
						// Base already inflated; stop walking the chain.
						type = e.type;
						data = e.data;
						cached = true;
						break SEARCH;
					}
					pos = base;
					continue SEARCH;
				}

				case Constants.OBJ_REF_DELTA: {
					// Base is named by a 20 byte ObjectId after the header.
					readFully(pos + p, ib, 0, 20, ctx);
					long base = findDeltaBase(ctx, ObjectId.fromRaw(ib));
					delta = new Delta(delta, pos, (int) sz, p + 20, base);
					// (int) cast truncated sz: too large for a byte array;
					// data stays null and LargeObjectException is thrown below.
					if (sz != delta.deltaSize)
						break SEARCH;

					DeltaBaseCache.Entry e = ctx.getDeltaBaseCache().get(key, base);
					if (e != null) {
						type = e.type;
						data = e.data;
						cached = true;
						break SEARCH;
					}
					pos = base;
					continue SEARCH;
				}

				default:
					throw new IOException(MessageFormat.format(
							JGitText.get().unknownObjectType, Integer.valueOf(typeCode)));
				}
			}

			// At this point there is at least one delta to apply to data.
			// (Whole objects with no deltas to apply return early above.)

			if (data == null)
				throw new LargeObjectException();

			assert(delta != null);
			do {
				// Cache only the base immediately before desired object.
				if (cached)
					cached = false;
				else if (delta.next == null)
					ctx.getDeltaBaseCache().put(key, delta.basePos, type, data);

				pos = delta.deltaPos;

				byte[] cmds = decompress(pos + delta.hdrLen, delta.deltaSize, ctx);
				if (cmds == null) {
					data = null; // Discard base in case of OutOfMemoryError
					throw new LargeObjectException();
				}

				final long sz = BinaryDelta.getResultSize(cmds);
				if (Integer.MAX_VALUE <= sz)
					throw new LargeObjectException.ExceedsByteArrayLimit();

				final byte[] result;
				try {
					result = new byte[(int) sz];
				} catch (OutOfMemoryError tooBig) {
					data = null; // Discard base in case of OutOfMemoryError
					cmds = null;
					throw new LargeObjectException.OutOfMemory(tooBig);
				}

				BinaryDelta.apply(data, cmds, result);
				data = result;
				delta = delta.next;
			} while (delta != null);

			return new ObjectLoader.SmallObject(type, data);

		} catch (DataFormatException dfe) {
			CorruptObjectException coe = new CorruptObjectException(
					MessageFormat.format(
							JGitText.get().objectAtHasBadZlibStream, Long.valueOf(pos),
							getPackName()));
			coe.initCause(dfe);
			throw coe;
		}
	}
991 
992 	private long findDeltaBase(DfsReader ctx, ObjectId baseId)
993 			throws IOException, MissingObjectException {
994 		long ofs = idx(ctx).findOffset(baseId);
995 		if (ofs < 0)
996 			throw new MissingObjectException(baseId,
997 					JGitText.get().missingDeltaBase);
998 		return ofs;
999 	}
1000 
	/**
	 * Node in a chain of deltas that must be applied to rebuild an object.
	 * <p>
	 * The chain is built while walking from the requested object toward its
	 * whole-object base; {@link #next} points back toward the object that
	 * was originally requested. All fields are immutable once constructed.
	 */
	private static class Delta {
		/** Child that applies onto this object. */
		final Delta next;

		/** Offset of the delta object. */
		final long deltaPos;

		/** Size of the inflated delta stream. */
		final int deltaSize;

		/** Total size of the delta's pack entry header (including base). */
		final int hdrLen;

		/** Offset of the base object this delta applies onto. */
		final long basePos;

		Delta(Delta next, long ofs, int sz, int hdrLen, long baseOffset) {
			this.next = next;
			this.deltaPos = ofs;
			this.deltaSize = sz;
			this.hdrLen = hdrLen;
			this.basePos = baseOffset;
		}
	}
1025 
1026 	byte[] getDeltaHeader(DfsReader wc, long pos)
1027 			throws IOException, DataFormatException {
1028 		// The delta stream starts as two variable length integers. If we
1029 		// assume they are 64 bits each, we need 16 bytes to encode them,
1030 		// plus 2 extra bytes for the variable length overhead. So 18 is
1031 		// the longest delta instruction header.
1032 		//
1033 		final byte[] hdr = new byte[32];
1034 		wc.inflate(this, pos, hdr, true /* header only */);
1035 		return hdr;
1036 	}
1037 
	/**
	 * Determine the type of the object stored at the given offset.
	 * <p>
	 * Delta entries do not record the final object type, so this method
	 * follows the delta chain (both offset and reference deltas) until a
	 * whole object is reached, and returns that object's type code.
	 *
	 * @param ctx
	 *            reader used to access this pack and its index.
	 * @param pos
	 *            offset within this pack of the object's entry header.
	 * @return type code of the object (commit, tree, blob or tag).
	 * @throws IOException
	 *             the pack is corrupt, an unknown type code was seen, or a
	 *             reference delta's base is not present in this pack.
	 */
	int getObjectType(DfsReader ctx, long pos) throws IOException {
		final byte[] ib = ctx.tempId;
		for (;;) {
			readFully(pos, ib, 0, 20, ctx);
			int c = ib[0] & 0xff;
			// Type code occupies bits 4-6 of the first header byte.
			final int type = (c >> 4) & 7;

			switch (type) {
			case Constants.OBJ_COMMIT:
			case Constants.OBJ_TREE:
			case Constants.OBJ_BLOB:
			case Constants.OBJ_TAG:
				// Whole object: this is the type of the entire chain.
				return type;

			case Constants.OBJ_OFS_DELTA: {
				// Skip the remainder of the variable length size field.
				int p = 1;
				while ((c & 0x80) != 0)
					c = ib[p++] & 0xff;
				// Decode the variable length negative offset to the base.
				c = ib[p++] & 0xff;
				long ofs = c & 127;
				while ((c & 128) != 0) {
					ofs += 1;
					c = ib[p++] & 0xff;
					ofs <<= 7;
					ofs += (c & 127);
				}
				// Continue the loop at the base object's position.
				pos = pos - ofs;
				continue;
			}

			case Constants.OBJ_REF_DELTA: {
				// Skip the remainder of the variable length size field.
				int p = 1;
				while ((c & 0x80) != 0)
					c = ib[p++] & 0xff;
				// Base is named by a 20 byte ObjectId following the header.
				readFully(pos + p, ib, 0, 20, ctx);
				pos = findDeltaBase(ctx, ObjectId.fromRaw(ib));
				continue;
			}

			default:
				throw new IOException(MessageFormat.format(
						JGitText.get().unknownObjectType, Integer.valueOf(type)));
			}
		}
	}
1083 
1084 	long getObjectSize(DfsReader ctx, AnyObjectId id) throws IOException {
1085 		final long offset = idx(ctx).findOffset(id);
1086 		return 0 < offset ? getObjectSize(ctx, offset) : -1;
1087 	}
1088 
	/**
	 * Determine the inflated size of the object at the given offset.
	 * <p>
	 * For whole objects the size is read directly from the entry header.
	 * For deltas the result size recorded at the start of the delta's
	 * instruction stream is returned instead, so only the beginning of the
	 * delta has to be inflated.
	 *
	 * @param ctx
	 *            reader used to access this pack.
	 * @param pos
	 *            offset within this pack of the object's entry header.
	 * @return size in bytes of the fully inflated object.
	 * @throws IOException
	 *             the pack is corrupt or cannot be read.
	 */
	long getObjectSize(DfsReader ctx, long pos)
			throws IOException {
		final byte[] ib = ctx.tempId;
		readFully(pos, ib, 0, 20, ctx);
		int c = ib[0] & 0xff;
		final int type = (c >> 4) & 7;
		// Size is a variable length integer: 4 low bits in the first byte,
		// then 7 more bits per continuation byte.
		long sz = c & 15;
		int shift = 4;
		int p = 1;
		while ((c & 0x80) != 0) {
			c = ib[p++] & 0xff;
			sz += ((long) (c & 0x7f)) << shift;
			shift += 7;
		}

		long deltaAt;
		switch (type) {
		case Constants.OBJ_COMMIT:
		case Constants.OBJ_TREE:
		case Constants.OBJ_BLOB:
		case Constants.OBJ_TAG:
			// Whole object: the header size is the inflated size.
			return sz;

		case Constants.OBJ_OFS_DELTA:
			// Skip the variable length base offset; the compressed delta
			// stream begins immediately after it.
			c = ib[p++] & 0xff;
			while ((c & 128) != 0)
				c = ib[p++] & 0xff;
			deltaAt = pos + p;
			break;

		case Constants.OBJ_REF_DELTA:
			// Skip the 20 byte base ObjectId to reach the delta stream.
			deltaAt = pos + p + 20;
			break;

		default:
			throw new IOException(MessageFormat.format(
					JGitText.get().unknownObjectType, Integer.valueOf(type)));
		}

		try {
			// The delta's own header records the size of the result object.
			return BinaryDelta.getResultSize(getDeltaHeader(ctx, deltaAt));
		} catch (DataFormatException dfe) {
			CorruptObjectException coe = new CorruptObjectException(
					MessageFormat.format(
							JGitText.get().objectAtHasBadZlibStream, Long.valueOf(pos),
							getPackName()));
			coe.initCause(dfe);
			throw coe;
		}
	}
1139 
	/**
	 * Describe how the object at the given offset is stored in this pack.
	 * <p>
	 * Fills in the representation's offset, storage format (whole or
	 * delta), base object (for deltas) and compressed data length.
	 *
	 * @param r
	 *            representation to populate.
	 * @param pos
	 *            offset within this pack of the object's entry header.
	 * @param ctx
	 *            reader used to access this pack.
	 * @param rev
	 *            reverse index, used to find where this entry ends and to
	 *            name the base of an offset delta.
	 * @throws IOException
	 *             the pack is corrupt or cannot be read.
	 */
	void representation(DfsObjectRepresentation r, final long pos,
			DfsReader ctx, PackReverseIndex rev)
			throws IOException {
		r.offset = pos;
		final byte[] ib = ctx.tempId;
		readFully(pos, ib, 0, 20, ctx);
		int c = ib[0] & 0xff;
		int p = 1;
		final int typeCode = (c >> 4) & 7;
		// Skip the remainder of the variable length size field.
		while ((c & 0x80) != 0)
			c = ib[p++] & 0xff;

		// Stored entry length: distance to the next entry, bounded by the
		// file length minus the 20 byte trailer at the end of the pack.
		long len = rev.findNextOffset(pos, length - 20) - pos;
		switch (typeCode) {
		case Constants.OBJ_COMMIT:
		case Constants.OBJ_TREE:
		case Constants.OBJ_BLOB:
		case Constants.OBJ_TAG:
			r.format = StoredObjectRepresentation.PACK_WHOLE;
			r.baseId = null;
			// Data length excludes the entry header bytes already parsed.
			r.length = len - p;
			return;

		case Constants.OBJ_OFS_DELTA: {
			// Decode the variable length negative offset to the base.
			c = ib[p++] & 0xff;
			long ofs = c & 127;
			while ((c & 128) != 0) {
				ofs += 1;
				c = ib[p++] & 0xff;
				ofs <<= 7;
				ofs += (c & 127);
			}
			r.format = StoredObjectRepresentation.PACK_DELTA;
			// Resolve the base offset back to an object name.
			r.baseId = rev.findObject(pos - ofs);
			r.length = len - p;
			return;
		}

		case Constants.OBJ_REF_DELTA: {
			// Base is named by a 20 byte ObjectId following the header.
			readFully(pos + p, ib, 0, 20, ctx);
			r.format = StoredObjectRepresentation.PACK_DELTA;
			r.baseId = ObjectId.fromRaw(ib);
			// Data length excludes both the header and the base name.
			r.length = len - p - 20;
			return;
		}

		default:
			throw new IOException(MessageFormat.format(
					JGitText.get().unknownObjectType, Integer.valueOf(typeCode)));
		}
	}
1191 
1192 	private boolean isCorrupt(long offset) {
1193 		LongList list = corruptObjects;
1194 		if (list == null)
1195 			return false;
1196 		synchronized (list) {
1197 			return list.contains(offset);
1198 		}
1199 	}
1200 
1201 	private void setCorrupt(long offset) {
1202 		LongList list = corruptObjects;
1203 		if (list == null) {
1204 			synchronized (initLock) {
1205 				list = corruptObjects;
1206 				if (list == null) {
1207 					list = new LongList();
1208 					corruptObjects = list;
1209 				}
1210 			}
1211 		}
1212 		synchronized (list) {
1213 			list.add(offset);
1214 		}
1215 	}
1216 }