View Javadoc
1   /*
2    * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
3    * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4    * and other copyright owners as documented in the project's IP log.
5    *
6    * This program and the accompanying materials are made available
7    * under the terms of the Eclipse Distribution License v1.0 which
8    * accompanies this distribution, is reproduced below, and is
9    * available at http://www.eclipse.org/org/documents/edl-v10.php
10   *
11   * All rights reserved.
12   *
13   * Redistribution and use in source and binary forms, with or
14   * without modification, are permitted provided that the following
15   * conditions are met:
16   *
17   * - Redistributions of source code must retain the above copyright
18   *   notice, this list of conditions and the following disclaimer.
19   *
20   * - Redistributions in binary form must reproduce the above
21   *   copyright notice, this list of conditions and the following
22   *   disclaimer in the documentation and/or other materials provided
23   *   with the distribution.
24   *
25   * - Neither the name of the Eclipse Foundation, Inc. nor the
26   *   names of its contributors may be used to endorse or promote
27   *   products derived from this software without specific prior
28   *   written permission.
29   *
30   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   */
44  
45  package org.eclipse.jgit.internal.storage.file;
46  
47  import java.io.File;
48  import java.io.FileNotFoundException;
49  import java.io.IOException;
50  import java.io.InputStream;
51  import java.text.MessageFormat;
52  import java.util.Iterator;
53  import java.util.Set;
54  
55  import org.eclipse.jgit.errors.CorruptObjectException;
56  import org.eclipse.jgit.errors.MissingObjectException;
57  import org.eclipse.jgit.errors.UnsupportedPackIndexVersionException;
58  import org.eclipse.jgit.internal.JGitText;
59  import org.eclipse.jgit.lib.AbbreviatedObjectId;
60  import org.eclipse.jgit.lib.AnyObjectId;
61  import org.eclipse.jgit.lib.MutableObjectId;
62  import org.eclipse.jgit.lib.ObjectId;
63  import org.eclipse.jgit.lib.ObjectIdSet;
64  import org.eclipse.jgit.util.IO;
65  import org.eclipse.jgit.util.NB;
66  import org.eclipse.jgit.util.io.SilentFileInputStream;
67  
68  /**
69   * Access path to locate objects by {@link org.eclipse.jgit.lib.ObjectId} in a
70   * {@link org.eclipse.jgit.internal.storage.file.PackFile}.
71   * <p>
72   * Indexes are strictly redundant information in that we can rebuild all of the
73   * data held in the index file from the on disk representation of the pack file
74   * itself, but it is faster to access for random requests because data is stored
75   * by ObjectId.
76   * </p>
77   */
78  public abstract class PackIndex
79  		implements Iterable<PackIndex.MutableEntry>, ObjectIdSet {
80  	/**
81  	 * Open an existing pack <code>.idx</code> file for reading.
82  	 * <p>
83  	 * The format of the file will be automatically detected and a proper access
84  	 * implementation for that format will be constructed and returned to the
85  	 * caller. The file may or may not be held open by the returned instance.
86  	 * </p>
87  	 *
88  	 * @param idxFile
89  	 *            existing pack .idx to read.
90  	 * @return access implementation for the requested file.
91  	 * @throws FileNotFoundException
92  	 *             the file does not exist.
93  	 * @throws java.io.IOException
94  	 *             the file exists but could not be read due to security errors,
95  	 *             unrecognized data version, or unexpected data corruption.
96  	 */
97  	public static PackIndex open(File idxFile) throws IOException {
98  		try (SilentFileInputStream fd = new SilentFileInputStream(
99  				idxFile)) {
100 				return read(fd);
101 		} catch (IOException ioe) {
102 			throw new IOException(
103 					MessageFormat.format(JGitText.get().unreadablePackIndex,
104 							idxFile.getAbsolutePath()),
105 					ioe);
106 		}
107 	}
108 
109 	/**
110 	 * Read an existing pack index file from a buffered stream.
111 	 * <p>
112 	 * The format of the file will be automatically detected and a proper access
113 	 * implementation for that format will be constructed and returned to the
114 	 * caller. The file may or may not be held open by the returned instance.
115 	 *
116 	 * @param fd
117 	 *            stream to read the index file from. The stream must be
118 	 *            buffered as some small IOs are performed against the stream.
119 	 *            The caller is responsible for closing the stream.
120 	 * @return a copy of the index in-memory.
121 	 * @throws java.io.IOException
122 	 *             the stream cannot be read.
123 	 * @throws org.eclipse.jgit.errors.CorruptObjectException
124 	 *             the stream does not contain a valid pack index.
125 	 */
126 	public static PackIndex read(InputStream fd) throws IOException,
127 			CorruptObjectException {
128 		final byte[] hdr = new byte[8];
129 		IO.readFully(fd, hdr, 0, hdr.length);
130 		if (isTOC(hdr)) {
131 			final int v = NB.decodeInt32(hdr, 4);
132 			switch (v) {
133 			case 2:
134 				return new PackIndexV2(fd);
135 			default:
136 				throw new UnsupportedPackIndexVersionException(v);
137 			}
138 		}
139 		return new PackIndexV1(fd, hdr);
140 	}
141 
142 	private static boolean isTOC(byte[] h) {
143 		final byte[] toc = PackIndexWriter.TOC;
144 		for (int i = 0; i < toc.length; i++)
145 			if (h[i] != toc[i])
146 				return false;
147 		return true;
148 	}
149 
150 	/** Footer checksum applied on the bottom of the pack file. */
151 	protected byte[] packChecksum;
152 
153 	/**
154 	 * Determine if an object is contained within the pack file.
155 	 *
156 	 * @param id
157 	 *            the object to look for. Must not be null.
158 	 * @return true if the object is listed in this index; false otherwise.
159 	 */
160 	public boolean hasObject(AnyObjectId id) {
161 		return findOffset(id) != -1;
162 	}
163 
164 	/** {@inheritDoc} */
165 	@Override
166 	public boolean contains(AnyObjectId id) {
167 		return findOffset(id) != -1;
168 	}
169 
170 	/**
171 	 * {@inheritDoc}
172 	 * <p>
173 	 * Provide iterator that gives access to index entries. Note, that iterator
174 	 * returns reference to mutable object, the same reference in each call -
175 	 * for performance reason. If client needs immutable objects, it must copy
176 	 * returned object on its own.
177 	 * <p>
178 	 * Iterator returns objects in SHA-1 lexicographical order.
179 	 * </p>
180 	 */
181 	@Override
182 	public abstract Iterator<MutableEntry> iterator();
183 
184 	/**
185 	 * Obtain the total number of objects described by this index.
186 	 *
187 	 * @return number of objects in this index, and likewise in the associated
188 	 *         pack that this index was generated from.
189 	 */
190 	public abstract long getObjectCount();
191 
192 	/**
193 	 * Obtain the total number of objects needing 64 bit offsets.
194 	 *
195 	 * @return number of objects in this index using a 64 bit offset; that is an
196 	 *         object positioned after the 2 GB position within the file.
197 	 */
198 	public abstract long getOffset64Count();
199 
200 	/**
201 	 * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
202 	 * <p>
203 	 * This method is a constant-time replacement for the following loop:
204 	 *
205 	 * <pre>
206 	 * Iterator&lt;MutableEntry&gt; eItr = index.iterator();
207 	 * int curPosition = 0;
208 	 * while (eItr.hasNext() &amp;&amp; curPosition++ &lt; nthPosition)
209 	 * 	eItr.next();
210 	 * ObjectId result = eItr.next().toObjectId();
211 	 * </pre>
212 	 *
213 	 * @param nthPosition
214 	 *            position within the traversal of {@link #iterator()} that the
215 	 *            caller needs the object for. The first returned
216 	 *            {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry}
217 	 *            is 0, the second is 1, etc.
218 	 * @return the ObjectId for the corresponding entry.
219 	 */
220 	public abstract ObjectId getObjectId(long nthPosition);
221 
222 	/**
223 	 * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
224 	 * <p>
225 	 * This method is a constant-time replacement for the following loop:
226 	 *
227 	 * <pre>
228 	 * Iterator&lt;MutableEntry&gt; eItr = index.iterator();
229 	 * int curPosition = 0;
230 	 * while (eItr.hasNext() &amp;&amp; curPosition++ &lt; nthPosition)
231 	 * 	eItr.next();
232 	 * ObjectId result = eItr.next().toObjectId();
233 	 * </pre>
234 	 *
235 	 * @param nthPosition
236 	 *            unsigned 32 bit position within the traversal of
237 	 *            {@link #iterator()} that the caller needs the object for. The
238 	 *            first returned
239 	 *            {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry}
240 	 *            is 0, the second is 1, etc. Positions past 2**31-1 are
241 	 *            negative, but still valid.
242 	 * @return the ObjectId for the corresponding entry.
243 	 */
244 	public final ObjectId getObjectId(int nthPosition) {
245 		if (nthPosition >= 0)
246 			return getObjectId((long) nthPosition);
247 		final int u31 = nthPosition >>> 1;
248 		final int one = nthPosition & 1;
249 		return getObjectId(((long) u31) << 1 | one);
250 	}
251 
252 	/**
253 	 * Get offset in a pack for the n-th object entry returned by
254 	 * {@link #iterator()}.
255 	 *
256 	 * @param nthPosition
257 	 *            unsigned 32 bit position within the traversal of
258 	 *            {@link #iterator()} for which the caller needs the offset. The
259 	 *            first returned {@link MutableEntry} is 0, the second is 1,
260 	 *            etc. Positions past 2**31-1 are negative, but still valid.
261 	 * @return the offset in a pack for the corresponding entry.
262 	 */
263 	abstract long getOffset(long nthPosition);
264 
265 	/**
266 	 * Locate the file offset position for the requested object.
267 	 *
268 	 * @param objId
269 	 *            name of the object to locate within the pack.
270 	 * @return offset of the object's header and compressed content; -1 if the
271 	 *         object does not exist in this index and is thus not stored in the
272 	 *         associated pack.
273 	 */
274 	public abstract long findOffset(AnyObjectId objId);
275 
276 	/**
277 	 * Retrieve stored CRC32 checksum of the requested object raw-data
278 	 * (including header).
279 	 *
280 	 * @param objId
281 	 *            id of object to look for
282 	 * @return CRC32 checksum of specified object (at 32 less significant bits)
283 	 * @throws org.eclipse.jgit.errors.MissingObjectException
284 	 *             when requested ObjectId was not found in this index
285 	 * @throws java.lang.UnsupportedOperationException
286 	 *             when this index doesn't support CRC32 checksum
287 	 */
288 	public abstract long findCRC32(AnyObjectId objId)
289 			throws MissingObjectException, UnsupportedOperationException;
290 
291 	/**
292 	 * Check whether this index supports (has) CRC32 checksums for objects.
293 	 *
294 	 * @return true if CRC32 is stored, false otherwise
295 	 */
296 	public abstract boolean hasCRC32Support();
297 
298 	/**
299 	 * Find objects matching the prefix abbreviation.
300 	 *
301 	 * @param matches
302 	 *            set to add any located ObjectIds to. This is an output
303 	 *            parameter.
304 	 * @param id
305 	 *            prefix to search for.
306 	 * @param matchLimit
307 	 *            maximum number of results to return. At most this many
308 	 *            ObjectIds should be added to matches before returning.
309 	 * @throws java.io.IOException
310 	 *             the index cannot be read.
311 	 */
312 	public abstract void resolve(Set<ObjectId> matches, AbbreviatedObjectId id,
313 			int matchLimit) throws IOException;
314 
315 	/**
316 	 * Represent mutable entry of pack index consisting of object id and offset
317 	 * in pack (both mutable).
318 	 *
319 	 */
320 	public static class MutableEntry {
321 		final MutableObjectId idBuffer = new MutableObjectId();
322 
323 		long offset;
324 
325 		/**
326 		 * Returns offset for this index object entry
327 		 *
328 		 * @return offset of this object in a pack file
329 		 */
330 		public long getOffset() {
331 			return offset;
332 		}
333 
334 		/** @return hex string describing the object id of this entry. */
335 		public String name() {
336 			ensureId();
337 			return idBuffer.name();
338 		}
339 
340 		/** @return a copy of the object id. */
341 		public ObjectId toObjectId() {
342 			ensureId();
343 			return idBuffer.toObjectId();
344 		}
345 
346 		/** @return a complete copy of this entry, that won't modify */
347 		public MutableEntry cloneEntry() {
348 			final MutableEntry r = new MutableEntry();
349 			ensureId();
350 			r.idBuffer.fromObjectId(idBuffer);
351 			r.offset = offset;
352 			return r;
353 		}
354 
355 		void ensureId() {
356 			// Override in implementations.
357 		}
358 	}
359 
360 	abstract class EntriesIterator implements Iterator<MutableEntry> {
361 		protected final MutableEntry entry = initEntry();
362 
363 		protected long returnedNumber = 0;
364 
365 		protected abstract MutableEntry initEntry();
366 
367 		@Override
368 		public boolean hasNext() {
369 			return returnedNumber < getObjectCount();
370 		}
371 
372 		/**
373 		 * Implementation must update {@link #returnedNumber} before returning
374 		 * element.
375 		 */
376 		@Override
377 		public abstract MutableEntry next();
378 
379 		@Override
380 		public void remove() {
381 			throw new UnsupportedOperationException();
382 		}
383 	}
384 }