View Javadoc
1   /*
2    * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
3    * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4    * and other copyright owners as documented in the project's IP log.
5    *
6    * This program and the accompanying materials are made available
7    * under the terms of the Eclipse Distribution License v1.0 which
8    * accompanies this distribution, is reproduced below, and is
9    * available at http://www.eclipse.org/org/documents/edl-v10.php
10   *
11   * All rights reserved.
12   *
13   * Redistribution and use in source and binary forms, with or
14   * without modification, are permitted provided that the following
15   * conditions are met:
16   *
17   * - Redistributions of source code must retain the above copyright
18   *   notice, this list of conditions and the following disclaimer.
19   *
20   * - Redistributions in binary form must reproduce the above
21   *   copyright notice, this list of conditions and the following
22   *   disclaimer in the documentation and/or other materials provided
23   *   with the distribution.
24   *
25   * - Neither the name of the Eclipse Foundation, Inc. nor the
26   *   names of its contributors may be used to endorse or promote
27   *   products derived from this software without specific prior
28   *   written permission.
29   *
30   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   */
44  
45  package org.eclipse.jgit.internal.storage.file;
46  
47  import java.io.File;
48  import java.io.FileInputStream;
49  import java.io.FileNotFoundException;
50  import java.io.IOException;
51  import java.io.InputStream;
52  import java.text.MessageFormat;
53  import java.util.Iterator;
54  import java.util.Set;
55  
56  import org.eclipse.jgit.errors.CorruptObjectException;
57  import org.eclipse.jgit.errors.MissingObjectException;
58  import org.eclipse.jgit.internal.JGitText;
59  import org.eclipse.jgit.lib.AbbreviatedObjectId;
60  import org.eclipse.jgit.lib.AnyObjectId;
61  import org.eclipse.jgit.lib.MutableObjectId;
62  import org.eclipse.jgit.lib.ObjectId;
63  import org.eclipse.jgit.util.IO;
64  import org.eclipse.jgit.util.NB;
65  
66  /**
67   * Access path to locate objects by {@link ObjectId} in a {@link PackFile}.
68   * <p>
69   * Indexes are strictly redundant information in that we can rebuild all of the
70   * data held in the index file from the on disk representation of the pack file
71   * itself, but it is faster to access for random requests because data is stored
72   * by ObjectId.
73   * </p>
74   */
75  public abstract class PackIndex implements Iterable<PackIndex.MutableEntry> {
76  	/**
77  	 * Open an existing pack <code>.idx</code> file for reading.
78  	 * <p>
79  	 * The format of the file will be automatically detected and a proper access
80  	 * implementation for that format will be constructed and returned to the
81  	 * caller. The file may or may not be held open by the returned instance.
82  	 * </p>
83  	 *
84  	 * @param idxFile
85  	 *            existing pack .idx to read.
86  	 * @return access implementation for the requested file.
87  	 * @throws FileNotFoundException
88  	 *             the file does not exist.
89  	 * @throws IOException
90  	 *             the file exists but could not be read due to security errors,
91  	 *             unrecognized data version, or unexpected data corruption.
92  	 */
93  	public static PackIndex open(final File idxFile) throws IOException {
94  		final FileInputStream fd = new FileInputStream(idxFile);
95  		try {
96  			return read(fd);
97  		} catch (IOException ioe) {
98  			final String path = idxFile.getAbsolutePath();
99  			final IOException err;
100 			err = new IOException(MessageFormat.format(JGitText.get().unreadablePackIndex, path));
101 			err.initCause(ioe);
102 			throw err;
103 		} finally {
104 			try {
105 				fd.close();
106 			} catch (IOException err2) {
107 				// ignore
108 			}
109 		}
110 	}
111 
112 	/**
113 	 * Read an existing pack index file from a buffered stream.
114 	 * <p>
115 	 * The format of the file will be automatically detected and a proper access
116 	 * implementation for that format will be constructed and returned to the
117 	 * caller. The file may or may not be held open by the returned instance.
118 	 *
119 	 * @param fd
120 	 *            stream to read the index file from. The stream must be
121 	 *            buffered as some small IOs are performed against the stream.
122 	 *            The caller is responsible for closing the stream.
123 	 * @return a copy of the index in-memory.
124 	 * @throws IOException
125 	 *             the stream cannot be read.
126 	 * @throws CorruptObjectException
127 	 *             the stream does not contain a valid pack index.
128 	 */
129 	public static PackIndex read(InputStream fd) throws IOException,
130 			CorruptObjectException {
131 		final byte[] hdr = new byte[8];
132 		IO.readFully(fd, hdr, 0, hdr.length);
133 		if (isTOC(hdr)) {
134 			final int v = NB.decodeInt32(hdr, 4);
135 			switch (v) {
136 			case 2:
137 				return new PackIndexV2(fd);
138 			default:
139 				throw new IOException(MessageFormat.format(
140 						JGitText.get().unsupportedPackIndexVersion,
141 						Integer.valueOf(v)));
142 			}
143 		}
144 		return new PackIndexV1(fd, hdr);
145 	}
146 
147 	private static boolean isTOC(final byte[] h) {
148 		final byte[] toc = PackIndexWriter.TOC;
149 		for (int i = 0; i < toc.length; i++)
150 			if (h[i] != toc[i])
151 				return false;
152 		return true;
153 	}
154 
155 	/** Footer checksum applied on the bottom of the pack file. */
156 	protected byte[] packChecksum;
157 
158 	/**
159 	 * Determine if an object is contained within the pack file.
160 	 *
161 	 * @param id
162 	 *            the object to look for. Must not be null.
163 	 * @return true if the object is listed in this index; false otherwise.
164 	 */
165 	public boolean hasObject(final AnyObjectId id) {
166 		return findOffset(id) != -1;
167 	}
168 
169 	/**
170 	 * Provide iterator that gives access to index entries. Note, that iterator
171 	 * returns reference to mutable object, the same reference in each call -
172 	 * for performance reason. If client needs immutable objects, it must copy
173 	 * returned object on its own.
174 	 * <p>
175 	 * Iterator returns objects in SHA-1 lexicographical order.
176 	 * </p>
177 	 *
178 	 * @return iterator over pack index entries
179 	 */
180 	public abstract Iterator<MutableEntry> iterator();
181 
182 	/**
183 	 * Obtain the total number of objects described by this index.
184 	 *
185 	 * @return number of objects in this index, and likewise in the associated
186 	 *         pack that this index was generated from.
187 	 */
188 	public abstract long getObjectCount();
189 
190 	/**
191 	 * Obtain the total number of objects needing 64 bit offsets.
192 	 *
193 	 * @return number of objects in this index using a 64 bit offset; that is an
194 	 *         object positioned after the 2 GB position within the file.
195 	 */
196 	public abstract long getOffset64Count();
197 
198 	/**
199 	 * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
200 	 * <p>
201 	 * This method is a constant-time replacement for the following loop:
202 	 *
203 	 * <pre>
204 	 * Iterator&lt;MutableEntry&gt; eItr = index.iterator();
205 	 * int curPosition = 0;
206 	 * while (eItr.hasNext() &amp;&amp; curPosition++ &lt; nthPosition)
207 	 * 	eItr.next();
208 	 * ObjectId result = eItr.next().toObjectId();
209 	 * </pre>
210 	 *
211 	 * @param nthPosition
212 	 *            position within the traversal of {@link #iterator()} that the
213 	 *            caller needs the object for. The first returned
214 	 *            {@link MutableEntry} is 0, the second is 1, etc.
215 	 * @return the ObjectId for the corresponding entry.
216 	 */
217 	public abstract ObjectId getObjectId(long nthPosition);
218 
219 	/**
220 	 * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
221 	 * <p>
222 	 * This method is a constant-time replacement for the following loop:
223 	 *
224 	 * <pre>
225 	 * Iterator&lt;MutableEntry&gt; eItr = index.iterator();
226 	 * int curPosition = 0;
227 	 * while (eItr.hasNext() &amp;&amp; curPosition++ &lt; nthPosition)
228 	 * 	eItr.next();
229 	 * ObjectId result = eItr.next().toObjectId();
230 	 * </pre>
231 	 *
232 	 * @param nthPosition
233 	 *            unsigned 32 bit position within the traversal of
234 	 *            {@link #iterator()} that the caller needs the object for. The
235 	 *            first returned {@link MutableEntry} is 0, the second is 1,
236 	 *            etc. Positions past 2**31-1 are negative, but still valid.
237 	 * @return the ObjectId for the corresponding entry.
238 	 */
239 	public final ObjectId getObjectId(final int nthPosition) {
240 		if (nthPosition >= 0)
241 			return getObjectId((long) nthPosition);
242 		final int u31 = nthPosition >>> 1;
243 		final int one = nthPosition & 1;
244 		return getObjectId(((long) u31) << 1 | one);
245 	}
246 
247 	/**
248 	 * Get offset in a pack for the n-th object entry returned by
249 	 * {@link #iterator()}.
250 	 *
251 	 * @param nthPosition
252 	 *            unsigned 32 bit position within the traversal of
253 	 *            {@link #iterator()} for which the caller needs the offset. The
254 	 *            first returned {@link MutableEntry} is 0, the second is 1,
255 	 *            etc. Positions past 2**31-1 are negative, but still valid.
256 	 * @return the offset in a pack for the corresponding entry.
257 	 */
258 	abstract long getOffset(long nthPosition);
259 
260 	/**
261 	 * Locate the file offset position for the requested object.
262 	 *
263 	 * @param objId
264 	 *            name of the object to locate within the pack.
265 	 * @return offset of the object's header and compressed content; -1 if the
266 	 *         object does not exist in this index and is thus not stored in the
267 	 *         associated pack.
268 	 */
269 	public abstract long findOffset(AnyObjectId objId);
270 
271 	/**
272 	 * Retrieve stored CRC32 checksum of the requested object raw-data
273 	 * (including header).
274 	 *
275 	 * @param objId
276 	 *            id of object to look for
277 	 * @return CRC32 checksum of specified object (at 32 less significant bits)
278 	 * @throws MissingObjectException
279 	 *             when requested ObjectId was not found in this index
280 	 * @throws UnsupportedOperationException
281 	 *             when this index doesn't support CRC32 checksum
282 	 */
283 	public abstract long findCRC32(AnyObjectId objId)
284 			throws MissingObjectException, UnsupportedOperationException;
285 
286 	/**
287 	 * Check whether this index supports (has) CRC32 checksums for objects.
288 	 *
289 	 * @return true if CRC32 is stored, false otherwise
290 	 */
291 	public abstract boolean hasCRC32Support();
292 
293 	/**
294 	 * Find objects matching the prefix abbreviation.
295 	 *
296 	 * @param matches
297 	 *            set to add any located ObjectIds to. This is an output
298 	 *            parameter.
299 	 * @param id
300 	 *            prefix to search for.
301 	 * @param matchLimit
302 	 *            maximum number of results to return. At most this many
303 	 *            ObjectIds should be added to matches before returning.
304 	 * @throws IOException
305 	 *             the index cannot be read.
306 	 */
307 	public abstract void resolve(Set<ObjectId> matches, AbbreviatedObjectId id,
308 			int matchLimit) throws IOException;
309 
310 	/**
311 	 * Represent mutable entry of pack index consisting of object id and offset
312 	 * in pack (both mutable).
313 	 *
314 	 */
315 	public static class MutableEntry {
316 		final MutableObjectId idBuffer = new MutableObjectId();
317 
318 		long offset;
319 
320 		/**
321 		 * Returns offset for this index object entry
322 		 *
323 		 * @return offset of this object in a pack file
324 		 */
325 		public long getOffset() {
326 			return offset;
327 		}
328 
329 		/** @return hex string describing the object id of this entry. */
330 		public String name() {
331 			ensureId();
332 			return idBuffer.name();
333 		}
334 
335 		/** @return a copy of the object id. */
336 		public ObjectId toObjectId() {
337 			ensureId();
338 			return idBuffer.toObjectId();
339 		}
340 
341 		/** @return a complete copy of this entry, that won't modify */
342 		public MutableEntry cloneEntry() {
343 			final MutableEntry r = new MutableEntry();
344 			ensureId();
345 			r.idBuffer.fromObjectId(idBuffer);
346 			r.offset = offset;
347 			return r;
348 		}
349 
350 		void ensureId() {
351 			// Override in implementations.
352 		}
353 	}
354 
355 	abstract class EntriesIterator implements Iterator<MutableEntry> {
356 		protected final MutableEntry entry = initEntry();
357 
358 		protected long returnedNumber = 0;
359 
360 		protected abstract MutableEntry initEntry();
361 
362 		public boolean hasNext() {
363 			return returnedNumber < getObjectCount();
364 		}
365 
366 		/**
367 		 * Implementation must update {@link #returnedNumber} before returning
368 		 * element.
369 		 */
370 		public abstract MutableEntry next();
371 
372 		public void remove() {
373 			throw new UnsupportedOperationException();
374 		}
375 	}
376 }