1 /* 2 * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com> 3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> 4 * and other copyright owners as documented in the project's IP log. 5 * 6 * This program and the accompanying materials are made available 7 * under the terms of the Eclipse Distribution License v1.0 which 8 * accompanies this distribution, is reproduced below, and is 9 * available at http://www.eclipse.org/org/documents/edl-v10.php 10 * 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials provided 23 * with the distribution. 24 * 25 * - Neither the name of the Eclipse Foundation, Inc. nor the 26 * names of its contributors may be used to endorse or promote 27 * products derived from this software without specific prior 28 * written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 */ 44 45 package org.eclipse.jgit.internal.storage.file; 46 47 import java.io.File; 48 import java.io.FileInputStream; 49 import java.io.FileNotFoundException; 50 import java.io.IOException; 51 import java.io.InputStream; 52 import java.text.MessageFormat; 53 import java.util.Iterator; 54 import java.util.Set; 55 56 import org.eclipse.jgit.errors.CorruptObjectException; 57 import org.eclipse.jgit.errors.MissingObjectException; 58 import org.eclipse.jgit.internal.JGitText; 59 import org.eclipse.jgit.lib.AbbreviatedObjectId; 60 import org.eclipse.jgit.lib.AnyObjectId; 61 import org.eclipse.jgit.lib.MutableObjectId; 62 import org.eclipse.jgit.lib.ObjectId; 63 import org.eclipse.jgit.util.IO; 64 import org.eclipse.jgit.util.NB; 65 66 /** 67 * Access path to locate objects by {@link ObjectId} in a {@link PackFile}. 68 * <p> 69 * Indexes are strictly redundant information in that we can rebuild all of the 70 * data held in the index file from the on disk representation of the pack file 71 * itself, but it is faster to access for random requests because data is stored 72 * by ObjectId. 73 * </p> 74 */ 75 public abstract class PackIndex implements Iterable<PackIndex.MutableEntry> { 76 /** 77 * Open an existing pack <code>.idx</code> file for reading. 78 * <p> 79 * The format of the file will be automatically detected and a proper access 80 * implementation for that format will be constructed and returned to the 81 * caller. The file may or may not be held open by the returned instance. 82 * </p> 83 * 84 * @param idxFile 85 * existing pack .idx to read. 86 * @return access implementation for the requested file. 87 * @throws FileNotFoundException 88 * the file does not exist. 89 * @throws IOException 90 * the file exists but could not be read due to security errors, 91 * unrecognized data version, or unexpected data corruption. 92 */ 93 public static PackIndex open(final File idxFile) throws IOException { 94 final FileInputStream fd = new FileInputStream(idxFile); 95 try { 96 return read(fd); 97 } catch (IOException ioe) { 98 final String path = idxFile.getAbsolutePath(); 99 final IOException err; 100 err = new IOException(MessageFormat.format(JGitText.get().unreadablePackIndex, path)); 101 err.initCause(ioe); 102 throw err; 103 } finally { 104 try { 105 fd.close(); 106 } catch (IOException err2) { 107 // ignore 108 } 109 } 110 } 111 112 /** 113 * Read an existing pack index file from a buffered stream. 114 * <p> 115 * The format of the file will be automatically detected and a proper access 116 * implementation for that format will be constructed and returned to the 117 * caller. The file may or may not be held open by the returned instance. 118 * 119 * @param fd 120 * stream to read the index file from. The stream must be 121 * buffered as some small IOs are performed against the stream. 122 * The caller is responsible for closing the stream. 123 * @return a copy of the index in-memory. 124 * @throws IOException 125 * the stream cannot be read. 126 * @throws CorruptObjectException 127 * the stream does not contain a valid pack index. 128 */ 129 public static PackIndex read(InputStream fd) throws IOException, 130 CorruptObjectException { 131 final byte[] hdr = new byte[8]; 132 IO.readFully(fd, hdr, 0, hdr.length); 133 if (isTOC(hdr)) { 134 final int v = NB.decodeInt32(hdr, 4); 135 switch (v) { 136 case 2: 137 return new PackIndexV2(fd); 138 default: 139 throw new IOException(MessageFormat.format( 140 JGitText.get().unsupportedPackIndexVersion, 141 Integer.valueOf(v))); 142 } 143 } 144 return new PackIndexV1(fd, hdr); 145 } 146 147 private static boolean isTOC(final byte[] h) { 148 final byte[] toc = PackIndexWriter.TOC; 149 for (int i = 0; i < toc.length; i++) 150 if (h[i] != toc[i]) 151 return false; 152 return true; 153 } 154 155 /** Footer checksum applied on the bottom of the pack file. */ 156 protected byte[] packChecksum; 157 158 /** 159 * Determine if an object is contained within the pack file. 160 * 161 * @param id 162 * the object to look for. Must not be null. 163 * @return true if the object is listed in this index; false otherwise. 164 */ 165 public boolean hasObject(final AnyObjectId id) { 166 return findOffset(id) != -1; 167 } 168 169 /** 170 * Provide iterator that gives access to index entries. Note, that iterator 171 * returns reference to mutable object, the same reference in each call - 172 * for performance reason. If client needs immutable objects, it must copy 173 * returned object on its own. 174 * <p> 175 * Iterator returns objects in SHA-1 lexicographical order. 176 * </p> 177 * 178 * @return iterator over pack index entries 179 */ 180 public abstract Iterator<MutableEntry> iterator(); 181 182 /** 183 * Obtain the total number of objects described by this index. 184 * 185 * @return number of objects in this index, and likewise in the associated 186 * pack that this index was generated from. 187 */ 188 public abstract long getObjectCount(); 189 190 /** 191 * Obtain the total number of objects needing 64 bit offsets. 192 * 193 * @return number of objects in this index using a 64 bit offset; that is an 194 * object positioned after the 2 GB position within the file. 195 */ 196 public abstract long getOffset64Count(); 197 198 /** 199 * Get ObjectId for the n-th object entry returned by {@link #iterator()}. 200 * <p> 201 * This method is a constant-time replacement for the following loop: 202 * 203 * <pre> 204 * Iterator<MutableEntry> eItr = index.iterator(); 205 * int curPosition = 0; 206 * while (eItr.hasNext() && curPosition++ < nthPosition) 207 * eItr.next(); 208 * ObjectId result = eItr.next().toObjectId(); 209 * </pre> 210 * 211 * @param nthPosition 212 * position within the traversal of {@link #iterator()} that the 213 * caller needs the object for. The first returned 214 * {@link MutableEntry} is 0, the second is 1, etc. 215 * @return the ObjectId for the corresponding entry. 216 */ 217 public abstract ObjectId getObjectId(long nthPosition); 218 219 /** 220 * Get ObjectId for the n-th object entry returned by {@link #iterator()}. 221 * <p> 222 * This method is a constant-time replacement for the following loop: 223 * 224 * <pre> 225 * Iterator<MutableEntry> eItr = index.iterator(); 226 * int curPosition = 0; 227 * while (eItr.hasNext() && curPosition++ < nthPosition) 228 * eItr.next(); 229 * ObjectId result = eItr.next().toObjectId(); 230 * </pre> 231 * 232 * @param nthPosition 233 * unsigned 32 bit position within the traversal of 234 * {@link #iterator()} that the caller needs the object for. The 235 * first returned {@link MutableEntry} is 0, the second is 1, 236 * etc. Positions past 2**31-1 are negative, but still valid. 237 * @return the ObjectId for the corresponding entry. 238 */ 239 public final ObjectId getObjectId(final int nthPosition) { 240 if (nthPosition >= 0) 241 return getObjectId((long) nthPosition); 242 final int u31 = nthPosition >>> 1; 243 final int one = nthPosition & 1; 244 return getObjectId(((long) u31) << 1 | one); 245 } 246 247 /** 248 * Get offset in a pack for the n-th object entry returned by 249 * {@link #iterator()}. 250 * 251 * @param nthPosition 252 * unsigned 32 bit position within the traversal of 253 * {@link #iterator()} for which the caller needs the offset. The 254 * first returned {@link MutableEntry} is 0, the second is 1, 255 * etc. Positions past 2**31-1 are negative, but still valid. 256 * @return the offset in a pack for the corresponding entry. 257 */ 258 abstract long getOffset(long nthPosition); 259 260 /** 261 * Locate the file offset position for the requested object. 262 * 263 * @param objId 264 * name of the object to locate within the pack. 265 * @return offset of the object's header and compressed content; -1 if the 266 * object does not exist in this index and is thus not stored in the 267 * associated pack. 268 */ 269 public abstract long findOffset(AnyObjectId objId); 270 271 /** 272 * Retrieve stored CRC32 checksum of the requested object raw-data 273 * (including header). 274 * 275 * @param objId 276 * id of object to look for 277 * @return CRC32 checksum of specified object (at 32 less significant bits) 278 * @throws MissingObjectException 279 * when requested ObjectId was not found in this index 280 * @throws UnsupportedOperationException 281 * when this index doesn't support CRC32 checksum 282 */ 283 public abstract long findCRC32(AnyObjectId objId) 284 throws MissingObjectException, UnsupportedOperationException; 285 286 /** 287 * Check whether this index supports (has) CRC32 checksums for objects. 288 * 289 * @return true if CRC32 is stored, false otherwise 290 */ 291 public abstract boolean hasCRC32Support(); 292 293 /** 294 * Find objects matching the prefix abbreviation. 295 * 296 * @param matches 297 * set to add any located ObjectIds to. This is an output 298 * parameter. 299 * @param id 300 * prefix to search for. 301 * @param matchLimit 302 * maximum number of results to return. At most this many 303 * ObjectIds should be added to matches before returning. 304 * @throws IOException 305 * the index cannot be read. 306 */ 307 public abstract void resolve(Set<ObjectId> matches, AbbreviatedObjectId id, 308 int matchLimit) throws IOException; 309 310 /** 311 * Represent mutable entry of pack index consisting of object id and offset 312 * in pack (both mutable). 313 * 314 */ 315 public static class MutableEntry { 316 final MutableObjectId idBuffer = new MutableObjectId(); 317 318 long offset; 319 320 /** 321 * Returns offset for this index object entry 322 * 323 * @return offset of this object in a pack file 324 */ 325 public long getOffset() { 326 return offset; 327 } 328 329 /** @return hex string describing the object id of this entry. */ 330 public String name() { 331 ensureId(); 332 return idBuffer.name(); 333 } 334 335 /** @return a copy of the object id. */ 336 public ObjectId toObjectId() { 337 ensureId(); 338 return idBuffer.toObjectId(); 339 } 340 341 /** @return a complete copy of this entry, that won't modify */ 342 public MutableEntry cloneEntry() { 343 final MutableEntry r = new MutableEntry(); 344 ensureId(); 345 r.idBuffer.fromObjectId(idBuffer); 346 r.offset = offset; 347 return r; 348 } 349 350 void ensureId() { 351 // Override in implementations. 352 } 353 } 354 355 abstract class EntriesIterator implements Iterator<MutableEntry> { 356 protected final MutableEntry entry = initEntry(); 357 358 protected long returnedNumber = 0; 359 360 protected abstract MutableEntry initEntry(); 361 362 public boolean hasNext() { 363 return returnedNumber < getObjectCount(); 364 } 365 366 /** 367 * Implementation must update {@link #returnedNumber} before returning 368 * element. 369 */ 370 public abstract MutableEntry next(); 371 372 public void remove() { 373 throw new UnsupportedOperationException(); 374 } 375 } 376 }