1 /* 2 * Copyright (C) 2008, Robin Rosenberg <robin.rosenberg@dewire.com> 3 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> 4 * and other copyright owners as documented in the project's IP log. 5 * 6 * This program and the accompanying materials are made available 7 * under the terms of the Eclipse Distribution License v1.0 which 8 * accompanies this distribution, is reproduced below, and is 9 * available at http://www.eclipse.org/org/documents/edl-v10.php 10 * 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials provided 23 * with the distribution. 24 * 25 * - Neither the name of the Eclipse Foundation, Inc. nor the 26 * names of its contributors may be used to endorse or promote 27 * products derived from this software without specific prior 28 * written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 31 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 32 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 35 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 39 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 40 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 42 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 */ 44 45 package org.eclipse.jgit.internal.storage.file; 46 47 import java.io.BufferedOutputStream; 48 import java.io.IOException; 49 import java.io.OutputStream; 50 import java.security.DigestOutputStream; 51 import java.text.MessageFormat; 52 import java.util.List; 53 54 import org.eclipse.jgit.internal.JGitText; 55 import org.eclipse.jgit.lib.Constants; 56 import org.eclipse.jgit.lib.ObjectId; 57 import org.eclipse.jgit.transport.PackedObjectInfo; 58 import org.eclipse.jgit.util.NB; 59 import org.eclipse.jgit.util.io.SafeBufferedOutputStream; 60 61 /** 62 * Creates a table of contents to support random access by {@link PackFile}. 63 * <p> 64 * Pack index files (the <code>.idx</code> suffix in a pack file pair) 65 * provides random access to any object in the pack by associating an ObjectId 66 * to the byte offset within the pack where the object's data can be read. 67 */ 68 public abstract class PackIndexWriter { 69 /** Magic constant indicating post-version 1 format. */ 70 protected static final byte[] TOC = { -1, 't', 'O', 'c' }; 71 72 /** 73 * Create a new writer for the oldest (most widely understood) format. 74 * <p> 75 * This method selects an index format that can accurate describe the 76 * supplied objects and that will be the most compatible format with older 77 * Git implementations. 78 * <p> 79 * Index version 1 is widely recognized by all Git implementations, but 80 * index version 2 (and later) is not as well recognized as it was 81 * introduced more than a year later. Index version 1 can only be used if 82 * the resulting pack file is under 4 gigabytes in size; packs larger than 83 * that limit must use index version 2. 84 * 85 * @param dst 86 * the stream the index data will be written to. If not already 87 * buffered it will be automatically wrapped in a buffered 88 * stream. Callers are always responsible for closing the stream. 89 * @param objs 90 * the objects the caller needs to store in the index. Entries 91 * will be examined until a format can be conclusively selected. 92 * @return a new writer to output an index file of the requested format to 93 * the supplied stream. 94 * @throws IllegalArgumentException 95 * no recognized pack index version can support the supplied 96 * objects. This is likely a bug in the implementation. 97 * @see #oldestPossibleFormat(List) 98 */ 99 public static PackIndexWriter createOldestPossible(final OutputStream dst, 100 final List<? extends PackedObjectInfo> objs) { 101 return createVersion(dst, oldestPossibleFormat(objs)); 102 } 103 104 /** 105 * Return the oldest (most widely understood) index format. 106 * <p> 107 * This method selects an index format that can accurate describe the 108 * supplied objects and that will be the most compatible format with older 109 * Git implementations. 110 * <p> 111 * Index version 1 is widely recognized by all Git implementations, but 112 * index version 2 (and later) is not as well recognized as it was 113 * introduced more than a year later. Index version 1 can only be used if 114 * the resulting pack file is under 4 gigabytes in size; packs larger than 115 * that limit must use index version 2. 116 * 117 * @param objs 118 * the objects the caller needs to store in the index. Entries 119 * will be examined until a format can be conclusively selected. 120 * @return the index format. 121 * @throws IllegalArgumentException 122 * no recognized pack index version can support the supplied 123 * objects. This is likely a bug in the implementation. 124 */ 125 public static int oldestPossibleFormat( 126 final List<? extends PackedObjectInfo> objs) { 127 for (final PackedObjectInfo oe : objs) { 128 if (!PackIndexWriterV1.canStore(oe)) 129 return 2; 130 } 131 return 1; 132 } 133 134 135 /** 136 * Create a new writer instance for a specific index format version. 137 * 138 * @param dst 139 * the stream the index data will be written to. If not already 140 * buffered it will be automatically wrapped in a buffered 141 * stream. Callers are always responsible for closing the stream. 142 * @param version 143 * index format version number required by the caller. Exactly 144 * this formatted version will be written. 145 * @return a new writer to output an index file of the requested format to 146 * the supplied stream. 147 * @throws IllegalArgumentException 148 * the version requested is not supported by this 149 * implementation. 150 */ 151 public static PackIndexWriter createVersion(final OutputStream dst, 152 final int version) { 153 switch (version) { 154 case 1: 155 return new PackIndexWriterV1(dst); 156 case 2: 157 return new PackIndexWriterV2(dst); 158 default: 159 throw new IllegalArgumentException(MessageFormat.format( 160 JGitText.get().unsupportedPackIndexVersion, 161 Integer.valueOf(version))); 162 } 163 } 164 165 /** The index data stream we are responsible for creating. */ 166 protected final DigestOutputStream out; 167 168 /** A temporary buffer for use during IO to {link #out}. */ 169 protected final byte[] tmp; 170 171 /** The entries this writer must pack. */ 172 protected List<? extends PackedObjectInfo> entries; 173 174 /** SHA-1 checksum for the entire pack data. */ 175 protected byte[] packChecksum; 176 177 /** 178 * Create a new writer instance. 179 * 180 * @param dst 181 * the stream this instance outputs to. If not already buffered 182 * it will be automatically wrapped in a buffered stream. 183 */ 184 protected PackIndexWriter(final OutputStream dst) { 185 out = new DigestOutputStream(dst instanceof BufferedOutputStream ? dst 186 : new SafeBufferedOutputStream(dst), 187 Constants.newMessageDigest()); 188 tmp = new byte[4 + Constants.OBJECT_ID_LENGTH]; 189 } 190 191 /** 192 * Write all object entries to the index stream. 193 * <p> 194 * After writing the stream passed to the factory is flushed but remains 195 * open. Callers are always responsible for closing the output stream. 196 * 197 * @param toStore 198 * sorted list of objects to store in the index. The caller must 199 * have previously sorted the list using {@link PackedObjectInfo}'s 200 * native {@link Comparable} implementation. 201 * @param packDataChecksum 202 * checksum signature of the entire pack data content. This is 203 * traditionally the last 20 bytes of the pack file's own stream. 204 * @throws IOException 205 * an error occurred while writing to the output stream, or this 206 * index format cannot store the object data supplied. 207 */ 208 public void write(final List<? extends PackedObjectInfo> toStore, 209 final byte[] packDataChecksum) throws IOException { 210 entries = toStore; 211 packChecksum = packDataChecksum; 212 writeImpl(); 213 out.flush(); 214 } 215 216 /** 217 * Writes the index file to {@link #out}. 218 * <p> 219 * Implementations should go something like: 220 * 221 * <pre> 222 * writeFanOutTable(); 223 * for (final PackedObjectInfo po : entries) 224 * writeOneEntry(po); 225 * writeChecksumFooter(); 226 * </pre> 227 * 228 * <p> 229 * Where the logic for <code>writeOneEntry</code> is specific to the index 230 * format in use. Additional headers/footers may be used if necessary and 231 * the {@link #entries} collection may be iterated over more than once if 232 * necessary. Implementors therefore have complete control over the data. 233 * 234 * @throws IOException 235 * an error occurred while writing to the output stream, or this 236 * index format cannot store the object data supplied. 237 */ 238 protected abstract void writeImpl() throws IOException; 239 240 /** 241 * Output the version 2 (and later) TOC header, with version number. 242 * <p> 243 * Post version 1 all index files start with a TOC header that makes the 244 * file an invalid version 1 file, and then includes the version number. 245 * This header is necessary to recognize a version 1 from a version 2 246 * formatted index. 247 * 248 * @param version 249 * version number of this index format being written. 250 * @throws IOException 251 * an error occurred while writing to the output stream. 252 */ 253 protected void writeTOC(final int version) throws IOException { 254 out.write(TOC); 255 NB.encodeInt32(tmp, 0, version); 256 out.write(tmp, 0, 4); 257 } 258 259 /** 260 * Output the standard 256 entry first-level fan-out table. 261 * <p> 262 * The fan-out table is 4 KB in size, holding 256 32-bit unsigned integer 263 * counts. Each count represents the number of objects within this index 264 * whose {@link ObjectId#getFirstByte()} matches the count's position in the 265 * fan-out table. 266 * 267 * @throws IOException 268 * an error occurred while writing to the output stream. 269 */ 270 protected void writeFanOutTable() throws IOException { 271 final int[] fanout = new int[256]; 272 for (final PackedObjectInfo po : entries) 273 fanout[po.getFirstByte() & 0xff]++; 274 for (int i = 1; i < 256; i++) 275 fanout[i] += fanout[i - 1]; 276 for (final int n : fanout) { 277 NB.encodeInt32(tmp, 0, n); 278 out.write(tmp, 0, 4); 279 } 280 } 281 282 /** 283 * Output the standard two-checksum index footer. 284 * <p> 285 * The standard footer contains two checksums (20 byte SHA-1 values): 286 * <ol> 287 * <li>Pack data checksum - taken from the last 20 bytes of the pack file.</li> 288 * <li>Index data checksum - checksum of all index bytes written, including 289 * the pack data checksum above.</li> 290 * </ol> 291 * 292 * @throws IOException 293 * an error occurred while writing to the output stream. 294 */ 295 protected void writeChecksumFooter() throws IOException { 296 out.write(packChecksum); 297 out.on(false); 298 out.write(out.getMessageDigest().digest()); 299 } 300 }