View Javadoc
1   /*
2    * Copyright (C) 2008, Robin Rosenberg <robin.rosenberg@dewire.com>
3    * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4    * and other copyright owners as documented in the project's IP log.
5    *
6    * This program and the accompanying materials are made available
7    * under the terms of the Eclipse Distribution License v1.0 which
8    * accompanies this distribution, is reproduced below, and is
9    * available at http://www.eclipse.org/org/documents/edl-v10.php
10   *
11   * All rights reserved.
12   *
13   * Redistribution and use in source and binary forms, with or
14   * without modification, are permitted provided that the following
15   * conditions are met:
16   *
17   * - Redistributions of source code must retain the above copyright
18   *   notice, this list of conditions and the following disclaimer.
19   *
20   * - Redistributions in binary form must reproduce the above
21   *   copyright notice, this list of conditions and the following
22   *   disclaimer in the documentation and/or other materials provided
23   *   with the distribution.
24   *
25   * - Neither the name of the Eclipse Foundation, Inc. nor the
26   *   names of its contributors may be used to endorse or promote
27   *   products derived from this software without specific prior
28   *   written permission.
29   *
30   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   */
44  
45  package org.eclipse.jgit.internal.storage.file;
46  
47  import java.io.BufferedOutputStream;
48  import java.io.IOException;
49  import java.io.OutputStream;
50  import java.security.DigestOutputStream;
51  import java.text.MessageFormat;
52  import java.util.List;
53  
54  import org.eclipse.jgit.internal.JGitText;
55  import org.eclipse.jgit.lib.Constants;
56  import org.eclipse.jgit.transport.PackedObjectInfo;
57  import org.eclipse.jgit.util.NB;
58  
59  /**
60   * Creates a table of contents to support random access by
61   * {@link org.eclipse.jgit.internal.storage.file.PackFile}.
62   * <p>
63   * Pack index files (the <code>.idx</code> suffix in a pack file pair) provides
64   * random access to any object in the pack by associating an ObjectId to the
65   * byte offset within the pack where the object's data can be read.
66   */
67  public abstract class PackIndexWriter {
68  	/** Magic constant indicating post-version 1 format. */
69  	protected static final byte[] TOC = { -1, 't', 'O', 'c' };
70  
71  	/**
72  	 * Create a new writer for the oldest (most widely understood) format.
73  	 * <p>
74  	 * This method selects an index format that can accurate describe the
75  	 * supplied objects and that will be the most compatible format with older
76  	 * Git implementations.
77  	 * <p>
78  	 * Index version 1 is widely recognized by all Git implementations, but
79  	 * index version 2 (and later) is not as well recognized as it was
80  	 * introduced more than a year later. Index version 1 can only be used if
81  	 * the resulting pack file is under 4 gigabytes in size; packs larger than
82  	 * that limit must use index version 2.
83  	 *
84  	 * @param dst
85  	 *            the stream the index data will be written to. If not already
86  	 *            buffered it will be automatically wrapped in a buffered
87  	 *            stream. Callers are always responsible for closing the stream.
88  	 * @param objs
89  	 *            the objects the caller needs to store in the index. Entries
90  	 *            will be examined until a format can be conclusively selected.
91  	 * @return a new writer to output an index file of the requested format to
92  	 *         the supplied stream.
93  	 * @throws java.lang.IllegalArgumentException
94  	 *             no recognized pack index version can support the supplied
95  	 *             objects. This is likely a bug in the implementation.
96  	 * @see #oldestPossibleFormat(List)
97  	 */
98  	public static PackIndexWriter createOldestPossible(final OutputStream dst,
99  			final List<? extends PackedObjectInfo> objs) {
100 		return createVersion(dst, oldestPossibleFormat(objs));
101 	}
102 
103 	/**
104 	 * Return the oldest (most widely understood) index format.
105 	 * <p>
106 	 * This method selects an index format that can accurate describe the
107 	 * supplied objects and that will be the most compatible format with older
108 	 * Git implementations.
109 	 * <p>
110 	 * Index version 1 is widely recognized by all Git implementations, but
111 	 * index version 2 (and later) is not as well recognized as it was
112 	 * introduced more than a year later. Index version 1 can only be used if
113 	 * the resulting pack file is under 4 gigabytes in size; packs larger than
114 	 * that limit must use index version 2.
115 	 *
116 	 * @param objs
117 	 *            the objects the caller needs to store in the index. Entries
118 	 *            will be examined until a format can be conclusively selected.
119 	 * @return the index format.
120 	 * @throws java.lang.IllegalArgumentException
121 	 *             no recognized pack index version can support the supplied
122 	 *             objects. This is likely a bug in the implementation.
123 	 */
124 	public static int oldestPossibleFormat(
125 			final List<? extends PackedObjectInfo> objs) {
126 		for (PackedObjectInfo oe : objs) {
127 			if (!PackIndexWriterV1.canStore(oe))
128 				return 2;
129 		}
130 		return 1;
131 	}
132 
133 
134 	/**
135 	 * Create a new writer instance for a specific index format version.
136 	 *
137 	 * @param dst
138 	 *            the stream the index data will be written to. If not already
139 	 *            buffered it will be automatically wrapped in a buffered
140 	 *            stream. Callers are always responsible for closing the stream.
141 	 * @param version
142 	 *            index format version number required by the caller. Exactly
143 	 *            this formatted version will be written.
144 	 * @return a new writer to output an index file of the requested format to
145 	 *         the supplied stream.
146 	 * @throws java.lang.IllegalArgumentException
147 	 *             the version requested is not supported by this
148 	 *             implementation.
149 	 */
150 	public static PackIndexWriter createVersion(final OutputStream dst,
151 			final int version) {
152 		switch (version) {
153 		case 1:
154 			return new PackIndexWriterV1(dst);
155 		case 2:
156 			return new PackIndexWriterV2(dst);
157 		default:
158 			throw new IllegalArgumentException(MessageFormat.format(
159 					JGitText.get().unsupportedPackIndexVersion,
160 					Integer.valueOf(version)));
161 		}
162 	}
163 
164 	/** The index data stream we are responsible for creating. */
165 	protected final DigestOutputStream out;
166 
167 	/** A temporary buffer for use during IO to {link #out}. */
168 	protected final byte[] tmp;
169 
170 	/** The entries this writer must pack. */
171 	protected List<? extends PackedObjectInfo> entries;
172 
173 	/** SHA-1 checksum for the entire pack data. */
174 	protected byte[] packChecksum;
175 
176 	/**
177 	 * Create a new writer instance.
178 	 *
179 	 * @param dst
180 	 *            the stream this instance outputs to. If not already buffered
181 	 *            it will be automatically wrapped in a buffered stream.
182 	 */
183 	protected PackIndexWriter(OutputStream dst) {
184 		out = new DigestOutputStream(dst instanceof BufferedOutputStream ? dst
185 				: new BufferedOutputStream(dst),
186 				Constants.newMessageDigest());
187 		tmp = new byte[4 + Constants.OBJECT_ID_LENGTH];
188 	}
189 
190 	/**
191 	 * Write all object entries to the index stream.
192 	 * <p>
193 	 * After writing the stream passed to the factory is flushed but remains
194 	 * open. Callers are always responsible for closing the output stream.
195 	 *
196 	 * @param toStore
197 	 *            sorted list of objects to store in the index. The caller must
198 	 *            have previously sorted the list using
199 	 *            {@link org.eclipse.jgit.transport.PackedObjectInfo}'s native
200 	 *            {@link java.lang.Comparable} implementation.
201 	 * @param packDataChecksum
202 	 *            checksum signature of the entire pack data content. This is
203 	 *            traditionally the last 20 bytes of the pack file's own stream.
204 	 * @throws java.io.IOException
205 	 *             an error occurred while writing to the output stream, or this
206 	 *             index format cannot store the object data supplied.
207 	 */
208 	public void write(final List<? extends PackedObjectInfo> toStore,
209 			final byte[] packDataChecksum) throws IOException {
210 		entries = toStore;
211 		packChecksum = packDataChecksum;
212 		writeImpl();
213 		out.flush();
214 	}
215 
216 	/**
217 	 * Writes the index file to {@link #out}.
218 	 * <p>
219 	 * Implementations should go something like:
220 	 *
221 	 * <pre>
222 	 * writeFanOutTable();
223 	 * for (final PackedObjectInfo po : entries)
224 	 * 	writeOneEntry(po);
225 	 * writeChecksumFooter();
226 	 * </pre>
227 	 *
228 	 * <p>
229 	 * Where the logic for <code>writeOneEntry</code> is specific to the index
230 	 * format in use. Additional headers/footers may be used if necessary and
231 	 * the {@link #entries} collection may be iterated over more than once if
232 	 * necessary. Implementors therefore have complete control over the data.
233 	 *
234 	 * @throws java.io.IOException
235 	 *             an error occurred while writing to the output stream, or this
236 	 *             index format cannot store the object data supplied.
237 	 */
238 	protected abstract void writeImpl() throws IOException;
239 
240 	/**
241 	 * Output the version 2 (and later) TOC header, with version number.
242 	 * <p>
243 	 * Post version 1 all index files start with a TOC header that makes the
244 	 * file an invalid version 1 file, and then includes the version number.
245 	 * This header is necessary to recognize a version 1 from a version 2
246 	 * formatted index.
247 	 *
248 	 * @param version
249 	 *            version number of this index format being written.
250 	 * @throws java.io.IOException
251 	 *             an error occurred while writing to the output stream.
252 	 */
253 	protected void writeTOC(int version) throws IOException {
254 		out.write(TOC);
255 		NB.encodeInt32(tmp, 0, version);
256 		out.write(tmp, 0, 4);
257 	}
258 
259 	/**
260 	 * Output the standard 256 entry first-level fan-out table.
261 	 * <p>
262 	 * The fan-out table is 4 KB in size, holding 256 32-bit unsigned integer
263 	 * counts. Each count represents the number of objects within this index
264 	 * whose {@link org.eclipse.jgit.lib.ObjectId#getFirstByte()} matches the
265 	 * count's position in the fan-out table.
266 	 *
267 	 * @throws java.io.IOException
268 	 *             an error occurred while writing to the output stream.
269 	 */
270 	protected void writeFanOutTable() throws IOException {
271 		final int[] fanout = new int[256];
272 		for (PackedObjectInfo po : entries)
273 			fanout[po.getFirstByte() & 0xff]++;
274 		for (int i = 1; i < 256; i++)
275 			fanout[i] += fanout[i - 1];
276 		for (int n : fanout) {
277 			NB.encodeInt32(tmp, 0, n);
278 			out.write(tmp, 0, 4);
279 		}
280 	}
281 
282 	/**
283 	 * Output the standard two-checksum index footer.
284 	 * <p>
285 	 * The standard footer contains two checksums (20 byte SHA-1 values):
286 	 * <ol>
287 	 * <li>Pack data checksum - taken from the last 20 bytes of the pack file.</li>
288 	 * <li>Index data checksum - checksum of all index bytes written, including
289 	 * the pack data checksum above.</li>
290 	 * </ol>
291 	 *
292 	 * @throws java.io.IOException
293 	 *             an error occurred while writing to the output stream.
294 	 */
295 	protected void writeChecksumFooter() throws IOException {
296 		out.write(packChecksum);
297 		out.on(false);
298 		out.write(out.getMessageDigest().digest());
299 	}
300 }