View Javadoc
1   /*
2    * Copyright (C) 2008, Robin Rosenberg <robin.rosenberg@dewire.com>
3    * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
4    *
5    * This program and the accompanying materials are made available under the
6    * terms of the Eclipse Distribution License v. 1.0 which is available at
7    * https://www.eclipse.org/org/documents/edl-v10.php.
8    *
9    * SPDX-License-Identifier: BSD-3-Clause
10   */
11  
12  package org.eclipse.jgit.internal.storage.file;
13  
14  import java.io.BufferedOutputStream;
15  import java.io.IOException;
16  import java.io.OutputStream;
17  import java.security.DigestOutputStream;
18  import java.text.MessageFormat;
19  import java.util.List;
20  
21  import org.eclipse.jgit.internal.JGitText;
22  import org.eclipse.jgit.lib.Constants;
23  import org.eclipse.jgit.transport.PackedObjectInfo;
24  import org.eclipse.jgit.util.NB;
25  
26  /**
27   * Creates a table of contents to support random access by
28   * {@link org.eclipse.jgit.internal.storage.file.PackFile}.
29   * <p>
30   * Pack index files (the <code>.idx</code> suffix in a pack file pair) provides
31   * random access to any object in the pack by associating an ObjectId to the
32   * byte offset within the pack where the object's data can be read.
33   */
34  public abstract class PackIndexWriter {
35  	/** Magic constant indicating post-version 1 format. */
36  	protected static final byte[] TOC = { -1, 't', 'O', 'c' };
37  
38  	/**
39  	 * Create a new writer for the oldest (most widely understood) format.
40  	 * <p>
41  	 * This method selects an index format that can accurate describe the
42  	 * supplied objects and that will be the most compatible format with older
43  	 * Git implementations.
44  	 * <p>
45  	 * Index version 1 is widely recognized by all Git implementations, but
46  	 * index version 2 (and later) is not as well recognized as it was
47  	 * introduced more than a year later. Index version 1 can only be used if
48  	 * the resulting pack file is under 4 gigabytes in size; packs larger than
49  	 * that limit must use index version 2.
50  	 *
51  	 * @param dst
52  	 *            the stream the index data will be written to. If not already
53  	 *            buffered it will be automatically wrapped in a buffered
54  	 *            stream. Callers are always responsible for closing the stream.
55  	 * @param objs
56  	 *            the objects the caller needs to store in the index. Entries
57  	 *            will be examined until a format can be conclusively selected.
58  	 * @return a new writer to output an index file of the requested format to
59  	 *         the supplied stream.
60  	 * @throws java.lang.IllegalArgumentException
61  	 *             no recognized pack index version can support the supplied
62  	 *             objects. This is likely a bug in the implementation.
63  	 * @see #oldestPossibleFormat(List)
64  	 */
65  	public static PackIndexWriter createOldestPossible(final OutputStream dst,
66  			final List<? extends PackedObjectInfo> objs) {
67  		return createVersion(dst, oldestPossibleFormat(objs));
68  	}
69  
70  	/**
71  	 * Return the oldest (most widely understood) index format.
72  	 * <p>
73  	 * This method selects an index format that can accurate describe the
74  	 * supplied objects and that will be the most compatible format with older
75  	 * Git implementations.
76  	 * <p>
77  	 * Index version 1 is widely recognized by all Git implementations, but
78  	 * index version 2 (and later) is not as well recognized as it was
79  	 * introduced more than a year later. Index version 1 can only be used if
80  	 * the resulting pack file is under 4 gigabytes in size; packs larger than
81  	 * that limit must use index version 2.
82  	 *
83  	 * @param objs
84  	 *            the objects the caller needs to store in the index. Entries
85  	 *            will be examined until a format can be conclusively selected.
86  	 * @return the index format.
87  	 * @throws java.lang.IllegalArgumentException
88  	 *             no recognized pack index version can support the supplied
89  	 *             objects. This is likely a bug in the implementation.
90  	 */
91  	public static int oldestPossibleFormat(
92  			final List<? extends PackedObjectInfo> objs) {
93  		for (PackedObjectInfo oe : objs) {
94  			if (!PackIndexWriterV1.canStore(oe))
95  				return 2;
96  		}
97  		return 1;
98  	}
99  
100 
101 	/**
102 	 * Create a new writer instance for a specific index format version.
103 	 *
104 	 * @param dst
105 	 *            the stream the index data will be written to. If not already
106 	 *            buffered it will be automatically wrapped in a buffered
107 	 *            stream. Callers are always responsible for closing the stream.
108 	 * @param version
109 	 *            index format version number required by the caller. Exactly
110 	 *            this formatted version will be written.
111 	 * @return a new writer to output an index file of the requested format to
112 	 *         the supplied stream.
113 	 * @throws java.lang.IllegalArgumentException
114 	 *             the version requested is not supported by this
115 	 *             implementation.
116 	 */
117 	public static PackIndexWriter createVersion(final OutputStream dst,
118 			final int version) {
119 		switch (version) {
120 		case 1:
121 			return new PackIndexWriterV1(dst);
122 		case 2:
123 			return new PackIndexWriterV2(dst);
124 		default:
125 			throw new IllegalArgumentException(MessageFormat.format(
126 					JGitText.get().unsupportedPackIndexVersion,
127 					Integer.valueOf(version)));
128 		}
129 	}
130 
131 	/** The index data stream we are responsible for creating. */
132 	protected final DigestOutputStream out;
133 
134 	/** A temporary buffer for use during IO to {link #out}. */
135 	protected final byte[] tmp;
136 
137 	/** The entries this writer must pack. */
138 	protected List<? extends PackedObjectInfo> entries;
139 
140 	/** SHA-1 checksum for the entire pack data. */
141 	protected byte[] packChecksum;
142 
143 	/**
144 	 * Create a new writer instance.
145 	 *
146 	 * @param dst
147 	 *            the stream this instance outputs to. If not already buffered
148 	 *            it will be automatically wrapped in a buffered stream.
149 	 */
150 	protected PackIndexWriter(OutputStream dst) {
151 		out = new DigestOutputStream(dst instanceof BufferedOutputStream ? dst
152 				: new BufferedOutputStream(dst),
153 				Constants.newMessageDigest());
154 		tmp = new byte[4 + Constants.OBJECT_ID_LENGTH];
155 	}
156 
157 	/**
158 	 * Write all object entries to the index stream.
159 	 * <p>
160 	 * After writing the stream passed to the factory is flushed but remains
161 	 * open. Callers are always responsible for closing the output stream.
162 	 *
163 	 * @param toStore
164 	 *            sorted list of objects to store in the index. The caller must
165 	 *            have previously sorted the list using
166 	 *            {@link org.eclipse.jgit.transport.PackedObjectInfo}'s native
167 	 *            {@link java.lang.Comparable} implementation.
168 	 * @param packDataChecksum
169 	 *            checksum signature of the entire pack data content. This is
170 	 *            traditionally the last 20 bytes of the pack file's own stream.
171 	 * @throws java.io.IOException
172 	 *             an error occurred while writing to the output stream, or this
173 	 *             index format cannot store the object data supplied.
174 	 */
175 	public void write(final List<? extends PackedObjectInfo> toStore,
176 			final byte[] packDataChecksum) throws IOException {
177 		entries = toStore;
178 		packChecksum = packDataChecksum;
179 		writeImpl();
180 		out.flush();
181 	}
182 
183 	/**
184 	 * Writes the index file to {@link #out}.
185 	 * <p>
186 	 * Implementations should go something like:
187 	 *
188 	 * <pre>
189 	 * writeFanOutTable();
190 	 * for (final PackedObjectInfo po : entries)
191 	 * 	writeOneEntry(po);
192 	 * writeChecksumFooter();
193 	 * </pre>
194 	 *
195 	 * <p>
196 	 * Where the logic for <code>writeOneEntry</code> is specific to the index
197 	 * format in use. Additional headers/footers may be used if necessary and
198 	 * the {@link #entries} collection may be iterated over more than once if
199 	 * necessary. Implementors therefore have complete control over the data.
200 	 *
201 	 * @throws java.io.IOException
202 	 *             an error occurred while writing to the output stream, or this
203 	 *             index format cannot store the object data supplied.
204 	 */
205 	protected abstract void writeImpl() throws IOException;
206 
207 	/**
208 	 * Output the version 2 (and later) TOC header, with version number.
209 	 * <p>
210 	 * Post version 1 all index files start with a TOC header that makes the
211 	 * file an invalid version 1 file, and then includes the version number.
212 	 * This header is necessary to recognize a version 1 from a version 2
213 	 * formatted index.
214 	 *
215 	 * @param version
216 	 *            version number of this index format being written.
217 	 * @throws java.io.IOException
218 	 *             an error occurred while writing to the output stream.
219 	 */
220 	protected void writeTOC(int version) throws IOException {
221 		out.write(TOC);
222 		NB.encodeInt32(tmp, 0, version);
223 		out.write(tmp, 0, 4);
224 	}
225 
226 	/**
227 	 * Output the standard 256 entry first-level fan-out table.
228 	 * <p>
229 	 * The fan-out table is 4 KB in size, holding 256 32-bit unsigned integer
230 	 * counts. Each count represents the number of objects within this index
231 	 * whose {@link org.eclipse.jgit.lib.ObjectId#getFirstByte()} matches the
232 	 * count's position in the fan-out table.
233 	 *
234 	 * @throws java.io.IOException
235 	 *             an error occurred while writing to the output stream.
236 	 */
237 	protected void writeFanOutTable() throws IOException {
238 		final int[] fanout = new int[256];
239 		for (PackedObjectInfo po : entries)
240 			fanout[po.getFirstByte() & 0xff]++;
241 		for (int i = 1; i < 256; i++)
242 			fanout[i] += fanout[i - 1];
243 		for (int n : fanout) {
244 			NB.encodeInt32(tmp, 0, n);
245 			out.write(tmp, 0, 4);
246 		}
247 	}
248 
249 	/**
250 	 * Output the standard two-checksum index footer.
251 	 * <p>
252 	 * The standard footer contains two checksums (20 byte SHA-1 values):
253 	 * <ol>
254 	 * <li>Pack data checksum - taken from the last 20 bytes of the pack file.</li>
255 	 * <li>Index data checksum - checksum of all index bytes written, including
256 	 * the pack data checksum above.</li>
257 	 * </ol>
258 	 *
259 	 * @throws java.io.IOException
260 	 *             an error occurred while writing to the output stream.
261 	 */
262 	protected void writeChecksumFooter() throws IOException {
263 		out.write(packChecksum);
264 		out.on(false);
265 		out.write(out.getMessageDigest().digest());
266 	}
267 }