View Javadoc
1   /*
2    * Copyright (C) 2010, Google Inc. and others
3    *
4    * This program and the accompanying materials are made available under the
5    * terms of the Eclipse Distribution License v. 1.0 which is available at
6    * https://www.eclipse.org/org/documents/edl-v10.php.
7    *
8    * SPDX-License-Identifier: BSD-3-Clause
9    */
10  
11  package org.eclipse.jgit.internal.storage.pack;
12  
13  import java.io.IOException;
14  import java.util.Collection;
15  import java.util.List;
16  
17  import org.eclipse.jgit.errors.MissingObjectException;
18  import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException;
19  import org.eclipse.jgit.lib.AnyObjectId;
20  import org.eclipse.jgit.lib.BitmapIndex.BitmapBuilder;
21  import org.eclipse.jgit.lib.ProgressMonitor;
22  
23  /**
24   * Extension of {@link org.eclipse.jgit.lib.ObjectReader} that supports reusing
25   * objects in packs.
26   * <p>
27   * {@code ObjectReader} implementations may also optionally implement this
28   * interface to support
29   * {@link org.eclipse.jgit.internal.storage.pack.PackWriter} with a means of
30   * copying an object that is already in pack encoding format directly into the
31   * output stream, without incurring decompression and recompression overheads.
32   */
33  public interface ObjectReuseAsIs {
34  	/**
35  	 * Allocate a new {@code PackWriter} state structure for an object.
36  	 * <p>
37  	 * {@link org.eclipse.jgit.internal.storage.pack.PackWriter} allocates these
38  	 * objects to keep track of the per-object state, and how to load the
39  	 * objects efficiently into the generated stream. Implementers may subclass
40  	 * this type with additional object state, such as to remember what file and
41  	 * offset contains the object's pack encoded data.
42  	 *
43  	 * @param objectId
44  	 *            the id of the object that will be packed.
45  	 * @param type
46  	 *            the Git type of the object that will be packed.
47  	 * @return a new instance for this object.
48  	 */
49  	ObjectToPack newObjectToPack(AnyObjectId objectId, int type);
50  
51  	/**
52  	 * Select the best object representation for a packer.
53  	 * <p>
54  	 * Implementations should iterate through all available representations of
55  	 * an object, and pass them in turn to the PackWriter through
56  	 * {@link org.eclipse.jgit.internal.storage.pack.PackWriter#select(ObjectToPack, StoredObjectRepresentation)}
57  	 * so the writer can select the most suitable representation to reuse into
58  	 * the output stream.
59  	 * <p>
60  	 * If the implementation returns CachedPack from
61  	 * {@link #getCachedPacksAndUpdate(BitmapBuilder)} it must consider the
62  	 * representation of any object that is stored in any of the offered
63  	 * CachedPacks. PackWriter relies on this behavior to prune duplicate
64  	 * objects out of the pack stream when it selects a CachedPack and the
65  	 * object was also reached through the thin-pack enumeration.
66  	 * <p>
67  	 * The implementation may choose to consider multiple objects at once on
68  	 * concurrent threads, but must evaluate all representations of an object
69  	 * within the same thread.
70  	 *
71  	 * @param packer
72  	 *            the packer that will write the object in the near future.
73  	 * @param monitor
74  	 *            progress monitor, implementation should update the monitor
75  	 *            once for each item in the iteration when selection is done.
76  	 * @param objects
77  	 *            the objects that are being packed.
78  	 * @throws org.eclipse.jgit.errors.MissingObjectException
79  	 *             there is no representation available for the object, as it is
80  	 *             no longer in the repository. Packing will abort.
81  	 * @throws java.io.IOException
82  	 *             the repository cannot be accessed. Packing will abort.
83  	 */
84  	void selectObjectRepresentation(PackWriter packer,
85  			ProgressMonitor monitor, Iterable<ObjectToPack> objects)
86  			throws IOException, MissingObjectException;
87  
88  	/**
89  	 * Write objects to the pack stream in roughly the order given.
90  	 * <p>
91  	 * {@code PackWriter} invokes this method to write out one or more objects,
92  	 * in approximately the order specified by the iteration over the list. A
93  	 * simple implementation of this method would just iterate the list and
94  	 * output each object:
95  	 *
96  	 * <pre>
97  	 * for (ObjectToPack obj : list)
98  	 *   out.writeObject(obj)
99  	 * </pre>
100 	 * <p>
101 	 * However more sophisticated implementors may try to perform some (small)
102 	 * reordering to access objects that are stored close to each other at
103 	 * roughly the same time. Implementations may choose to write objects out of
104 	 * order, but this may increase pack file size due to using a larger header
105 	 * format to reach a delta base that is later in the stream. It may also
106 	 * reduce data locality for the reader, slowing down data access.
107 	 * <p>
108 	 * Invoking
109 	 * {@link org.eclipse.jgit.internal.storage.pack.PackOutputStream#writeObject(ObjectToPack)}
110 	 * will cause
111 	 * {@link #copyObjectAsIs(PackOutputStream, ObjectToPack, boolean)} to be
112 	 * invoked recursively on {@code this} if the current object is scheduled
113 	 * for reuse.
114 	 *
115 	 * @param out
116 	 *            the stream to write each object to.
117 	 * @param list
118 	 *            the list of objects to write. Objects should be written in
119 	 *            approximately this order. Implementors may re-sort the list
120 	 *            elements in-place during writing if desired.
121 	 * @throws java.io.IOException
122 	 *             the stream cannot be written to, or one or more required
123 	 *             objects cannot be accessed from the object database.
124 	 */
125 	void writeObjects(PackOutputStream out, List<ObjectToPack> list)
126 			throws IOException;
127 
128 	/**
129 	 * Output a previously selected representation.
130 	 * <p>
131 	 * {@code PackWriter} invokes this method only if a representation
132 	 * previously given to it by {@code selectObjectRepresentation} was chosen
133 	 * for reuse into the output stream. The {@code otp} argument is an instance
134 	 * created by this reader's own {@code newObjectToPack}, and the
135 	 * representation data saved within it also originated from this reader.
136 	 * <p>
137 	 * Implementors must write the object header before copying the raw data to
138 	 * the output stream. The typical implementation is like:
139 	 *
140 	 * <pre>
141 	 * MyToPack mtp = (MyToPack) otp;
142 	 * byte[] raw;
143 	 * if (validate)
144 	 * 	 raw = validate(mtp); // throw SORNAE here, if at all
145 	 * else
146 	 * 	 raw = readFast(mtp);
147 	 * out.writeHeader(mtp, mtp.inflatedSize);
148 	 * out.write(raw);
149 	 * </pre>
150 	 *
151 	 * @param out
152 	 *            stream the object should be written to.
153 	 * @param otp
154 	 *            the object's saved representation information.
155 	 * @param validate
156 	 *            if true the representation must be validated and not be
157 	 *            corrupt before being reused. If false, validation may be
158 	 *            skipped as it will be performed elsewhere in the processing
159 	 *            pipeline.
160 	 * @throws org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException
161 	 *             the previously selected representation is no longer
162 	 *             available. If thrown before {@code out.writeHeader} the pack
163 	 *             writer will try to find another representation, and write
164 	 *             that one instead. If thrown after {@code out.writeHeader},
165 	 *             packing will abort.
166 	 * @throws java.io.IOException
167 	 *             the stream's write method threw an exception. Packing will
168 	 *             abort.
169 	 */
170 	void copyObjectAsIs(PackOutputStream out, ObjectToPack otp,
171 			boolean validate) throws IOException,
172 			StoredObjectRepresentationNotAvailableException;
173 
174 	/**
175 	 * Append an entire pack's contents onto the output stream.
176 	 * <p>
177 	 * The entire pack, excluding its header and trailing footer, is sent.
178 	 *
179 	 * @param out
180 	 *            stream to append the pack onto.
181 	 * @param pack
182 	 *            the cached pack to send.
183 	 * @throws java.io.IOException
184 	 *             the pack cannot be read, or stream did not accept a write.
185 	 */
186 	void copyPackAsIs(PackOutputStream out, CachedPack pack)
187 			throws IOException;
188 
189 	/**
190 	 * Obtain the available cached packs that match the bitmap and update
191 	 * the bitmap by removing the items that are in the CachedPack.
192 	 * <p>
193 	 * A cached pack has known starting points and may be sent entirely as-is,
194 	 * with almost no effort on the sender's part.
195 	 *
196 	 * @param needBitmap
197 	 *            the bitmap that contains all of the objects the client wants.
198 	 * @return the available cached packs.
199 	 * @throws java.io.IOException
200 	 *             the cached packs cannot be listed from the repository.
201 	 *             Callers may choose to ignore this and continue as if there
202 	 *             were no cached packs.
203 	 */
204 	Collection<CachedPack> getCachedPacksAndUpdate(
205 			BitmapBuilder needBitmap) throws IOException;
206 }
206 }