1 /* 2 * Copyright (C) 2010, Google Inc. and others 3 * 4 * This program and the accompanying materials are made available under the 5 * terms of the Eclipse Distribution License v. 1.0 which is available at 6 * https://www.eclipse.org/org/documents/edl-v10.php. 7 * 8 * SPDX-License-Identifier: BSD-3-Clause 9 */ 10 11 package org.eclipse.jgit.internal.storage.pack; 12 13 import java.io.IOException; 14 import java.util.Collection; 15 import java.util.List; 16 17 import org.eclipse.jgit.errors.MissingObjectException; 18 import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException; 19 import org.eclipse.jgit.lib.AnyObjectId; 20 import org.eclipse.jgit.lib.BitmapIndex.BitmapBuilder; 21 import org.eclipse.jgit.lib.ProgressMonitor; 22 23 /** 24 * Extension of {@link org.eclipse.jgit.lib.ObjectReader} that supports reusing 25 * objects in packs. 26 * <p> 27 * {@code ObjectReader} implementations may also optionally implement this 28 * interface to support 29 * {@link org.eclipse.jgit.internal.storage.pack.PackWriter} with a means of 30 * copying an object that is already in pack encoding format directly into the 31 * output stream, without incurring decompression and recompression overheads. 32 */ 33 public interface ObjectReuseAsIs { 34 /** 35 * Allocate a new {@code PackWriter} state structure for an object. 36 * <p> 37 * {@link org.eclipse.jgit.internal.storage.pack.PackWriter} allocates these 38 * objects to keep track of the per-object state, and how to load the 39 * objects efficiently into the generated stream. Implementers may subclass 40 * this type with additional object state, such as to remember what file and 41 * offset contains the object's pack encoded data. 42 * 43 * @param objectId 44 * the id of the object that will be packed. 45 * @param type 46 * the Git type of the object that will be packed. 47 * @return a new instance for this object. 48 */ 49 ObjectToPack newObjectToPack(AnyObjectId objectId, int type); 50 51 /** 52 * Select the best object representation for a packer. 53 * <p> 54 * Implementations should iterate through all available representations of 55 * an object, and pass them in turn to the PackWriter though 56 * {@link org.eclipse.jgit.internal.storage.pack.PackWriter#select(ObjectToPack, StoredObjectRepresentation)} 57 * so the writer can select the most suitable representation to reuse into 58 * the output stream. 59 * <p> 60 * If the implementation returns CachedPack from 61 * {@link #getCachedPacksAndUpdate(BitmapBuilder)} it must consider the 62 * representation of any object that is stored in any of the offered 63 * CachedPacks. PackWriter relies on this behavior to prune duplicate 64 * objects out of the pack stream when it selects a CachedPack and the 65 * object was also reached through the thin-pack enumeration. 66 * <p> 67 * The implementation may choose to consider multiple objects at once on 68 * concurrent threads, but must evaluate all representations of an object 69 * within the same thread. 70 * 71 * @param packer 72 * the packer that will write the object in the near future. 73 * @param monitor 74 * progress monitor, implementation should update the monitor 75 * once for each item in the iteration when selection is done. 76 * @param objects 77 * the objects that are being packed. 78 * @throws org.eclipse.jgit.errors.MissingObjectException 79 * there is no representation available for the object, as it is 80 * no longer in the repository. Packing will abort. 81 * @throws java.io.IOException 82 * the repository cannot be accessed. Packing will abort. 83 */ 84 void selectObjectRepresentation(PackWriter packer, 85 ProgressMonitor monitor, Iterable<ObjectToPack> objects) 86 throws IOException, MissingObjectException; 87 88 /** 89 * Write objects to the pack stream in roughly the order given. 90 * 91 * {@code PackWriter} invokes this method to write out one or more objects, 92 * in approximately the order specified by the iteration over the list. A 93 * simple implementation of this method would just iterate the list and 94 * output each object: 95 * 96 * <pre> 97 * for (ObjectToPack obj : list) 98 * out.writeObject(obj) 99 * </pre> 100 * 101 * However more sophisticated implementors may try to perform some (small) 102 * reordering to access objects that are stored close to each other at 103 * roughly the same time. Implementations may choose to write objects out of 104 * order, but this may increase pack file size due to using a larger header 105 * format to reach a delta base that is later in the stream. It may also 106 * reduce data locality for the reader, slowing down data access. 107 * 108 * Invoking 109 * {@link org.eclipse.jgit.internal.storage.pack.PackOutputStream#writeObject(ObjectToPack)} 110 * will cause 111 * {@link #copyObjectAsIs(PackOutputStream, ObjectToPack, boolean)} to be 112 * invoked recursively on {@code this} if the current object is scheduled 113 * for reuse. 114 * 115 * @param out 116 * the stream to write each object to. 117 * @param list 118 * the list of objects to write. Objects should be written in 119 * approximately this order. Implementors may resort the list 120 * elements in-place during writing if desired. 121 * @throws java.io.IOException 122 * the stream cannot be written to, or one or more required 123 * objects cannot be accessed from the object database. 124 */ 125 void writeObjects(PackOutputStream out, List<ObjectToPack> list) 126 throws IOException; 127 128 /** 129 * Output a previously selected representation. 130 * <p> 131 * {@code PackWriter} invokes this method only if a representation 132 * previously given to it by {@code selectObjectRepresentation} was chosen 133 * for reuse into the output stream. The {@code otp} argument is an instance 134 * created by this reader's own {@code newObjectToPack}, and the 135 * representation data saved within it also originated from this reader. 136 * <p> 137 * Implementors must write the object header before copying the raw data to 138 * the output stream. The typical implementation is like: 139 * 140 * <pre> 141 * MyToPack mtp = (MyToPack) otp; 142 * byte[] raw; 143 * if (validate) 144 * raw = validate(mtp); // throw SORNAE here, if at all 145 * else 146 * raw = readFast(mtp); 147 * out.writeHeader(mtp, mtp.inflatedSize); 148 * out.write(raw); 149 * </pre> 150 * 151 * @param out 152 * stream the object should be written to. 153 * @param otp 154 * the object's saved representation information. 155 * @param validate 156 * if true the representation must be validated and not be 157 * corrupt before being reused. If false, validation may be 158 * skipped as it will be performed elsewhere in the processing 159 * pipeline. 160 * @throws org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException 161 * the previously selected representation is no longer 162 * available. If thrown before {@code out.writeHeader} the pack 163 * writer will try to find another representation, and write 164 * that one instead. If throw after {@code out.writeHeader}, 165 * packing will abort. 166 * @throws java.io.IOException 167 * the stream's write method threw an exception. Packing will 168 * abort. 169 */ 170 void copyObjectAsIs(PackOutputStream out, ObjectToPack otp, 171 boolean validate) throws IOException, 172 StoredObjectRepresentationNotAvailableException; 173 174 /** 175 * Append an entire pack's contents onto the output stream. 176 * <p> 177 * The entire pack, excluding its header and trailing footer is sent. 178 * 179 * @param out 180 * stream to append the pack onto. 181 * @param pack 182 * the cached pack to send. 183 * @throws java.io.IOException 184 * the pack cannot be read, or stream did not accept a write. 185 */ 186 void copyPackAsIs(PackOutputStream out, CachedPack pack) 187 throws IOException; 188 189 /** 190 * Obtain the available cached packs that match the bitmap and update 191 * the bitmap by removing the items that are in the CachedPack. 192 * <p> 193 * A cached pack has known starting points and may be sent entirely as-is, 194 * with almost no effort on the sender's part. 195 * 196 * @param needBitmap 197 * the bitmap that contains all of the objects the client wants. 198 * @return the available cached packs. 199 * @throws java.io.IOException 200 * the cached packs cannot be listed from the repository. 201 * Callers may choose to ignore this and continue as-if there 202 * were no cached packs. 203 */ 204 Collection<CachedPack> getCachedPacksAndUpdate( 205 BitmapBuilder needBitmap) throws IOException; 206 }