/*
 * Copyright (C) 2011, Google Inc. and others
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Distribution License v. 1.0 which is available at
 * https://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

package org.eclipse.jgit.internal.storage.dfs;

import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE;
import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.internal.storage.reftable.ReftableCompactor;
import org.eclipse.jgit.internal.storage.reftable.ReftableConfig;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSet;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackStatistics;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.io.CountingOutputStream;

/**
 * Combine several pack files into one pack.
 * <p>
 * The compactor combines several pack files together by including all objects
 * contained in each pack file into the same output pack. If an object appears
 * multiple times, it is only included once in the result. Because the new pack
 * is constructed by enumerating the indexes of the source packs, it is quicker
 * than doing a full repack of the repository; however, the result is not
 * nearly as space efficient, because new delta compression is disabled.
 * <p>
 * This strategy is suitable for quickly combining several packs together after
 * receiving a number of small fetch or push operations into a repository,
 * allowing the system to maintain reasonable read performance without spending
 * a lot of time repacking the entire repository.
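 * <p>
 * A minimal usage sketch (assuming {@code repo} is an open
 * {@link DfsRepository}; a {@code null} progress monitor is accepted and
 * disables progress reporting):
 *
 * <pre>
 * DfsPackCompactor compactor = new DfsPackCompactor(repo);
 * compactor.autoAdd(); // pick small packs, exclude larger ones
 * compactor.compact(null);
 * </pre>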
 */
public class DfsPackCompactor {
	private final DfsRepository repo;
	private final List<DfsPackFile> srcPacks;
	private final List<DfsReftable> srcReftables;
	private final List<ObjectIdSet> exclude;

	private PackStatistics newStats;
	private DfsPackDescription outDesc;

	private int autoAddSize;
	private ReftableConfig reftableConfig;

	private RevWalk rw;
	private RevFlag added;
	private RevFlag isBase;

	/**
	 * Initialize a pack compactor.
	 *
	 * @param repository
	 *            repository objects to be packed will be read from.
	 */
	public DfsPackCompactor(DfsRepository repository) {
		repo = repository;
		autoAddSize = 5 * 1024 * 1024; // 5 MiB
		srcPacks = new ArrayList<>();
		srcReftables = new ArrayList<>();
		exclude = new ArrayList<>(4);
	}

	/**
	 * Set configuration to write a reftable.
	 *
	 * @param cfg
	 *            configuration to write a reftable. Reftable compacting is
	 *            disabled (default) when {@code cfg} is {@code null}.
	 * @return {@code this}
	 */
	public DfsPackCompactor setReftableConfig(ReftableConfig cfg) {
		reftableConfig = cfg;
		return this;
	}

	/**
	 * Add a pack to be compacted.
	 * <p>
	 * All of the objects in this pack will be copied into the resulting pack.
	 * The resulting pack will order objects according to the source pack's own
	 * description ordering (which is based on creation date), and then by the
	 * order the objects appear in the source pack.
	 *
	 * @param pack
	 *            a pack to combine into the resulting pack.
	 * @return {@code this}
	 */
	public DfsPackCompactor add(DfsPackFile pack) {
		srcPacks.add(pack);
		return this;
	}

	/**
	 * Add a reftable to be compacted.
	 *
	 * @param table
	 *            a reftable to combine.
	 * @return {@code this}
	 */
	public DfsPackCompactor add(DfsReftable table) {
		srcReftables.add(table);
		return this;
	}

	/**
	 * Automatically select packs and reftables to be included, and add them.
	 * <p>
	 * Packs are selected based on size: smaller packs are included, while
	 * bigger ones are omitted.
	 *
	 * @return {@code this}
	 * @throws java.io.IOException
	 *             existing packs cannot be read.
	 */
	public DfsPackCompactor autoAdd() throws IOException {
		DfsObjDatabase objdb = repo.getObjectDatabase();
		for (DfsPackFile pack : objdb.getPacks()) {
			DfsPackDescription d = pack.getPackDescription();
			if (d.getFileSize(PACK) < autoAddSize)
				add(pack);
			else
				exclude(pack);
		}

		if (reftableConfig != null) {
			for (DfsReftable table : objdb.getReftables()) {
				DfsPackDescription d = table.getPackDescription();
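				// Only pick up non-GC reftables below the size threshold;
				// the large GC-produced table is left in place.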
				if (d.getPackSource() != GC
						&& d.getFileSize(REFTABLE) < autoAddSize) {
					add(table);
				}
			}
		}
		return this;
	}

	/**
	 * Exclude objects from the compacted pack.
	 *
	 * @param set
	 *            objects to not include.
	 * @return {@code this}.
	 */
	public DfsPackCompactor exclude(ObjectIdSet set) {
		exclude.add(set);
		return this;
	}

	/**
	 * Exclude objects from the compacted pack.
	 *
	 * @param pack
	 *            objects to not include.
	 * @return {@code this}.
	 * @throws java.io.IOException
	 *             pack index cannot be loaded.
	 */
	public DfsPackCompactor exclude(DfsPackFile pack) throws IOException {
		final PackIndex idx;
		try (DfsReader ctx = (DfsReader) repo.newObjectReader()) {
			idx = pack.getPackIndex(ctx);
		}
		return exclude(idx);
	}

	/**
	 * Compact the pack files together.
	 *
	 * @param pm
	 *            progress monitor to receive updates, as packing may take a
	 *            while depending on the size of the repository.
	 * @throws java.io.IOException
	 *             the packs cannot be compacted.
	 */
	public void compact(ProgressMonitor pm) throws IOException {
		if (pm == null) {
			pm = NullProgressMonitor.INSTANCE;
		}

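		// Compact reftables first (when configured), then packs; the new
		// files and the replaced sources are committed in a single update.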
		DfsObjDatabase objdb = repo.getObjectDatabase();
		try (DfsReader ctx = objdb.newReader()) {
			if (reftableConfig != null && !srcReftables.isEmpty()) {
				compactReftables(ctx);
			}
			compactPacks(ctx, pm);

			List<DfsPackDescription> commit = getNewPacks();
			Collection<DfsPackDescription> remove = toPrune();
			if (!commit.isEmpty() || !remove.isEmpty()) {
				objdb.commitPack(commit, remove);
			}
		} finally {
			rw = null;
		}
	}

	private void compactPacks(DfsReader ctx, ProgressMonitor pm)
			throws IOException, IncorrectObjectTypeException {
		DfsObjDatabase objdb = repo.getObjectDatabase();
		PackConfig pc = new PackConfig(repo);
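		// Copy objects as-is: disable new delta search, but reuse existing
		// deltas and whole objects verbatim. This keeps compaction fast at
		// the cost of a less space-efficient result.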
		pc.setIndexVersion(2);
		pc.setDeltaCompress(false);
		pc.setReuseDeltas(true);
		pc.setReuseObjects(true);

		try (PackWriter pw = new PackWriter(pc, ctx)) {
			pw.setDeltaBaseAsOffset(true);
			pw.setReuseDeltaCommits(false);

			addObjectsToPack(pw, ctx, pm);
			if (pw.getObjectCount() == 0) {
				return;
			}

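			// If anything fails before both the pack and its index are
			// fully written, roll back the half-written output.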
			boolean rollback = true;
			initOutDesc(objdb);
			try {
				writePack(objdb, outDesc, pw, pm);
				writeIndex(objdb, outDesc, pw);

				PackStatistics stats = pw.getStatistics();

				outDesc.setPackStats(stats);
				newStats = stats;
				rollback = false;
			} finally {
				if (rollback) {
					objdb.rollbackPack(Collections.singletonList(outDesc));
				}
			}
		}
	}

	private long estimatePackSize() {
		// Every pack file contains 12 bytes of header and 20 bytes of trailer.
		// Include the final pack file header and trailer size here and ignore
		// the same from individual pack files.
		long size = 32;
		for (DfsPackFile pack : srcPacks) {
			size += pack.getPackDescription().getFileSize(PACK) - 32;
		}
		return size;
	}

	private void compactReftables(DfsReader ctx) throws IOException {
		DfsObjDatabase objdb = repo.getObjectDatabase();
		Collections.sort(srcReftables, objdb.reftableComparator());

		initOutDesc(objdb);
		try (DfsReftableStack stack = DfsReftableStack.open(ctx, srcReftables);
				DfsOutputStream out = objdb.writeFile(outDesc, REFTABLE)) {
			ReftableCompactor compact = new ReftableCompactor(out);
			compact.addAll(stack.readers());
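			// Keep deletions: only part of the reftable stack is compacted
			// here, so a deletion may still need to hide an entry in an
			// older table below the compacted range.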
			compact.setIncludeDeletes(true);
			compact.setConfig(configureReftable(reftableConfig, out));
			compact.compact();
			outDesc.addFileExt(REFTABLE);
			outDesc.setReftableStats(compact.getStats());
		}
	}

	private void initOutDesc(DfsObjDatabase objdb) throws IOException {
		if (outDesc == null) {
			outDesc = objdb.newPack(COMPACT, estimatePackSize());
		}
	}

	/**
	 * Get all of the source packs that fed into this compaction.
	 *
	 * @return all of the source packs that fed into this compaction.
	 */
	public Collection<DfsPackDescription> getSourcePacks() {
		Set<DfsPackDescription> src = new HashSet<>();
		for (DfsPackFile pack : srcPacks) {
			src.add(pack.getPackDescription());
		}
		for (DfsReftable table : srcReftables) {
			src.add(table.getPackDescription());
		}
		return src;
	}

	/**
	 * Get new packs created by this compaction.
	 *
	 * @return new packs created by this compaction.
	 */
	public List<DfsPackDescription> getNewPacks() {
		return outDesc != null
				? Collections.singletonList(outDesc)
				: Collections.emptyList();
	}

	/**
	 * Get statistics for the packs in {@link #getNewPacks()}.
	 * <p>
	 * Entries may be {@code null} if statistics are not available.
	 *
	 * @return statistics for the packs in {@link #getNewPacks()}.
	 */
	public List<PackStatistics> getNewPackStatistics() {
		return outDesc != null
				? Collections.singletonList(newStats)
				: Collections.emptyList();
	}

	private Collection<DfsPackDescription> toPrune() {
		Set<DfsPackDescription> packs = new HashSet<>();
		for (DfsPackFile pack : srcPacks) {
			packs.add(pack.getPackDescription());
		}

		Set<DfsPackDescription> reftables = new HashSet<>();
		for (DfsReftable table : srcReftables) {
			reftables.add(table.getPackDescription());
		}

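		// Keep descriptions whose reftable was not part of this compaction;
		// deleting them would discard a still-live reftable.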
		for (Iterator<DfsPackDescription> i = packs.iterator(); i.hasNext();) {
			DfsPackDescription d = i.next();
			if (d.hasFileExt(REFTABLE) && !reftables.contains(d)) {
				i.remove();
			}
		}

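		// Likewise keep descriptions whose pack was not part of this
		// compaction; deleting them would discard still-live objects.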
		for (Iterator<DfsPackDescription> i = reftables.iterator();
				i.hasNext();) {
			DfsPackDescription d = i.next();
			if (d.hasFileExt(PACK) && !packs.contains(d)) {
				i.remove();
			}
		}

		Set<DfsPackDescription> toPrune = new HashSet<>();
		toPrune.addAll(packs);
		toPrune.addAll(reftables);
		return toPrune;
	}

	private void addObjectsToPack(PackWriter pw, DfsReader ctx,
			ProgressMonitor pm) throws IOException,
			IncorrectObjectTypeException {
		// Sort packs by description ordering; this places newer packs before
		// older packs, allowing the PackWriter to be handed newer objects
		// first and older objects last.
		Collections.sort(
				srcPacks,
				Comparator.comparing(
						DfsPackFile::getPackDescription,
						DfsPackDescription.objectLookupComparator()));

		rw = new RevWalk(ctx);
		added = rw.newFlag("ADDED"); //$NON-NLS-1$
		isBase = rw.newFlag("IS_BASE"); //$NON-NLS-1$
		List<RevObject> baseObjects = new BlockList<>();

		pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN);
		for (DfsPackFile src : srcPacks) {
			List<ObjectIdWithOffset> want = toInclude(src, ctx);
			if (want.isEmpty())
				continue;

			PackReverseIndex rev = src.getReverseIdx(ctx);
			DfsObjectRepresentation rep = new DfsObjectRepresentation(src);
			for (ObjectIdWithOffset id : want) {
				int type = src.getObjectType(ctx, id.offset);
				RevObject obj = rw.lookupAny(id, type);
				if (obj.has(added))
					continue;

				pm.update(1);
				pw.addObject(obj);
				obj.add(added);

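				// If the object is stored as a delta in the source pack,
				// also queue its base so the delta can be reused verbatim.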
				src.representation(rep, id.offset, ctx, rev);
				if (rep.getFormat() != PACK_DELTA)
					continue;

				RevObject base = rw.lookupAny(rep.getDeltaBase(), type);
				if (!base.has(added) && !base.has(isBase)) {
					baseObjects.add(base);
					base.add(isBase);
				}
			}
		}
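		// Append any delta bases that were not copied above, so every
		// reused delta in the output still has its base available.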
		for (RevObject obj : baseObjects) {
			if (!obj.has(added)) {
				pm.update(1);
				pw.addObject(obj);
				obj.add(added);
			}
		}
		pm.endTask();
	}

	private List<ObjectIdWithOffset> toInclude(DfsPackFile src, DfsReader ctx)
			throws IOException {
		PackIndex srcIdx = src.getPackIndex(ctx);
		List<ObjectIdWithOffset> want = new BlockList<>(
				(int) srcIdx.getObjectCount());
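		// Skip objects already queued (or marked as delta bases) by an
		// earlier pack, and objects covered by an exclude set.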
		SCAN: for (PackIndex.MutableEntry ent : srcIdx) {
			ObjectId id = ent.toObjectId();
			RevObject obj = rw.lookupOrNull(id);
			if (obj != null && (obj.has(added) || obj.has(isBase)))
				continue;
			for (ObjectIdSet e : exclude)
				if (e.contains(id))
					continue SCAN;
			want.add(new ObjectIdWithOffset(id, ent.getOffset()));
		}
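		// Sort by offset so copying reads the source pack sequentially.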
		Collections.sort(want,
				(a, b) -> Long.compare(a.offset, b.offset));
		return want;
	}

	private static void writePack(DfsObjDatabase objdb,
			DfsPackDescription pack,
			PackWriter pw, ProgressMonitor pm) throws IOException {
		try (DfsOutputStream out = objdb.writeFile(pack, PACK)) {
			pw.writePack(pm, pm, out);
			pack.addFileExt(PACK);
			pack.setBlockSize(PACK, out.blockSize());
		}
	}

	private static void writeIndex(DfsObjDatabase objdb,
			DfsPackDescription pack,
			PackWriter pw) throws IOException {
		try (DfsOutputStream out = objdb.writeFile(pack, INDEX)) {
			CountingOutputStream cnt = new CountingOutputStream(out);
			pw.writeIndex(cnt);
			pack.addFileExt(INDEX);
			pack.setFileSize(INDEX, cnt.getCount());
			pack.setBlockSize(INDEX, out.blockSize());
			pack.setIndexVersion(pw.getIndexVersion());
		}
	}

	static ReftableConfig configureReftable(ReftableConfig cfg,
			DfsOutputStream out) {
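		// When the stream advertises a block size, align reftable blocks to
		// it so reads map cleanly onto storage blocks.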
		int bs = out.blockSize();
		if (bs > 0) {
			cfg = new ReftableConfig(cfg);
			cfg.setRefBlockSize(bs);
			cfg.setAlignBlocks(true);
		}
		return cfg;
	}

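	/** An ObjectId that also remembers its offset within the source pack. */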
	private static class ObjectIdWithOffset extends ObjectId {
		final long offset;

		ObjectIdWithOffset(AnyObjectId id, long ofs) {
			super(id);
			offset = ofs;
		}
	}
}