View Javadoc
1   /*
2    * Copyright (C) 2012, Google Inc. and others
3    *
4    * This program and the accompanying materials are made available under the
5    * terms of the Eclipse Distribution License v. 1.0 which is available at
6    * https://www.eclipse.org/org/documents/edl-v10.php.
7    *
8    * SPDX-License-Identifier: BSD-3-Clause
9    */
10  
11  package org.eclipse.jgit.internal.storage.pack;
12  
13  import static org.eclipse.jgit.internal.storage.file.PackBitmapIndex.FLAG_REUSE;
14  import static org.eclipse.jgit.revwalk.RevFlag.SEEN;
15  
16  import java.io.IOException;
17  import java.util.ArrayList;
18  import java.util.Collection;
19  import java.util.Collections;
20  import java.util.Comparator;
21  import java.util.HashSet;
22  import java.util.Iterator;
23  import java.util.List;
24  import java.util.Set;
25  
26  import org.eclipse.jgit.errors.IncorrectObjectTypeException;
27  import org.eclipse.jgit.errors.MissingObjectException;
28  import org.eclipse.jgit.internal.JGitText;
29  import org.eclipse.jgit.internal.revwalk.AddUnseenToBitmapFilter;
30  import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl;
31  import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl.CompressedBitmap;
32  import org.eclipse.jgit.internal.storage.file.PackBitmapIndex;
33  import org.eclipse.jgit.internal.storage.file.PackBitmapIndexBuilder;
34  import org.eclipse.jgit.internal.storage.file.PackBitmapIndexRemapper;
35  import org.eclipse.jgit.lib.AnyObjectId;
36  import org.eclipse.jgit.lib.BitmapIndex.BitmapBuilder;
37  import org.eclipse.jgit.lib.Constants;
38  import org.eclipse.jgit.lib.ObjectId;
39  import org.eclipse.jgit.lib.ObjectReader;
40  import org.eclipse.jgit.lib.ProgressMonitor;
41  import org.eclipse.jgit.revwalk.BitmapWalker;
42  import org.eclipse.jgit.revwalk.ObjectWalk;
43  import org.eclipse.jgit.revwalk.RevCommit;
44  import org.eclipse.jgit.revwalk.RevObject;
45  import org.eclipse.jgit.revwalk.RevWalk;
46  import org.eclipse.jgit.revwalk.filter.RevFilter;
47  import org.eclipse.jgit.storage.pack.PackConfig;
48  import org.eclipse.jgit.util.BlockList;
49  import org.eclipse.jgit.util.SystemReader;
50  
51  import com.googlecode.javaewah.EWAHCompressedBitmap;
52  
53  /**
54   * Helper class for the {@link PackWriter} to select commits for which to build
55   * pack index bitmaps.
56   */
57  class PackWriterBitmapPreparer {
58  
59  	private static final int DAY_IN_SECONDS = 24 * 60 * 60;
60  
61  	private static final Comparator<RevCommit> ORDER_BY_REVERSE_TIMESTAMP = (
62  			RevCommit a, RevCommit b) -> Integer
63  					.signum(b.getCommitTime() - a.getCommitTime());
64  
65  	private final ObjectReader reader;
66  	private final ProgressMonitor pm;
67  	private final Set<? extends ObjectId> want;
68  	private final PackBitmapIndexBuilder writeBitmaps;
69  	private final BitmapIndexImpl commitBitmapIndex;
70  	private final PackBitmapIndexRemapper bitmapRemapper;
71  	private final BitmapIndexImpl bitmapIndex;
72  
73  	private final int contiguousCommitCount;
74  	private final int recentCommitCount;
75  	private final int recentCommitSpan;
76  	private final int distantCommitSpan;
77  	private final int excessiveBranchCount;
78  	private final long inactiveBranchTimestamp;
79  
80  	PackWriterBitmapPreparer(ObjectReader reader,
81  			PackBitmapIndexBuilder writeBitmaps, ProgressMonitor pm,
82  			Set<? extends ObjectId> want, PackConfig config)
83  					throws IOException {
84  		this.reader = reader;
85  		this.writeBitmaps = writeBitmaps;
86  		this.pm = pm;
87  		this.want = want;
88  		this.commitBitmapIndex = new BitmapIndexImpl(writeBitmaps);
89  		this.bitmapRemapper = PackBitmapIndexRemapper.newPackBitmapIndex(
90  				reader.getBitmapIndex(), writeBitmaps);
91  		this.bitmapIndex = new BitmapIndexImpl(bitmapRemapper);
92  		this.contiguousCommitCount = config.getBitmapContiguousCommitCount();
93  		this.recentCommitCount = config.getBitmapRecentCommitCount();
94  		this.recentCommitSpan = config.getBitmapRecentCommitSpan();
95  		this.distantCommitSpan = config.getBitmapDistantCommitSpan();
96  		this.excessiveBranchCount = config.getBitmapExcessiveBranchCount();
97  		long now = SystemReader.getInstance().getCurrentTime();
98  		long ageInSeconds = config.getBitmapInactiveBranchAgeInDays()
99  				* DAY_IN_SECONDS;
100 		this.inactiveBranchTimestamp = (now / 1000) - ageInSeconds;
101 	}
102 
103 	/**
104 	 * Returns the commit objects for which bitmap indices should be built.
105 	 *
106 	 * @param expectedCommitCount
107 	 *            count of commits in the pack
108 	 * @param excludeFromBitmapSelection
109 	 *            commits that should be excluded from bitmap selection
110 	 * @return commit objects for which bitmap indices should be built
111 	 * @throws IncorrectObjectTypeException
112 	 *             if any of the processed objects is not a commit
113 	 * @throws IOException
114 	 *             on errors reading pack or index files
115 	 * @throws MissingObjectException
116 	 *             if an expected object is missing
117 	 */
118 	Collection<BitmapCommit> selectCommits(int expectedCommitCount,
119 			Set<? extends ObjectId> excludeFromBitmapSelection)
120 			throws IncorrectObjectTypeException, IOException,
121 			MissingObjectException {
122 		/*
123 		 * Thinking of bitmap indices as a cache, if we find bitmaps at or at a
124 		 * close ancestor to 'old' and 'new' when calculating old..new, then all
125 		 * objects can be calculated with minimal graph walking. A distribution
126 		 * that favors creating bitmaps for the most recent commits maximizes
127 		 * the cache hits for clients that are close to HEAD, which is the
128 		 * majority of calculations performed.
129 		 */
130 		try (RevWalkvwalk/RevWalk.html#RevWalk">RevWalk rw = new RevWalk(reader);
131 				RevWalk rw2 = new RevWalk(reader)) {
132 			pm.beginTask(JGitText.get().selectingCommits,
133 					ProgressMonitor.UNKNOWN);
134 			rw.setRetainBody(false);
135 			CommitSelectionHelper selectionHelper = captureOldAndNewCommits(rw,
136 					expectedCommitCount, excludeFromBitmapSelection);
137 			pm.endTask();
138 
139 			// Add reused bitmaps from the previous GC pack's bitmap indices.
140 			// Currently they are always fully reused, even if their spans don't
141 			// match this run's PackConfig values.
142 			int newCommits = selectionHelper.getCommitCount();
143 			BlockList<BitmapCommit> selections = new BlockList<>(
144 					selectionHelper.reusedCommits.size()
145 							+ newCommits / recentCommitSpan + 1);
146 			for (BitmapCommit reuse : selectionHelper.reusedCommits) {
147 				selections.add(reuse);
148 			}
149 
150 			if (newCommits == 0) {
151 				for (AnyObjectId id : selectionHelper.newWants) {
152 					selections.add(new BitmapCommit(id, false, 0));
153 				}
154 				return selections;
155 			}
156 
157 			pm.beginTask(JGitText.get().selectingCommits, newCommits);
158 			int totalWants = want.size();
159 			BitmapBuilder seen = commitBitmapIndex.newBitmapBuilder();
160 			seen.or(selectionHelper.reusedCommitsBitmap);
161 			rw2.setRetainBody(false);
162 			rw2.setRevFilter(new NotInBitmapFilter(seen));
163 
164 			// For each branch, do a revwalk to enumerate its commits. Exclude
165 			// both reused commits and any commits seen in a previous branch.
166 			// Then iterate through all new commits from oldest to newest,
167 			// selecting well-spaced commits in this branch.
168 			for (RevCommit rc : selectionHelper.newWantsByNewest) {
169 				BitmapBuilder tipBitmap = commitBitmapIndex.newBitmapBuilder();
170 				rw2.markStart((RevCommit) rw2.peel(rw2.parseAny(rc)));
171 				RevCommit rc2;
172 				while ((rc2 = rw2.next()) != null) {
173 					tipBitmap.addObject(rc2, Constants.OBJ_COMMIT);
174 				}
175 				int cardinality = tipBitmap.cardinality();
176 				seen.or(tipBitmap);
177 
178 				// Within this branch, keep ordered lists of commits
179 				// representing chains in its history, where each chain is a
180 				// "sub-branch". Ordering commits by these chains makes for
181 				// fewer differences between consecutive selected commits, which
182 				// in turn provides better compression/on the run-length
183 				// encoding of the XORs between them.
184 				List<List<BitmapCommit>> chains = new ArrayList<>();
185 
186 				// Mark the current branch as inactive if its tip commit isn't
187 				// recent and there are an excessive number of branches, to
188 				// prevent memory bloat of computing too many bitmaps for stale
189 				// branches.
190 				boolean isActiveBranch = true;
191 				if (totalWants > excessiveBranchCount && !isRecentCommit(rc)) {
192 					isActiveBranch = false;
193 				}
194 
195 				// Insert bitmaps at the offsets suggested by the
196 				// nextSelectionDistance() heuristic. Only reuse bitmaps created
197 				// for more distant commits.
198 				int index = -1;
199 				int nextIn = nextSpan(cardinality);
200 				int nextFlg = nextIn == distantCommitSpan
201 						? PackBitmapIndex.FLAG_REUSE
202 						: 0;
203 
204 				// For the current branch, iterate through all commits from
205 				// oldest to newest.
206 				for (RevCommit c : selectionHelper) {
207 					// Optimization: if we have found all the commits for this
208 					// branch, stop searching
209 					int distanceFromTip = cardinality - index - 1;
210 					if (distanceFromTip == 0) {
211 						break;
212 					}
213 
214 					// Ignore commits that are not in this branch
215 					if (!tipBitmap.contains(c)) {
216 						continue;
217 					}
218 
219 					index++;
220 					nextIn--;
221 					pm.update(1);
222 
223 					// Always pick the items in wants, prefer merge commits.
224 					if (selectionHelper.newWants.remove(c)) {
225 						if (nextIn > 0) {
226 							nextFlg = 0;
227 						}
228 					} else {
229 						boolean stillInSpan = nextIn >= 0;
230 						boolean isMergeCommit = c.getParentCount() > 1;
231 						// Force selection if:
232 						// a) we have exhausted the window looking for merges
233 						// b) we are in the top commits of an active branch
234 						// c) we are at a branch tip
235 						boolean mustPick = (nextIn <= -recentCommitSpan)
236 								|| (isActiveBranch
237 										&& (distanceFromTip <= contiguousCommitCount))
238 								|| (distanceFromTip == 1); // most recent commit
239 						if (!mustPick && (stillInSpan || !isMergeCommit)) {
240 							continue;
241 						}
242 					}
243 
244 					// This commit is selected.
245 					// Calculate where to look for the next one.
246 					int flags = nextFlg;
247 					nextIn = nextSpan(distanceFromTip);
248 					nextFlg = nextIn == distantCommitSpan
249 							? PackBitmapIndex.FLAG_REUSE
250 							: 0;
251 
252 					// Create the commit bitmap for the current commit
253 					BitmapBuilder bitmap = commitBitmapIndex.newBitmapBuilder();
254 					rw.reset();
255 					rw.markStart(c);
256 					rw.setRevFilter(new AddUnseenToBitmapFilter(
257 							selectionHelper.reusedCommitsBitmap, bitmap));
258 					while (rw.next() != null) {
259 						// The filter adds the reachable commits to bitmap.
260 					}
261 
262 					// Sort the commits by independent chains in this branch's
263 					// history, yielding better compression when building
264 					// bitmaps.
265 					List<BitmapCommit> longestAncestorChain = null;
266 					for (List<BitmapCommit> chain : chains) {
267 						BitmapCommit mostRecentCommit = chain
268 								.get(chain.size() - 1);
269 						if (bitmap.contains(mostRecentCommit)) {
270 							if (longestAncestorChain == null
271 									|| longestAncestorChain.size() < chain
272 											.size()) {
273 								longestAncestorChain = chain;
274 							}
275 						}
276 					}
277 
278 					if (longestAncestorChain == null) {
279 						longestAncestorChain = new ArrayList<>();
280 						chains.add(longestAncestorChain);
281 					}
282 					longestAncestorChain.add(new BitmapCommit(c,
283 							!longestAncestorChain.isEmpty(), flags));
284 					writeBitmaps.addBitmap(c, bitmap, 0);
285 				}
286 
287 				for (List<BitmapCommit> chain : chains) {
288 					selections.addAll(chain);
289 				}
290 			}
291 			writeBitmaps.clearBitmaps(); // Remove the temporary commit bitmaps.
292 
293 			// Add the remaining peeledWant
294 			for (AnyObjectId remainingWant : selectionHelper.newWants) {
295 				selections.add(new BitmapCommit(remainingWant, false, 0));
296 			}
297 
298 			pm.endTask();
299 			return selections;
300 		}
301 	}
302 
303 	private boolean isRecentCommit(RevCommit revCommit) {
304 		return revCommit.getCommitTime() > inactiveBranchTimestamp;
305 	}
306 
307 	/**
308 	 * A RevFilter that excludes the commits named in a bitmap from the walk.
309 	 * <p>
310 	 * If a commit is in {@code bitmap} then that commit is not emitted by the
311 	 * walk and its parents are marked as SEEN so the walk can skip them.  The
312 	 * bitmaps passed in have the property that the parents of any commit in
313 	 * {@code bitmap} are also in {@code bitmap}, so marking the parents as
314 	 * SEEN speeds up the RevWalk by saving it from walking down blind alleys
315 	 * and does not change the commits emitted.
316 	 */
317 	private static class NotInBitmapFilter extends RevFilter {
318 		private final BitmapBuilder bitmap;
319 
320 		NotInBitmapFilter(BitmapBuilder bitmap) {
321 			this.bitmap = bitmap;
322 		}
323 
324 		@Override
325 		public final boolean include(RevWalk rw, RevCommit c) {
326 			if (!bitmap.contains(c)) {
327 				return true;
328 			}
329 			for (RevCommit p : c.getParents()) {
330 				p.add(SEEN);
331 			}
332 			return false;
333 		}
334 
335 		@Override
336 		public final NotInBitmapFilter clone() {
337 			throw new UnsupportedOperationException();
338 		}
339 
340 		@Override
341 		public final boolean requiresCommitBody() {
342 			return false;
343 		}
344 	}
345 
346 	/**
347 	 * Records which of the {@code wants} can be found in the previous GC pack's
348 	 * bitmap indices and which are new.
349 	 *
350 	 * @param rw
351 	 *            a {@link RevWalk} to find reachable objects in this repository
352 	 * @param expectedCommitCount
353 	 *            expected count of commits. The actual count may be less due to
354 	 *            unreachable garbage.
355 	 * @param excludeFromBitmapSelection
356 	 *            commits that should be excluded from bitmap selection
357 	 * @return a {@link CommitSelectionHelper} capturing which commits are
358 	 *         covered by a previous pack's bitmaps and which new commits need
359 	 *         bitmap coverage
360 	 * @throws IncorrectObjectTypeException
361 	 *             if any of the processed objects is not a commit
362 	 * @throws IOException
363 	 *             on errors reading pack or index files
364 	 * @throws MissingObjectException
365 	 *             if an expected object is missing
366 	 */
367 	private CommitSelectionHelper captureOldAndNewCommits(RevWalk rw,
368 			int expectedCommitCount,
369 			Set<? extends ObjectId> excludeFromBitmapSelection)
370 			throws IncorrectObjectTypeException, IOException,
371 			MissingObjectException {
372 		// Track bitmaps and commits from the previous GC pack bitmap indices.
373 		BitmapBuilder reuse = commitBitmapIndex.newBitmapBuilder();
374 		List<BitmapCommit> reuseCommits = new ArrayList<>();
375 		for (PackBitmapIndexRemapper.Entry entry : bitmapRemapper) {
376 			// More recent commits did not have the reuse flag set, so skip them
377 			if ((entry.getFlags() & FLAG_REUSE) != FLAG_REUSE) {
378 				continue;
379 			}
380 			RevObject ro = rw.peel(rw.parseAny(entry));
381 			if (!(ro instanceof RevCommit)) {
382 				continue;
383 			}
384 
385 			RevCommit rc = (RevCommit) ro;
386 			reuseCommits.add(new BitmapCommit(rc, false, entry.getFlags()));
387 			if (!reuse.contains(rc)) {
388 				EWAHCompressedBitmap bitmap = bitmapRemapper.ofObjectType(
389 						bitmapRemapper.getBitmap(rc), Constants.OBJ_COMMIT);
390 				reuse.or(new CompressedBitmap(bitmap, commitBitmapIndex));
391 			}
392 		}
393 
394 		// Add branch tips that are not represented in a previous pack's bitmap
395 		// indices. Set up a RevWalk to find new commits not in the old packs.
396 		List<RevCommit> newWantsByNewest = new ArrayList<>(want.size());
397 		Set<RevCommit> newWants = new HashSet<>(want.size());
398 		for (AnyObjectId objectId : want) {
399 			RevObject ro = rw.peel(rw.parseAny(objectId));
400 			if (!(ro instanceof RevCommit) || reuse.contains(ro)
401 					|| excludeFromBitmapSelection.contains(ro)) {
402 				continue;
403 			}
404 
405 			RevCommit rc = (RevCommit) ro;
406 			rw.markStart(rc);
407 			newWants.add(rc);
408 			newWantsByNewest.add(rc);
409 		}
410 
411 		// Create a list of commits in reverse order (older to newer) that are
412 		// not in the previous bitmap indices and are reachable.
413 		rw.setRevFilter(new NotInBitmapFilter(reuse));
414 		RevCommit[] commits = new RevCommit[expectedCommitCount];
415 		int pos = commits.length;
416 		RevCommit rc;
417 		while ((rc = rw.next()) != null && pos > 0) {
418 			commits[--pos] = rc;
419 			pm.update(1);
420 		}
421 
422 		// Sort the new wants by reverse commit time.
423 		Collections.sort(newWantsByNewest, ORDER_BY_REVERSE_TIMESTAMP);
424 		return new CommitSelectionHelper(newWants, commits, pos,
425 				newWantsByNewest, reuse, reuseCommits);
426 	}
427 
428 	/*-
429 	 * Returns the desired distance to the next bitmap based on the distance
430 	 * from the tip commit. Only differentiates recent from distant spans,
431 	 * selectCommits() handles the contiguous commits at the tip for active
432 	 * or inactive branches.
433 	 *
434 	 * A graph of this function looks like this, where
435 	 * the X axis is the distance from the tip commit and the Y axis is the
436 	 * bitmap selection distance.
437 	 *
438 	 * 5000                ____...
439 	 *                    /
440 	 *                  /
441 	 *                /
442 	 *              /
443 	 *  100  _____/
444 	 *       0  20100  25000
445 	 *
446 	 * Linear scaling between 20100 and 25000 prevents spans >100 for distances
447 	 * <20000 (otherwise, a span of 5000 would be returned for a distance of
448 	 * 21000, and the range 16000-20000 would have no selections).
449 	 */
450 	int nextSpan(int distanceFromTip) {
451 		if (distanceFromTip < 0) {
452 			throw new IllegalArgumentException();
453 		}
454 
455 		// Commits more toward the start will have more bitmaps.
456 		if (distanceFromTip <= recentCommitCount) {
457 			return recentCommitSpan;
458 		}
459 
460 		int next = Math.min(distanceFromTip - recentCommitCount,
461 				distantCommitSpan);
462 		return Math.max(next, recentCommitSpan);
463 	}
464 
465 	BitmapWalker newBitmapWalker() {
466 		return new BitmapWalker(
467 				new ObjectWalk(reader), bitmapIndex, null);
468 	}
469 
470 	/**
471 	 * A commit object for which a bitmap index should be built.
472 	 */
473 	static final class BitmapCommit extends ObjectId {
474 		private final boolean reuseWalker;
475 		private final int flags;
476 
477 		BitmapCommit(AnyObjectId objectId, boolean reuseWalker, int flags) {
478 			super(objectId);
479 			this.reuseWalker = reuseWalker;
480 			this.flags = flags;
481 		}
482 
483 		boolean isReuseWalker() {
484 			return reuseWalker;
485 		}
486 
487 		int getFlags() {
488 			return flags;
489 		}
490 	}
491 
492 	/**
493 	 * Container for state used in the first phase of selecting commits, which
494 	 * walks all of the reachable commits via the branch tips that are not
495 	 * covered by a previous pack's bitmaps ({@code newWants}) and stores them
496 	 * in {@code newCommitsByOldest}. {@code newCommitsByOldest} is initialized
497 	 * with an expected size of all commits, but may be smaller if some commits
498 	 * are unreachable and/or some commits are covered by a previous pack's
499 	 * bitmaps. {@code commitStartPos} will contain a positive offset to either
500 	 * the root commit or the oldest commit not covered by previous bitmaps.
501 	 */
502 	private static final class CommitSelectionHelper implements Iterable<RevCommit> {
503 		final Set<? extends ObjectId> newWants;
504 
505 		final List<RevCommit> newWantsByNewest;
506 		final BitmapBuilder reusedCommitsBitmap;
507 
508 		final List<BitmapCommit> reusedCommits;
509 
510 		final RevCommit[] newCommitsByOldest;
511 
512 		final int newCommitStartPos;
513 
514 		CommitSelectionHelper(Set<? extends ObjectId> newWants,
515 				RevCommit[] commitsByOldest, int commitStartPos,
516 				List<RevCommit> newWantsByNewest,
517 				BitmapBuilder reusedCommitsBitmap,
518 				List<BitmapCommit> reuse) {
519 			this.newWants = newWants;
520 			this.newCommitsByOldest = commitsByOldest;
521 			this.newCommitStartPos = commitStartPos;
522 			this.newWantsByNewest = newWantsByNewest;
523 			this.reusedCommitsBitmap = reusedCommitsBitmap;
524 			this.reusedCommits = reuse;
525 		}
526 
527 		@Override
528 		public Iterator<RevCommit> iterator() {
529 			// Member variables referenced by this iterator will have synthetic
530 			// accessors generated for them if they are made private.
531 			return new Iterator<RevCommit>() {
532 				int pos = newCommitStartPos;
533 
534 				@Override
535 				public boolean hasNext() {
536 					return pos < newCommitsByOldest.length;
537 				}
538 
539 				@Override
540 				public RevCommit next() {
541 					return newCommitsByOldest[pos++];
542 				}
543 
544 				@Override
545 				public void remove() {
546 					throw new UnsupportedOperationException();
547 				}
548 			};
549 		}
550 
551 		int getCommitCount() {
552 			return newCommitsByOldest.length - newCommitStartPos;
553 		}
554 	}
555 }