View Javadoc
1   /*
2    * Copyright (C) 2012, Google Inc. and others
3    *
4    * This program and the accompanying materials are made available under the
5    * terms of the Eclipse Distribution License v. 1.0 which is available at
6    * https://www.eclipse.org/org/documents/edl-v10.php.
7    *
8    * SPDX-License-Identifier: BSD-3-Clause
9    */
10  
11  package org.eclipse.jgit.internal.storage.pack;
12  
13  import static org.eclipse.jgit.internal.storage.file.PackBitmapIndex.FLAG_REUSE;
14  import static org.eclipse.jgit.revwalk.RevFlag.SEEN;
15  
16  import java.io.IOException;
17  import java.util.ArrayList;
18  import java.util.Collection;
19  import java.util.Collections;
20  import java.util.Comparator;
21  import java.util.HashSet;
22  import java.util.Iterator;
23  import java.util.List;
24  import java.util.Set;
25  
26  import org.eclipse.jgit.errors.IncorrectObjectTypeException;
27  import org.eclipse.jgit.errors.MissingObjectException;
28  import org.eclipse.jgit.internal.JGitText;
29  import org.eclipse.jgit.internal.revwalk.AddUnseenToBitmapFilter;
30  import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl;
31  import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl.CompressedBitmap;
32  import org.eclipse.jgit.internal.storage.file.PackBitmapIndex;
33  import org.eclipse.jgit.internal.storage.file.PackBitmapIndexBuilder;
34  import org.eclipse.jgit.internal.storage.file.PackBitmapIndexRemapper;
35  import org.eclipse.jgit.lib.AnyObjectId;
36  import org.eclipse.jgit.lib.BitmapIndex.BitmapBuilder;
37  import org.eclipse.jgit.lib.Constants;
38  import org.eclipse.jgit.lib.ObjectId;
39  import org.eclipse.jgit.lib.ObjectReader;
40  import org.eclipse.jgit.lib.ProgressMonitor;
41  import org.eclipse.jgit.revwalk.BitmapWalker;
42  import org.eclipse.jgit.revwalk.ObjectWalk;
43  import org.eclipse.jgit.revwalk.RevCommit;
44  import org.eclipse.jgit.revwalk.RevObject;
45  import org.eclipse.jgit.revwalk.RevWalk;
46  import org.eclipse.jgit.revwalk.filter.RevFilter;
47  import org.eclipse.jgit.storage.pack.PackConfig;
48  import org.eclipse.jgit.util.BlockList;
49  import org.eclipse.jgit.util.SystemReader;
50  
51  import com.googlecode.javaewah.EWAHCompressedBitmap;
52  
53  /**
54   * Helper class for the {@link PackWriter} to select commits for which to build
55   * pack index bitmaps.
56   */
57  class PackWriterBitmapPreparer {
58  
59  	private static final int DAY_IN_SECONDS = 24 * 60 * 60;
60  
61  	private static final int DISTANCE_THRESHOLD = 2000;
62  
63  	private static final Comparator<RevCommit> ORDER_BY_REVERSE_TIMESTAMP = (
64  			RevCommit a, RevCommit b) -> Integer
65  					.signum(b.getCommitTime() - a.getCommitTime());
66  
67  	private final ObjectReader reader;
68  	private final ProgressMonitor pm;
69  	private final Set<? extends ObjectId> want;
70  	private final PackBitmapIndexBuilder writeBitmaps;
71  	private final BitmapIndexImpl commitBitmapIndex;
72  	private final PackBitmapIndexRemapper bitmapRemapper;
73  	private final BitmapIndexImpl bitmapIndex;
74  
75  	private final int contiguousCommitCount;
76  	private final int recentCommitCount;
77  	private final int recentCommitSpan;
78  	private final int distantCommitSpan;
79  	private final int excessiveBranchCount;
80  	private final long inactiveBranchTimestamp;
81  
82  	PackWriterBitmapPreparer(ObjectReader reader,
83  			PackBitmapIndexBuilder writeBitmaps, ProgressMonitor pm,
84  			Set<? extends ObjectId> want, PackConfig config)
85  					throws IOException {
86  		this.reader = reader;
87  		this.writeBitmaps = writeBitmaps;
88  		this.pm = pm;
89  		this.want = want;
90  		this.commitBitmapIndex = new BitmapIndexImpl(writeBitmaps);
91  		this.bitmapRemapper = PackBitmapIndexRemapper.newPackBitmapIndex(
92  				reader.getBitmapIndex(), writeBitmaps);
93  		this.bitmapIndex = new BitmapIndexImpl(bitmapRemapper);
94  		this.contiguousCommitCount = config.getBitmapContiguousCommitCount();
95  		this.recentCommitCount = config.getBitmapRecentCommitCount();
96  		this.recentCommitSpan = config.getBitmapRecentCommitSpan();
97  		this.distantCommitSpan = config.getBitmapDistantCommitSpan();
98  		this.excessiveBranchCount = config.getBitmapExcessiveBranchCount();
99  		long now = SystemReader.getInstance().getCurrentTime();
100 		long ageInSeconds = config.getBitmapInactiveBranchAgeInDays()
101 				* DAY_IN_SECONDS;
102 		this.inactiveBranchTimestamp = (now / 1000) - ageInSeconds;
103 	}
104 
105 	/**
106 	 * Returns the commit objects for which bitmap indices should be built.
107 	 *
108 	 * @param expectedCommitCount
109 	 *            count of commits in the pack
110 	 * @param excludeFromBitmapSelection
111 	 *            commits that should be excluded from bitmap selection
112 	 * @return commit objects for which bitmap indices should be built
113 	 * @throws IncorrectObjectTypeException
114 	 *             if any of the processed objects is not a commit
115 	 * @throws IOException
116 	 *             on errors reading pack or index files
117 	 * @throws MissingObjectException
118 	 *             if an expected object is missing
119 	 */
120 	Collection<BitmapCommit> selectCommits(int expectedCommitCount,
121 			Set<? extends ObjectId> excludeFromBitmapSelection)
122 			throws IncorrectObjectTypeException, IOException,
123 			MissingObjectException {
124 		/*
125 		 * Thinking of bitmap indices as a cache, if we find bitmaps at or at a
126 		 * close ancestor to 'old' and 'new' when calculating old..new, then all
127 		 * objects can be calculated with minimal graph walking. A distribution
128 		 * that favors creating bitmaps for the most recent commits maximizes
129 		 * the cache hits for clients that are close to HEAD, which is the
130 		 * majority of calculations performed.
131 		 */
132 		try (RevWalkvwalk/RevWalk.html#RevWalk">RevWalk rw = new RevWalk(reader);
133 				RevWalk rw2 = new RevWalk(reader)) {
134 			pm.beginTask(JGitText.get().selectingCommits,
135 					ProgressMonitor.UNKNOWN);
136 			rw.setRetainBody(false);
137 			CommitSelectionHelper selectionHelper = captureOldAndNewCommits(rw,
138 					expectedCommitCount, excludeFromBitmapSelection);
139 			pm.endTask();
140 
141 			// Add reused bitmaps from the previous GC pack's bitmap indices.
142 			// Currently they are always fully reused, even if their spans don't
143 			// match this run's PackConfig values.
144 			int newCommits = selectionHelper.getCommitCount();
145 			BlockList<BitmapCommit> selections = new BlockList<>(
146 					selectionHelper.reusedCommits.size()
147 							+ newCommits / recentCommitSpan + 1);
148 			for (BitmapCommit reuse : selectionHelper.reusedCommits) {
149 				selections.add(reuse);
150 			}
151 
152 			if (newCommits == 0) {
153 				for (AnyObjectId id : selectionHelper.newWants) {
154 					selections.add(new BitmapCommit(id, false, 0));
155 				}
156 				return selections;
157 			}
158 
159 			pm.beginTask(JGitText.get().selectingCommits, newCommits);
160 			int totalWants = want.size();
161 			BitmapBuilder seen = commitBitmapIndex.newBitmapBuilder();
162 			seen.or(selectionHelper.reusedCommitsBitmap);
163 			rw2.setRetainBody(false);
164 			rw2.setRevFilter(new NotInBitmapFilter(seen));
165 
166 			// For each branch, do a revwalk to enumerate its commits. Exclude
167 			// both reused commits and any commits seen in a previous branch.
168 			// Then iterate through all new commits from oldest to newest,
169 			// selecting well-spaced commits in this branch.
170 			for (RevCommit rc : selectionHelper.newWantsByNewest) {
171 				BitmapBuilder tipBitmap = commitBitmapIndex.newBitmapBuilder();
172 				rw2.markStart((RevCommit) rw2.peel(rw2.parseAny(rc)));
173 				RevCommit rc2;
174 				while ((rc2 = rw2.next()) != null) {
175 					tipBitmap.addObject(rc2, Constants.OBJ_COMMIT);
176 				}
177 				int cardinality = tipBitmap.cardinality();
178 				seen.or(tipBitmap);
179 
180 				// Within this branch, keep ordered lists of commits
181 				// representing chains in its history, where each chain is a
182 				// "sub-branch". Ordering commits by these chains makes for
183 				// fewer differences between consecutive selected commits, which
184 				// in turn provides better compression/on the run-length
185 				// encoding of the XORs between them.
186 				List<List<BitmapCommit>> chains = new ArrayList<>();
187 
188 				// Mark the current branch as inactive if its tip commit isn't
189 				// recent and there are an excessive number of branches, to
190 				// prevent memory bloat of computing too many bitmaps for stale
191 				// branches.
192 				boolean isActiveBranch = true;
193 				if (totalWants > excessiveBranchCount && !isRecentCommit(rc)) {
194 					isActiveBranch = false;
195 				}
196 
197 				// Insert bitmaps at the offsets suggested by the
198 				// nextSelectionDistance() heuristic. Only reuse bitmaps created
199 				// for more distant commits.
200 				int index = -1;
201 				int nextIn = nextSpan(cardinality);
202 				int nextFlg = nextIn == distantCommitSpan
203 						? PackBitmapIndex.FLAG_REUSE
204 						: 0;
205 
206 				// For the current branch, iterate through all commits from
207 				// oldest to newest.
208 				for (RevCommit c : selectionHelper) {
209 					// Optimization: if we have found all the commits for this
210 					// branch, stop searching
211 					int distanceFromTip = cardinality - index - 1;
212 					if (distanceFromTip == 0) {
213 						break;
214 					}
215 
216 					// Ignore commits that are not in this branch
217 					if (!tipBitmap.contains(c)) {
218 						continue;
219 					}
220 
221 					index++;
222 					nextIn--;
223 					pm.update(1);
224 
225 					// Always pick the items in wants, prefer merge commits.
226 					if (selectionHelper.newWants.remove(c)) {
227 						if (nextIn > 0) {
228 							nextFlg = 0;
229 						}
230 					} else {
231 						boolean stillInSpan = nextIn >= 0;
232 						boolean isMergeCommit = c.getParentCount() > 1;
233 						// Force selection if:
234 						// a) we have exhausted the window looking for merges
235 						// b) we are in the top commits of an active branch
236 						// c) we are at a branch tip
237 						boolean mustPick = (nextIn <= -recentCommitSpan)
238 								|| (isActiveBranch
239 										&& (distanceFromTip <= contiguousCommitCount))
240 								|| (distanceFromTip == 1); // most recent commit
241 						if (!mustPick && (stillInSpan || !isMergeCommit)) {
242 							continue;
243 						}
244 					}
245 
246 					// This commit is selected.
247 					// Calculate where to look for the next one.
248 					int flags = nextFlg;
249 					int currDist = distanceFromTip;
250 					nextIn = nextSpan(distanceFromTip);
251 					nextFlg = nextIn == distantCommitSpan
252 							? PackBitmapIndex.FLAG_REUSE
253 							: 0;
254 
255 					// Create the commit bitmap for the current commit
256 					BitmapBuilder bitmap = commitBitmapIndex.newBitmapBuilder();
257 					rw.reset();
258 					rw.markStart(c);
259 					rw.setRevFilter(new AddUnseenToBitmapFilter(
260 							selectionHelper.reusedCommitsBitmap, bitmap));
261 					while (rw.next() != null) {
262 						// The filter adds the reachable commits to bitmap.
263 					}
264 
265 					// Sort the commits by independent chains in this branch's
266 					// history, yielding better compression when building
267 					// bitmaps.
268 					List<BitmapCommit> longestAncestorChain = null;
269 					for (List<BitmapCommit> chain : chains) {
270 						BitmapCommit mostRecentCommit = chain
271 								.get(chain.size() - 1);
272 						if (bitmap.contains(mostRecentCommit)) {
273 							if (longestAncestorChain == null
274 									|| longestAncestorChain.size() < chain
275 											.size()) {
276 								longestAncestorChain = chain;
277 							}
278 						}
279 					}
280 
281 					if (longestAncestorChain == null) {
282 						longestAncestorChain = new ArrayList<>();
283 						chains.add(longestAncestorChain);
284 					}
285 
286 					// The commit bc should reuse bitmap walker from its close
287 					// ancestor. And the bitmap of bc should only be added to
288 					// PackBitmapIndexBuilder when it's an old enough
289 					// commit,i.e. distance from tip should be greater than
290 					// DISTANCE_THRESHOLD, to save memory.
291 					BitmapCommit bc = BitmapCommit.newBuilder(c).setFlags(flags)
292 							.setAddToIndex(currDist >= DISTANCE_THRESHOLD)
293 							.setReuseWalker(!longestAncestorChain.isEmpty())
294 							.build();
295 					longestAncestorChain.add(bc);
296 					writeBitmaps.addBitmap(c, bitmap, 0);
297 				}
298 
299 				for (List<BitmapCommit> chain : chains) {
300 					selections.addAll(chain);
301 				}
302 			}
303 
304 			// Add the remaining peeledWant
305 			for (AnyObjectId remainingWant : selectionHelper.newWants) {
306 				selections.add(new BitmapCommit(remainingWant, false, 0));
307 			}
308 			writeBitmaps.resetBitmaps(selections.size()); // Remove the temporary commit bitmaps.
309 
310 			pm.endTask();
311 			return selections;
312 		}
313 	}
314 
315 	private boolean isRecentCommit(RevCommit revCommit) {
316 		return revCommit.getCommitTime() > inactiveBranchTimestamp;
317 	}
318 
319 	/**
320 	 * A RevFilter that excludes the commits named in a bitmap from the walk.
321 	 * <p>
322 	 * If a commit is in {@code bitmap} then that commit is not emitted by the
323 	 * walk and its parents are marked as SEEN so the walk can skip them.  The
324 	 * bitmaps passed in have the property that the parents of any commit in
325 	 * {@code bitmap} are also in {@code bitmap}, so marking the parents as
326 	 * SEEN speeds up the RevWalk by saving it from walking down blind alleys
327 	 * and does not change the commits emitted.
328 	 */
329 	private static class NotInBitmapFilter extends RevFilter {
330 		private final BitmapBuilder bitmap;
331 
332 		NotInBitmapFilter(BitmapBuilder bitmap) {
333 			this.bitmap = bitmap;
334 		}
335 
336 		@Override
337 		public final boolean include(RevWalk rw, RevCommit c) {
338 			if (!bitmap.contains(c)) {
339 				return true;
340 			}
341 			for (RevCommit p : c.getParents()) {
342 				p.add(SEEN);
343 			}
344 			return false;
345 		}
346 
347 		@Override
348 		public final NotInBitmapFilter clone() {
349 			throw new UnsupportedOperationException();
350 		}
351 
352 		@Override
353 		public final boolean requiresCommitBody() {
354 			return false;
355 		}
356 	}
357 
358 	/**
359 	 * Records which of the {@code wants} can be found in the previous GC pack's
360 	 * bitmap indices and which are new.
361 	 *
362 	 * @param rw
363 	 *            a {@link RevWalk} to find reachable objects in this repository
364 	 * @param expectedCommitCount
365 	 *            expected count of commits. The actual count may be less due to
366 	 *            unreachable garbage.
367 	 * @param excludeFromBitmapSelection
368 	 *            commits that should be excluded from bitmap selection
369 	 * @return a {@link CommitSelectionHelper} capturing which commits are
370 	 *         covered by a previous pack's bitmaps and which new commits need
371 	 *         bitmap coverage
372 	 * @throws IncorrectObjectTypeException
373 	 *             if any of the processed objects is not a commit
374 	 * @throws IOException
375 	 *             on errors reading pack or index files
376 	 * @throws MissingObjectException
377 	 *             if an expected object is missing
378 	 */
379 	private CommitSelectionHelper captureOldAndNewCommits(RevWalk rw,
380 			int expectedCommitCount,
381 			Set<? extends ObjectId> excludeFromBitmapSelection)
382 			throws IncorrectObjectTypeException, IOException,
383 			MissingObjectException {
384 		// Track bitmaps and commits from the previous GC pack bitmap indices.
385 		BitmapBuilder reuse = commitBitmapIndex.newBitmapBuilder();
386 		List<BitmapCommit> reuseCommits = new ArrayList<>();
387 		for (PackBitmapIndexRemapper.Entry entry : bitmapRemapper) {
388 			// More recent commits did not have the reuse flag set, so skip them
389 			if ((entry.getFlags() & FLAG_REUSE) != FLAG_REUSE) {
390 				continue;
391 			}
392 			RevObject ro = rw.peel(rw.parseAny(entry));
393 			if (!(ro instanceof RevCommit)) {
394 				continue;
395 			}
396 
397 			RevCommit rc = (RevCommit) ro;
398 			reuseCommits.add(new BitmapCommit(rc, false, entry.getFlags()));
399 			if (!reuse.contains(rc)) {
400 				EWAHCompressedBitmap bitmap = bitmapRemapper.ofObjectType(
401 						bitmapRemapper.getBitmap(rc), Constants.OBJ_COMMIT);
402 				reuse.or(new CompressedBitmap(bitmap, commitBitmapIndex));
403 			}
404 		}
405 
406 		// Add branch tips that are not represented in a previous pack's bitmap
407 		// indices. Set up a RevWalk to find new commits not in the old packs.
408 		List<RevCommit> newWantsByNewest = new ArrayList<>(want.size());
409 		Set<RevCommit> newWants = new HashSet<>(want.size());
410 		for (AnyObjectId objectId : want) {
411 			RevObject ro = rw.peel(rw.parseAny(objectId));
412 			if (!(ro instanceof RevCommit) || reuse.contains(ro)
413 					|| excludeFromBitmapSelection.contains(ro)) {
414 				continue;
415 			}
416 
417 			RevCommit rc = (RevCommit) ro;
418 			rw.markStart(rc);
419 			newWants.add(rc);
420 			newWantsByNewest.add(rc);
421 		}
422 
423 		// Create a list of commits in reverse order (older to newer) that are
424 		// not in the previous bitmap indices and are reachable.
425 		rw.setRevFilter(new NotInBitmapFilter(reuse));
426 		RevCommit[] commits = new RevCommit[expectedCommitCount];
427 		int pos = commits.length;
428 		RevCommit rc;
429 		while ((rc = rw.next()) != null && pos > 0) {
430 			commits[--pos] = rc;
431 			pm.update(1);
432 		}
433 
434 		// Sort the new wants by reverse commit time.
435 		Collections.sort(newWantsByNewest, ORDER_BY_REVERSE_TIMESTAMP);
436 		return new CommitSelectionHelper(newWants, commits, pos,
437 				newWantsByNewest, reuse, reuseCommits);
438 	}
439 
440 	/*-
441 	 * Returns the desired distance to the next bitmap based on the distance
442 	 * from the tip commit. Only differentiates recent from distant spans,
443 	 * selectCommits() handles the contiguous commits at the tip for active
444 	 * or inactive branches.
445 	 *
446 	 * A graph of this function looks like this, where
447 	 * the X axis is the distance from the tip commit and the Y axis is the
448 	 * bitmap selection distance.
449 	 *
450 	 * 5000                ____...
451 	 *                    /
452 	 *                  /
453 	 *                /
454 	 *              /
455 	 *  100  _____/
456 	 *       0  20100  25000
457 	 *
458 	 * Linear scaling between 20100 and 25000 prevents spans >100 for distances
459 	 * <20000 (otherwise, a span of 5000 would be returned for a distance of
460 	 * 21000, and the range 16000-20000 would have no selections).
461 	 */
462 	int nextSpan(int distanceFromTip) {
463 		if (distanceFromTip < 0) {
464 			throw new IllegalArgumentException();
465 		}
466 
467 		// Commits more toward the start will have more bitmaps.
468 		if (distanceFromTip <= recentCommitCount) {
469 			return recentCommitSpan;
470 		}
471 
472 		int next = Math.min(distanceFromTip - recentCommitCount,
473 				distantCommitSpan);
474 		return Math.max(next, recentCommitSpan);
475 	}
476 
477 	BitmapWalker newBitmapWalker() {
478 		return new BitmapWalker(
479 				new ObjectWalk(reader), bitmapIndex, null);
480 	}
481 
482 	/**
483 	 * Container for state used in the first phase of selecting commits, which
484 	 * walks all of the reachable commits via the branch tips that are not
485 	 * covered by a previous pack's bitmaps ({@code newWants}) and stores them
486 	 * in {@code newCommitsByOldest}. {@code newCommitsByOldest} is initialized
487 	 * with an expected size of all commits, but may be smaller if some commits
488 	 * are unreachable and/or some commits are covered by a previous pack's
489 	 * bitmaps. {@code commitStartPos} will contain a positive offset to either
490 	 * the root commit or the oldest commit not covered by previous bitmaps.
491 	 */
492 	private static final class CommitSelectionHelper implements Iterable<RevCommit> {
493 		final Set<? extends ObjectId> newWants;
494 
495 		final List<RevCommit> newWantsByNewest;
496 		final BitmapBuilder reusedCommitsBitmap;
497 
498 		final List<BitmapCommit> reusedCommits;
499 
500 		final RevCommit[] newCommitsByOldest;
501 
502 		final int newCommitStartPos;
503 
504 		CommitSelectionHelper(Set<? extends ObjectId> newWants,
505 				RevCommit[] commitsByOldest, int commitStartPos,
506 				List<RevCommit> newWantsByNewest,
507 				BitmapBuilder reusedCommitsBitmap,
508 				List<BitmapCommit> reuse) {
509 			this.newWants = newWants;
510 			this.newCommitsByOldest = commitsByOldest;
511 			this.newCommitStartPos = commitStartPos;
512 			this.newWantsByNewest = newWantsByNewest;
513 			this.reusedCommitsBitmap = reusedCommitsBitmap;
514 			this.reusedCommits = reuse;
515 		}
516 
517 		@Override
518 		public Iterator<RevCommit> iterator() {
519 			// Member variables referenced by this iterator will have synthetic
520 			// accessors generated for them if they are made private.
521 			return new Iterator<RevCommit>() {
522 				int pos = newCommitStartPos;
523 
524 				@Override
525 				public boolean hasNext() {
526 					return pos < newCommitsByOldest.length;
527 				}
528 
529 				@Override
530 				public RevCommit next() {
531 					return newCommitsByOldest[pos++];
532 				}
533 
534 				@Override
535 				public void remove() {
536 					throw new UnsupportedOperationException();
537 				}
538 			};
539 		}
540 
541 		int getCommitCount() {
542 			return newCommitsByOldest.length - newCommitStartPos;
543 		}
544 	}
545 }