/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.internal.storage.dfs;

import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.lib.RefDatabase.ALL;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdOwnerMap;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackStatistics;
import org.eclipse.jgit.util.io.CountingOutputStream;

/**
 * Repack and garbage collect a repository.
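 * <p>
 * A minimal usage sketch; {@code repository} is an existing
 * {@code DfsRepository} supplied by the caller, and the retry policy on a
 * detected race is the caller's choice:
 *
 * <pre>
 * DfsGarbageCollector gc = new DfsGarbageCollector(repository);
 * if (!gc.pack(NullProgressMonitor.INSTANCE)) {
 * 	// A concurrent update raced with the repack; retry later.
 * }
 * </pre>
 */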
public class DfsGarbageCollector {
	private final DfsRepository repo;

	private final DfsRefDatabase refdb;

	private final DfsObjDatabase objdb;

	private final List<DfsPackDescription> newPackDesc;

	private final List<PackStatistics> newPackStats;

	private final List<PackWriter.ObjectIdSet> newPackObj;

	private DfsReader ctx;

	private PackConfig packConfig;

	private long coalesceGarbageLimit = 50 << 20;

	private Map<String, Ref> refsBefore;

	private List<DfsPackFile> packsBefore;

	private Set<ObjectId> allHeads;

	private Set<ObjectId> nonHeads;

	private Set<ObjectId> tagTargets;

	/**
	 * Initialize a garbage collector.
	 *
	 * @param repository
	 *            the repository that objects to be packed will be read from.
	 */
	public DfsGarbageCollector(DfsRepository repository) {
		repo = repository;
		refdb = repo.getRefDatabase();
		objdb = repo.getObjectDatabase();
		newPackDesc = new ArrayList<DfsPackDescription>(4);
		newPackStats = new ArrayList<PackStatistics>(4);
		newPackObj = new ArrayList<PackWriter.ObjectIdSet>(4);

		packConfig = new PackConfig(repo);
		packConfig.setIndexVersion(2);
	}

	/** @return the configuration used to generate the new pack file. */
	public PackConfig getPackConfig() {
		return packConfig;
	}

	/**
	 * @param newConfig
	 *            the new configuration to use when creating the pack file.
	 * @return {@code this}
	 */
	public DfsGarbageCollector setPackConfig(PackConfig newConfig) {
		packConfig = newConfig;
		return this;
	}

	/** @return the size limit in bytes; garbage packs smaller than this are repacked. */
	public long getCoalesceGarbageLimit() {
		return coalesceGarbageLimit;
	}

	/**
	 * Set the byte size limit for garbage packs to be repacked.
	 * <p>
	 * Any UNREACHABLE_GARBAGE pack smaller than this limit will be repacked at
	 * the end of the run. This allows the garbage collector to coalesce
	 * unreachable objects into a single file.
	 * <p>
	 * If an UNREACHABLE_GARBAGE pack is already larger than this limit it will
	 * be left alone by the garbage collector. This avoids unnecessary disk IO
	 * reading and copying the objects.
	 * <p>
	 * If the limit is set to 0, UNREACHABLE_GARBAGE coalescing is disabled.<br>
	 * If the limit is set to {@link Long#MAX_VALUE}, everything is coalesced.
	 * <p>
	 * Keeping unreachable garbage prevents race conditions with repository
	 * changes that may suddenly need an object whose only copy was stored in
	 * the UNREACHABLE_GARBAGE pack.
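	 * <p>
	 * For example, to coalesce garbage packs smaller than 10 MiB (a sketch;
	 * {@code gc} is a collector instance, and the default limit is 50 MiB):
	 *
	 * <pre>
	 * gc.setCoalesceGarbageLimit(10 &lt;&lt; 20);
	 * </pre>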
	 *
	 * @param limit
	 *            size in bytes.
	 * @return {@code this}
	 */
	public DfsGarbageCollector setCoalesceGarbageLimit(long limit) {
		coalesceGarbageLimit = limit;
		return this;
	}

	/**
	 * Create a single new pack file containing all of the live objects.
	 * <p>
	 * This method safely decides which packs can be expired after the new pack
	 * is created by validating that the references have not been modified in
	 * an incompatible way.
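	 * <p>
	 * A typical call (a sketch; {@code gc} is the collector, {@code pm} a
	 * progress monitor, and the retry policy is left to the caller):
	 *
	 * <pre>
	 * if (!gc.pack(pm)) {
	 * 	// A concurrent update raced with this repack; try again later.
	 * }
	 * </pre>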
	 *
	 * @param pm
	 *            progress monitor to receive packing updates; packing may take
	 *            a while, depending on the size of the repository.
	 * @return true if the repack was successful without race conditions. False
	 *         if a race condition was detected and the repack should be run
	 *         again later.
	 * @throws IOException
	 *             if a new pack cannot be created.
	 */
	public boolean pack(ProgressMonitor pm) throws IOException {
		if (pm == null)
			pm = NullProgressMonitor.INSTANCE;
		if (packConfig.getIndexVersion() != 2)
			throw new IllegalStateException(
					JGitText.get().supportOnlyPackIndexVersion2);

		ctx = (DfsReader) objdb.newReader();
		try {
			refdb.clearCache();
			objdb.clearCache();

			refsBefore = refdb.getRefs(ALL);
			packsBefore = packsToRebuild();
			if (packsBefore.isEmpty())
				return true;

			allHeads = new HashSet<ObjectId>();
			nonHeads = new HashSet<ObjectId>();
			tagTargets = new HashSet<ObjectId>();
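			// Partition the refs: objects at refs/heads/ tips seed the first
			// pack; all other refs are packed in a second pass (packRest).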
			for (Ref ref : refsBefore.values()) {
				if (ref.isSymbolic() || ref.getObjectId() == null)
					continue;
				if (isHead(ref))
					allHeads.add(ref.getObjectId());
				else
					nonHeads.add(ref.getObjectId());
				if (ref.getPeeledObjectId() != null)
					tagTargets.add(ref.getPeeledObjectId());
			}
			tagTargets.addAll(allHeads);

			boolean rollback = true;
			try {
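				// Repack in three phases (heads, other refs, then garbage),
				// then commit the new packs, pruning the ones just rebuilt.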
				packHeads(pm);
				packRest(pm);
				packGarbage(pm);
				objdb.commitPack(newPackDesc, toPrune());
				rollback = false;
				return true;
			} finally {
				if (rollback)
					objdb.rollbackPack(newPackDesc);
			}
		} finally {
			ctx.close();
		}
	}

	private List<DfsPackFile> packsToRebuild() throws IOException {
		DfsPackFile[] packs = objdb.getPacks();
		List<DfsPackFile> out = new ArrayList<DfsPackFile>(packs.length);
		for (DfsPackFile p : packs) {
			DfsPackDescription d = p.getPackDescription();
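			// Rebuild every pack except oversized garbage: UNREACHABLE_GARBAGE
			// packs at or over the coalesce limit are left alone to avoid
			// rereading and copying their objects.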
			if (d.getPackSource() != UNREACHABLE_GARBAGE)
				out.add(p);
			else if (d.getFileSize(PackExt.PACK) < coalesceGarbageLimit)
				out.add(p);
		}
		return out;
	}

	/** @return all of the source packs that fed into this compaction. */
	public List<DfsPackDescription> getSourcePacks() {
		return toPrune();
	}

	/** @return new packs created by this compaction. */
	public List<DfsPackDescription> getNewPacks() {
		return newPackDesc;
	}

	/** @return statistics for the packs returned by {@link #getNewPacks()}. */
	public List<PackStatistics> getNewPackStatistics() {
		return newPackStats;
	}

	private List<DfsPackDescription> toPrune() {
		int cnt = packsBefore.size();
		List<DfsPackDescription> all = new ArrayList<DfsPackDescription>(cnt);
		for (DfsPackFile pack : packsBefore)
			all.add(pack.getPackDescription());
		return all;
	}

	private void packHeads(ProgressMonitor pm) throws IOException {
		if (allHeads.isEmpty())
			return;

		try (PackWriter pw = newPackWriter()) {
			pw.setTagTargets(tagTargets);
			pw.preparePack(pm, allHeads, Collections.<ObjectId> emptySet());
			if (0 < pw.getObjectCount())
				writePack(GC, pw, pm);
		}
	}

	private void packRest(ProgressMonitor pm) throws IOException {
		if (nonHeads.isEmpty())
			return;

		try (PackWriter pw = newPackWriter()) {
			for (PackWriter.ObjectIdSet packedObjs : newPackObj)
				pw.excludeObjects(packedObjs);
			pw.preparePack(pm, nonHeads, allHeads);
			if (0 < pw.getObjectCount())
				writePack(GC, pw, pm);
		}
	}

	private void packGarbage(ProgressMonitor pm) throws IOException {
		// TODO(sop) This is ugly. The garbage pack needs to be deleted.
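		// Copy garbage as-is: reuse existing deltas and object
		// representations and skip the delta search; bitmaps are skipped
		// since they only help with objects reachable from refs.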
		PackConfig cfg = new PackConfig(packConfig);
		cfg.setReuseDeltas(true);
		cfg.setReuseObjects(true);
		cfg.setDeltaCompress(false);
		cfg.setBuildBitmaps(false);

		try (PackWriter pw = new PackWriter(cfg, ctx);
				RevWalk pool = new RevWalk(ctx)) {
			pw.setDeltaBaseAsOffset(true);
			pw.setReuseDeltaCommits(true);
			pm.beginTask(JGitText.get().findingGarbage, objectsBefore());
			for (DfsPackFile oldPack : packsBefore) {
				PackIndex oldIdx = oldPack.getPackIndex(ctx);
				for (PackIndex.MutableEntry ent : oldIdx) {
					pm.update(1);
					ObjectId id = ent.toObjectId();
					if (pool.lookupOrNull(id) != null || anyPackHas(id))
						continue;

					int type = oldPack.getObjectType(ctx, ent.getOffset());
					pw.addObject(pool.lookupAny(id, type));
				}
			}
			pm.endTask();
			if (0 < pw.getObjectCount())
				writePack(UNREACHABLE_GARBAGE, pw, pm);
		}
	}

	private boolean anyPackHas(AnyObjectId id) {
		for (PackWriter.ObjectIdSet packedObjs : newPackObj)
			if (packedObjs.contains(id))
				return true;
		return false;
	}

	private static boolean isHead(Ref ref) {
		return ref.getName().startsWith(Constants.R_HEADS);
	}

	private int objectsBefore() {
		int cnt = 0;
		for (DfsPackFile p : packsBefore)
			cnt += p.getPackDescription().getObjectCount();
		return cnt;
	}

	private PackWriter newPackWriter() {
		PackWriter pw = new PackWriter(packConfig, ctx);
		pw.setDeltaBaseAsOffset(true);
		pw.setReuseDeltaCommits(false);
		return pw;
	}

	private DfsPackDescription writePack(PackSource source, PackWriter pw,
			ProgressMonitor pm) throws IOException {
		DfsOutputStream out;
		DfsPackDescription pack = repo.getObjectDatabase().newPack(source);
		newPackDesc.add(pack);

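		// Write the pack data first; the extension is recorded only after
		// the stream completes successfully.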
		out = objdb.writeFile(pack, PACK);
		try {
			pw.writePack(pm, pm, out);
			pack.addFileExt(PACK);
		} finally {
			out.close();
		}

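		// Write the index next, counting bytes so its size can be recorded.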
		out = objdb.writeFile(pack, INDEX);
		try {
			CountingOutputStream cnt = new CountingOutputStream(out);
			pw.writeIndex(cnt);
			pack.addFileExt(INDEX);
			pack.setFileSize(INDEX, cnt.getCount());
			pack.setIndexVersion(pw.getIndexVersion());
		} finally {
			out.close();
		}

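		// Write a bitmap index only when the writer prepared one; garbage
		// packs disable bitmaps, so this step is skipped for them.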
		if (pw.prepareBitmapIndex(pm)) {
			out = objdb.writeFile(pack, BITMAP_INDEX);
			try {
				CountingOutputStream cnt = new CountingOutputStream(out);
				pw.writeBitmapIndex(cnt);
				pack.addFileExt(BITMAP_INDEX);
				pack.setFileSize(BITMAP_INDEX, cnt.getCount());
			} finally {
				out.close();
			}
		}

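		// Remember what was just written so packRest() can exclude these
		// objects and packGarbage() can skip them via anyPackHas().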
		final ObjectIdOwnerMap<ObjectIdOwnerMap.Entry> packedObjs = pw
				.getObjectSet();
		newPackObj.add(new PackWriter.ObjectIdSet() {
			public boolean contains(AnyObjectId objectId) {
				return packedObjs.contains(objectId);
			}
		});

		PackStatistics stats = pw.getStatistics();
		pack.setPackStats(stats);
		newPackStats.add(stats);

		DfsBlockCache.getInstance().getOrCreate(pack, null);
		return pack;
	}
}
407 }