/*
 * Copyright (C) 2012, Christian Halstrick <christian.halstrick@sap.com>
 * Copyright (C) 2011, Shawn O. Pearce <spearce@spearce.org>
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.eclipse.jgit.internal.storage.file;

import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.lib.RefDatabase.ALL;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.text.MessageFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;

import org.eclipse.jgit.dircache.DirCacheIterator;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.errors.NoWorkTreeException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.internal.storage.pack.PackWriter.ObjectIdSet;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ConfigConstants;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.Ref.Storage;
import org.eclipse.jgit.lib.RefDatabase;
import org.eclipse.jgit.lib.ReflogEntry;
import org.eclipse.jgit.revwalk.ObjectWalk;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.treewalk.filter.TreeFilter;
import org.eclipse.jgit.util.FileUtils;
import org.eclipse.jgit.util.GitDateParser;
import org.eclipse.jgit.util.SystemReader;

/**
 * A garbage collector for git {@link FileRepository}. Instances of this class
 * are not thread-safe. Don't use the same instance from multiple threads.
 *
 * This class started as a copy of DfsGarbageCollector from Shawn O. Pearce,
 * adapted to FileRepositories.
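 *
 * A minimal usage sketch (assuming {@code repository} is an open
 * {@link FileRepository} and {@code monitor} is a {@link ProgressMonitor};
 * both names are placeholders):
 *
 * <pre>
 * GC gc = new GC(repository);
 * gc.setProgressMonitor(monitor);
 * gc.gc(); // pack refs, repack reachable objects, prune loose objects
 * </pre>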
 */
public class GC {
	private static final String PRUNE_EXPIRE_DEFAULT = "2.weeks.ago"; //$NON-NLS-1$

	private final FileRepository repo;

	private ProgressMonitor pm;

	private long expireAgeMillis = -1;

	private Date expire;

	private PackConfig pconfig = null;

	/**
	 * The refs which existed during the last call to {@link #repack()}. This
	 * is needed during {@link #prune(Set)} where we can optimize by looking at
	 * the difference between the current refs and the refs which existed
	 * during the last {@link #repack()}.
	 */
	private Map<String, Ref> lastPackedRefs;

	/**
	 * Holds the starting time of the last repack() execution. This is needed
	 * in prune() to inspect only those reflog entries which have been added
	 * since the last repack().
	 */
	private long lastRepackTime;

	/**
	 * Creates a new garbage collector with default values. An expiration time
	 * of two weeks and a {@link NullProgressMonitor} will be used.
	 *
	 * @param repo
	 *            the repo to work on
	 */
	public GC(FileRepository repo) {
		this.repo = repo;
		this.pm = NullProgressMonitor.INSTANCE;
	}

	/**
	 * Runs a garbage collector on a {@link FileRepository}. It will
	 * <ul>
	 * <li>pack loose references into packed-refs</li>
	 * <li>repack all reachable objects into new pack files and delete the old
	 * pack files</li>
	 * <li>prune all loose objects which are now stored in pack files</li>
	 * </ul>
	 *
	 * @return the collection of newly created {@link PackFile}s
	 * @throws IOException
	 * @throws ParseException
	 *             If the configuration parameter "gc.pruneexpire" couldn't be
	 *             parsed
	 */
	public Collection<PackFile> gc() throws IOException, ParseException {
		pm.start(6 /* tasks */);
		packRefs();
		// TODO: implement reflog_expire(pm, repo);
		Collection<PackFile> newPacks = repack();
		prune(Collections.<ObjectId> emptySet());
		// TODO: implement rerere_gc(pm);
		return newPacks;
	}

	/**
	 * Delete old pack files. What is 'old' is defined by specifying a set of
	 * old pack files and a set of new pack files. Each pack file contained in
	 * the old pack files but not contained in the new pack files will be
	 * deleted. If an expirationDate is set then pack files which are younger
	 * than the expirationDate will not be deleted.
	 *
	 * @param oldPacks
	 * @param newPacks
	 * @throws ParseException
	 */
	private void deleteOldPacks(Collection<PackFile> oldPacks,
			Collection<PackFile> newPacks) throws ParseException {
		long expireDate = getExpireDate();
		oldPackLoop: for (PackFile oldPack : oldPacks) {
			String oldName = oldPack.getPackName();
			// check whether an old pack file is also among the new pack
			// files; if so, we must not delete it.
			for (PackFile newPack : newPacks)
				if (oldName.equals(newPack.getPackName()))
					continue oldPackLoop;

			if (!oldPack.shouldBeKept()
					&& oldPack.getPackFile().lastModified() < expireDate) {
				oldPack.close();
				prunePack(oldName);
			}
		}
		// close the complete object database. That's the only way to force a
		// rescan and to detect that certain pack files are now deleted.
		repo.getObjectDatabase().close();
	}

	/**
	 * Delete files associated with a single pack file. First try to delete the
	 * ".pack" file because on some platforms the ".pack" file may be locked
	 * and can't be deleted. In such a case it is better to detect this early
	 * and give up on deleting files for this pack file. Otherwise we may
	 * delete the ".idx" file and, when failing to delete the ".pack" file, be
	 * left with a ".pack" file without an ".idx" file.
	 *
	 * @param packName
	 */
	private void prunePack(String packName) {
		PackExt[] extensions = PackExt.values();
		try {
			// Delete the .pack file first and if this fails give up on
			// deleting the other files
			int deleteOptions = FileUtils.RETRY | FileUtils.SKIP_MISSING;
			for (PackExt ext : extensions)
				if (PackExt.PACK.equals(ext)) {
					File f = nameFor(packName, "." + ext.getExtension()); //$NON-NLS-1$
					FileUtils.delete(f, deleteOptions);
					break;
				}
			// The .pack file has been deleted. Delete as many of the other
			// files as possible.
			deleteOptions |= FileUtils.IGNORE_ERRORS;
			for (PackExt ext : extensions) {
				if (!PackExt.PACK.equals(ext)) {
					File f = nameFor(packName, "." + ext.getExtension()); //$NON-NLS-1$
					FileUtils.delete(f, deleteOptions);
				}
			}
		} catch (IOException e) {
			// Deletion of the .pack file failed. Silently return.
		}
	}

	/**
	 * Like "git prune-packed" this method tries to prune all loose objects
	 * which can be found in packs. If certain objects can't be pruned (e.g.
	 * because the filesystem delete operation fails) this is silently ignored.
	 *
	 * @throws IOException
	 */
	public void prunePacked() throws IOException {
		ObjectDirectory objdb = repo.getObjectDatabase();
		Collection<PackFile> packs = objdb.getPacks();
		File objects = repo.getObjectsDirectory();
		String[] fanout = objects.list();

		if (fanout != null && fanout.length > 0) {
			pm.beginTask(JGitText.get().pruneLoosePackedObjects, fanout.length);
			try {
				for (String d : fanout) {
					pm.update(1);
					if (d.length() != 2)
						continue;
					String[] entries = new File(objects, d).list();
					if (entries == null)
						continue;
					for (String e : entries) {
						if (e.length() != Constants.OBJECT_ID_STRING_LENGTH - 2)
							continue;
						ObjectId id;
						try {
							id = ObjectId.fromString(d + e);
						} catch (IllegalArgumentException notAnObject) {
							// ignore files that do not represent loose objects
							continue;
						}
						boolean found = false;
						for (PackFile p : packs)
							if (p.hasObject(id)) {
								found = true;
								break;
							}
						if (found)
							FileUtils.delete(objdb.fileFor(id), FileUtils.RETRY
									| FileUtils.SKIP_MISSING
									| FileUtils.IGNORE_ERRORS);
					}
				}
			} finally {
				pm.endTask();
			}
		}
	}

	/**
	 * Like "git prune" this method tries to prune all loose objects which are
	 * unreferenced. If certain objects can't be pruned (e.g. because the
	 * filesystem delete operation fails) this is silently ignored.
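	 *
	 * A minimal call sketch, mirroring what {@link #gc()} does internally
	 * ({@code repository} is a placeholder for an open FileRepository):
	 *
	 * <pre>
	 * new GC(repository).prune(Collections.&lt;ObjectId&gt; emptySet());
	 * </pre>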
	 *
	 * @param objectsToKeep
	 *            a set of objects which should explicitly not be pruned
	 *
	 * @throws IOException
	 * @throws ParseException
	 *             If the configuration parameter "gc.pruneexpire" couldn't be
	 *             parsed
	 */
	public void prune(Set<ObjectId> objectsToKeep) throws IOException,
			ParseException {
		long expireDate = getExpireDate();

		// Collect all loose objects which are old enough, not referenced from
		// the index and not in objectsToKeep
		Map<ObjectId, File> deletionCandidates = new HashMap<ObjectId, File>();
		Set<ObjectId> indexObjects = null;
		File objects = repo.getObjectsDirectory();
		String[] fanout = objects.list();
		if (fanout != null && fanout.length > 0) {
			pm.beginTask(JGitText.get().pruneLooseUnreferencedObjects,
					fanout.length);
			try {
				for (String d : fanout) {
					pm.update(1);
					if (d.length() != 2)
						continue;
					File[] entries = new File(objects, d).listFiles();
					if (entries == null)
						continue;
					for (File f : entries) {
						String fName = f.getName();
						if (fName.length() != Constants.OBJECT_ID_STRING_LENGTH - 2)
							continue;
						if (f.lastModified() >= expireDate)
							continue;
						try {
							ObjectId id = ObjectId.fromString(d + fName);
							if (objectsToKeep.contains(id))
								continue;
							if (indexObjects == null)
								indexObjects = listNonHEADIndexObjects();
							if (indexObjects.contains(id))
								continue;
							deletionCandidates.put(id, f);
						} catch (IllegalArgumentException notAnObject) {
							// ignore files that do not represent loose objects
							continue;
						}
					}
				}
			} finally {
				pm.endTask();
			}
		}
		if (deletionCandidates.isEmpty())
			return;

		// From the set of current refs remove all those which have been
		// handled during the last repack(). Only those refs will survive which
		// have been added or modified since the last repack. Only these can
		// save existing loose objects from being pruned.
		Map<String, Ref> newRefs;
		if (lastPackedRefs == null || lastPackedRefs.isEmpty())
			newRefs = getAllRefs();
		else {
			newRefs = new HashMap<String, Ref>();
			for (Iterator<Map.Entry<String, Ref>> i = getAllRefs().entrySet()
					.iterator(); i.hasNext();) {
				Entry<String, Ref> newEntry = i.next();
				Ref old = lastPackedRefs.get(newEntry.getKey());
				if (!equals(newEntry.getValue(), old))
					newRefs.put(newEntry.getKey(), newEntry.getValue());
			}
		}

		if (!newRefs.isEmpty()) {
			// There are new/modified refs! Check which loose objects are now
			// referenced by these modified refs (or their reflog entries) and
			// remove these loose objects from the deletionCandidates. When the
			// last candidate is removed leave this method.
			ObjectWalk w = new ObjectWalk(repo);
			try {
				for (Ref cr : newRefs.values())
					w.markStart(w.parseAny(cr.getObjectId()));
				if (lastPackedRefs != null)
					for (Ref lpr : lastPackedRefs.values())
						w.markUninteresting(w.parseAny(lpr.getObjectId()));
				removeReferenced(deletionCandidates, w);
			} finally {
				w.dispose();
			}
		}

		if (deletionCandidates.isEmpty())
			return;

		// Since we have not left the method yet there are still
		// deletionCandidates. The last chance for these objects not to be
		// pruned is that they are referenced by reflog entries. Even refs
		// which currently point to the same object as during the last
		// repack() may have additional reflog entries not handled during the
		// last repack().
		ObjectWalk w = new ObjectWalk(repo);
		try {
			for (Ref ar : getAllRefs().values())
				for (ObjectId id : listRefLogObjects(ar, lastRepackTime))
					w.markStart(w.parseAny(id));
			if (lastPackedRefs != null)
				for (Ref lpr : lastPackedRefs.values())
					w.markUninteresting(w.parseAny(lpr.getObjectId()));
			removeReferenced(deletionCandidates, w);
		} finally {
			w.dispose();
		}

		if (deletionCandidates.isEmpty())
			return;

		// delete all candidates which have survived: these are unreferenced
		// loose objects
		for (File f : deletionCandidates.values())
			f.delete();

		repo.getObjectDatabase().close();
	}

	private long getExpireDate() throws ParseException {
		long expireDate = Long.MAX_VALUE;

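		// No expiration was configured on this GC instance: fall back to the
		// gc.pruneexpire config value, defaulting to "2.weeks.ago".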
		if (expire == null && expireAgeMillis == -1) {
			String pruneExpireStr = repo.getConfig().getString(
					ConfigConstants.CONFIG_GC_SECTION, null,
					ConfigConstants.CONFIG_KEY_PRUNEEXPIRE);
			if (pruneExpireStr == null)
				pruneExpireStr = PRUNE_EXPIRE_DEFAULT;
			expire = GitDateParser.parse(pruneExpireStr, null, SystemReader
					.getInstance().getLocale());
			expireAgeMillis = -1;
		}
		if (expire != null)
			expireDate = expire.getTime();
		if (expireAgeMillis != -1)
			expireDate = System.currentTimeMillis() - expireAgeMillis;
		return expireDate;
	}

	/**
	 * Remove all entries from a map whose key is the id of an object
	 * referenced by the given ObjectWalk.
	 *
	 * @param id2File
	 * @param w
	 * @throws MissingObjectException
	 * @throws IncorrectObjectTypeException
	 * @throws IOException
	 */
	private void removeReferenced(Map<ObjectId, File> id2File,
			ObjectWalk w) throws MissingObjectException,
			IncorrectObjectTypeException, IOException {
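		// First pass: commits returned by next(); second pass: trees, blobs
		// and tags returned by nextObject(). Stop as soon as the map is empty.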
		RevObject ro = w.next();
		while (ro != null) {
			if (id2File.remove(ro.getId()) != null)
				if (id2File.isEmpty())
					return;
			ro = w.next();
		}
		ro = w.nextObject();
		while (ro != null) {
			if (id2File.remove(ro.getId()) != null)
				if (id2File.isEmpty())
					return;
			ro = w.nextObject();
		}
	}

	private static boolean equals(Ref r1, Ref r2) {
		if (r1 == null || r2 == null)
			return false;
		if (r1.isSymbolic()) {
			if (!r2.isSymbolic())
				return false;
			return r1.getTarget().getName().equals(r2.getTarget().getName());
		} else {
			if (r2.isSymbolic())
				return false;
			return r1.getObjectId().equals(r2.getObjectId());
		}
	}

	/**
	 * Packs all non-symbolic, loose refs into packed-refs.
	 *
	 * @throws IOException
	 */
	public void packRefs() throws IOException {
		Collection<Ref> refs = repo.getRefDatabase().getRefs(Constants.R_REFS).values();
		List<String> refsToBePacked = new ArrayList<String>(refs.size());
		pm.beginTask(JGitText.get().packRefs, refs.size());
		try {
			for (Ref ref : refs) {
				if (!ref.isSymbolic() && ref.getStorage().isLoose())
					refsToBePacked.add(ref.getName());
				pm.update(1);
			}
			((RefDirectory) repo.getRefDatabase()).pack(refsToBePacked);
		} finally {
			pm.endTask();
		}
	}

	/**
	 * Packs all objects which are reachable from any of the heads into one
	 * pack file. Additionally all objects which are not reachable from any
	 * head but which are reachable from any of the other refs (e.g. tags),
	 * special refs (e.g. FETCH_HEAD) or the index are packed into a separate
	 * pack file. Objects included in pack files which have a .keep file
	 * associated are never repacked. All old pack files which existed before
	 * are deleted.
	 *
	 * @return a collection of the newly created pack files
	 * @throws IOException
	 *             when an {@link IOException} occurs while reading refs, the
	 *             index, pack files, objects or reflog entries, or while
	 *             writing the new pack files
	 */
	public Collection<PackFile> repack() throws IOException {
		Collection<PackFile> toBeDeleted = repo.getObjectDatabase().getPacks();

		long time = System.currentTimeMillis();
		Map<String, Ref> refsBefore = getAllRefs();

		Set<ObjectId> allHeads = new HashSet<ObjectId>();
		Set<ObjectId> nonHeads = new HashSet<ObjectId>();
		Set<ObjectId> tagTargets = new HashSet<ObjectId>();
		Set<ObjectId> indexObjects = listNonHEADIndexObjects();

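		// Partition the pack starting points: objects referenced from
		// refs/heads go into the "heads" pack, everything else (other refs,
		// reflog entries, non-HEAD index objects) into a second pack.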
		for (Ref ref : refsBefore.values()) {
			nonHeads.addAll(listRefLogObjects(ref, 0));
			if (ref.isSymbolic() || ref.getObjectId() == null)
				continue;
			if (ref.getName().startsWith(Constants.R_HEADS))
				allHeads.add(ref.getObjectId());
			else
				nonHeads.add(ref.getObjectId());
			if (ref.getPeeledObjectId() != null)
				tagTargets.add(ref.getPeeledObjectId());
		}

		List<ObjectIdSet> excluded = new LinkedList<ObjectIdSet>();
		for (final PackFile f : repo.getObjectDatabase().getPacks())
			if (f.shouldBeKept())
				excluded.add(objectIdSet(f.getIndex()));

		tagTargets.addAll(allHeads);
		nonHeads.addAll(indexObjects);

		List<PackFile> ret = new ArrayList<PackFile>(2);
		PackFile heads = null;
		if (!allHeads.isEmpty()) {
			heads = writePack(allHeads, Collections.<ObjectId> emptySet(),
					tagTargets, excluded);
			if (heads != null) {
				ret.add(heads);
				excluded.add(0, objectIdSet(heads.getIndex()));
			}
		}
		if (!nonHeads.isEmpty()) {
			PackFile rest = writePack(nonHeads, allHeads, tagTargets, excluded);
			if (rest != null)
				ret.add(rest);
		}
		try {
			deleteOldPacks(toBeDeleted, ret);
		} catch (ParseException e) {
			// TODO: the exception has to be wrapped into an IOException
			// because throwing the ParseException directly would break the
			// API; instead we should throw a ConfigInvalidException
			throw new IOException(e);
		}
		prunePacked();

		lastPackedRefs = refsBefore;
		lastRepackTime = time;
		return ret;
	}

	/**
	 * @param ref
	 *            the ref whose log should be inspected
	 * @param minTime
	 *            only reflog entries not older than this time are processed
	 * @return the {@link ObjectId}s contained in the reflog
	 * @throws IOException
	 */
	private Set<ObjectId> listRefLogObjects(Ref ref, long minTime) throws IOException {
		List<ReflogEntry> rlEntries = repo.getReflogReader(ref.getName())
				.getReverseEntries();
		if (rlEntries == null || rlEntries.isEmpty())
			return Collections.<ObjectId> emptySet();
		Set<ObjectId> ret = new HashSet<ObjectId>();
		for (ReflogEntry e : rlEntries) {
			if (e.getWho().getWhen().getTime() < minTime)
				break;
			ObjectId newId = e.getNewId();
			if (newId != null && !ObjectId.zeroId().equals(newId))
				ret.add(newId);
			ObjectId oldId = e.getOldId();
			if (oldId != null && !ObjectId.zeroId().equals(oldId))
				ret.add(oldId);
		}
		return ret;
	}

	/**
	 * Returns a map of all refs and additional refs (e.g. FETCH_HEAD,
	 * MERGE_HEAD, ...)
	 *
	 * @return a map where names of refs point to ref objects
	 * @throws IOException
	 */
	private Map<String, Ref> getAllRefs() throws IOException {
		Map<String, Ref> ret = repo.getRefDatabase().getRefs(ALL);
		for (Ref ref : repo.getRefDatabase().getAdditionalRefs())
			ret.put(ref.getName(), ref);
		return ret;
	}

	/**
	 * Return the set of those objects in the index which differ from what's
	 * in HEAD.
	 *
	 * @return a set of ObjectIds of changed objects in the index
	 * @throws IOException
	 * @throws CorruptObjectException
	 * @throws NoWorkTreeException
	 */
	private Set<ObjectId> listNonHEADIndexObjects()
			throws CorruptObjectException, IOException {
		try {
			if (repo.getIndexFile() == null)
				return Collections.emptySet();
		} catch (NoWorkTreeException e) {
			return Collections.emptySet();
		}
		try (TreeWalk treeWalk = new TreeWalk(repo)) {
			treeWalk.addTree(new DirCacheIterator(repo.readDirCache()));
			ObjectId headID = repo.resolve(Constants.HEAD);
			if (headID != null) {
				try (RevWalk revWalk = new RevWalk(repo)) {
					treeWalk.addTree(revWalk.parseTree(headID));
				}
			}

			treeWalk.setFilter(TreeFilter.ANY_DIFF);
			treeWalk.setRecursive(true);
			Set<ObjectId> ret = new HashSet<ObjectId>();

			while (treeWalk.next()) {
				ObjectId objectId = treeWalk.getObjectId(0);
				switch (treeWalk.getRawMode(0) & FileMode.TYPE_MASK) {
				case FileMode.TYPE_MISSING:
				case FileMode.TYPE_GITLINK:
					continue;
				case FileMode.TYPE_TREE:
				case FileMode.TYPE_FILE:
				case FileMode.TYPE_SYMLINK:
					ret.add(objectId);
					continue;
				default:
					throw new IOException(MessageFormat.format(
							JGitText.get().corruptObjectInvalidMode3,
							String.format("%o", //$NON-NLS-1$
									Integer.valueOf(treeWalk.getRawMode(0))),
							(objectId == null) ? "null" : objectId.name(), //$NON-NLS-1$
							treeWalk.getPathString(), //
							repo.getIndexFile()));
				}
			}
			return ret;
		}
	}

	private PackFile writePack(Set<? extends ObjectId> want,
			Set<? extends ObjectId> have, Set<ObjectId> tagTargets,
			List<ObjectIdSet> excludeObjects) throws IOException {
		File tmpPack = null;
		Map<PackExt, File> tmpExts = new TreeMap<PackExt, File>(
				new Comparator<PackExt>() {
					public int compare(PackExt o1, PackExt o2) {
						// INDEX entries must be returned last, so the pack
						// scanner does not pick up the new pack until all the
						// PackExt entries have been written.
						if (o1 == o2)
							return 0;
						if (o1 == PackExt.INDEX)
							return 1;
						if (o2 == PackExt.INDEX)
							return -1;
						return Integer.signum(o1.hashCode() - o2.hashCode());
					}

				});
		try (PackWriter pw = new PackWriter(
				(pconfig == null) ? new PackConfig(repo) : pconfig,
				repo.newObjectReader())) {
			// prepare the PackWriter
			pw.setDeltaBaseAsOffset(true);
			pw.setReuseDeltaCommits(false);
			if (tagTargets != null)
				pw.setTagTargets(tagTargets);
			if (excludeObjects != null)
				for (ObjectIdSet idx : excludeObjects)
					pw.excludeObjects(idx);
			pw.preparePack(pm, want, have);
			if (pw.getObjectCount() == 0)
				return null;

			// create temporary files
			String id = pw.computeName().getName();
			File packdir = new File(repo.getObjectsDirectory(), "pack"); //$NON-NLS-1$
			tmpPack = File.createTempFile("gc_", ".pack_tmp", packdir); //$NON-NLS-1$ //$NON-NLS-2$
			final String tmpBase = tmpPack.getName()
					.substring(0, tmpPack.getName().lastIndexOf('.'));
			File tmpIdx = new File(packdir, tmpBase + ".idx_tmp"); //$NON-NLS-1$
			tmpExts.put(INDEX, tmpIdx);

			if (!tmpIdx.createNewFile())
				throw new IOException(MessageFormat.format(
						JGitText.get().cannotCreateIndexfile, tmpIdx.getPath()));

			// write the packfile
			FileOutputStream fos = new FileOutputStream(tmpPack);
			FileChannel channel = fos.getChannel();
			OutputStream channelStream = Channels.newOutputStream(channel);
			try {
				pw.writePack(pm, pm, channelStream);
			} finally {
				channel.force(true);
				channelStream.close();
				fos.close();
			}

			// write the pack index
			fos = new FileOutputStream(tmpIdx);
			FileChannel idxChannel = fos.getChannel();
			OutputStream idxStream = Channels.newOutputStream(idxChannel);
			try {
				pw.writeIndex(idxStream);
			} finally {
				idxChannel.force(true);
				idxStream.close();
				fos.close();
			}

			if (pw.prepareBitmapIndex(pm)) {
				File tmpBitmapIdx = new File(packdir, tmpBase + ".bitmap_tmp"); //$NON-NLS-1$
				tmpExts.put(BITMAP_INDEX, tmpBitmapIdx);

				if (!tmpBitmapIdx.createNewFile())
					throw new IOException(MessageFormat.format(
							JGitText.get().cannotCreateIndexfile,
							tmpBitmapIdx.getPath()));

				fos = new FileOutputStream(tmpBitmapIdx);
				idxChannel = fos.getChannel();
				idxStream = Channels.newOutputStream(idxChannel);
				try {
					pw.writeBitmapIndex(idxStream);
				} finally {
					idxChannel.force(true);
					idxStream.close();
					fos.close();
				}
			}

			// rename the temporary files to real files
			File realPack = nameFor(id, ".pack"); //$NON-NLS-1$

			// if the packfile already exists (because we are rewriting a
			// packfile for the same set of objects maybe with a different
			// PackConfig) then make sure we get rid of all handles on the
			// file. Windows will not allow the rename otherwise.
			if (realPack.exists())
				for (PackFile p : repo.getObjectDatabase().getPacks())
					if (realPack.getPath().equals(p.getPackFile().getPath())) {
						p.close();
						break;
					}
			tmpPack.setReadOnly();
			boolean delete = true;
			try {
				FileUtils.rename(tmpPack, realPack);
				delete = false;
				for (Map.Entry<PackExt, File> tmpEntry : tmpExts.entrySet()) {
					File tmpExt = tmpEntry.getValue();
					tmpExt.setReadOnly();

					File realExt = nameFor(
							id, "." + tmpEntry.getKey().getExtension()); //$NON-NLS-1$
					try {
						FileUtils.rename(tmpExt, realExt);
					} catch (IOException e) {
						File newExt = new File(realExt.getParentFile(),
								realExt.getName() + ".new"); //$NON-NLS-1$
						if (!tmpExt.renameTo(newExt))
							newExt = tmpExt;
						throw new IOException(MessageFormat.format(
								JGitText.get().panicCantRenameIndexFile, newExt,
								realExt));
					}
				}

			} finally {
				if (delete) {
					if (tmpPack.exists())
						tmpPack.delete();
					for (File tmpExt : tmpExts.values()) {
						if (tmpExt.exists())
							tmpExt.delete();
					}
				}
			}
			return repo.getObjectDatabase().openPack(realPack);
		} finally {
			if (tmpPack != null && tmpPack.exists())
				tmpPack.delete();
			for (File tmpExt : tmpExts.values()) {
				if (tmpExt.exists())
					tmpExt.delete();
			}
		}
	}

	private File nameFor(String name, String ext) {
		File packdir = new File(repo.getObjectsDirectory(), "pack"); //$NON-NLS-1$
		return new File(packdir, "pack-" + name + ext); //$NON-NLS-1$
	}

	/**
	 * A class holding statistical data for a FileRepository regarding how many
	 * objects are stored as loose or packed objects.
	 */
	public class RepoStatistics {
		/**
		 * The number of objects stored in pack files. If the same object is
		 * stored in multiple pack files then it is counted as often as it
		 * occurs in pack files.
		 */
		public long numberOfPackedObjects;

		/**
		 * The number of pack files.
		 */
		public long numberOfPackFiles;

		/**
		 * The number of objects stored as loose objects.
		 */
		public long numberOfLooseObjects;

		/**
		 * The sum of the sizes of all files used to persist loose objects.
		 */
		public long sizeOfLooseObjects;

		/**
		 * The sum of the sizes of all pack files.
		 */
		public long sizeOfPackedObjects;

		/**
		 * The number of loose refs.
		 */
		public long numberOfLooseRefs;

		/**
		 * The number of refs stored in pack files.
		 */
		public long numberOfPackedRefs;

		@Override
		public String toString() {
			final StringBuilder b = new StringBuilder();
			b.append("numberOfPackedObjects=").append(numberOfPackedObjects); //$NON-NLS-1$
			b.append(", numberOfPackFiles=").append(numberOfPackFiles); //$NON-NLS-1$
			b.append(", numberOfLooseObjects=").append(numberOfLooseObjects); //$NON-NLS-1$
			b.append(", numberOfLooseRefs=").append(numberOfLooseRefs); //$NON-NLS-1$
			b.append(", numberOfPackedRefs=").append(numberOfPackedRefs); //$NON-NLS-1$
			b.append(", sizeOfLooseObjects=").append(sizeOfLooseObjects); //$NON-NLS-1$
			b.append(", sizeOfPackedObjects=").append(sizeOfPackedObjects); //$NON-NLS-1$
			return b.toString();
		}
	}

	/**
	 * Returns statistics about the repository, e.g. the number and size of
	 * loose and packed objects and the number of loose and packed refs. If an
	 * object is contained in multiple pack files it is counted as often as it
	 * occurs.
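	 *
	 * A usage sketch ({@code repository} is a placeholder for an open
	 * FileRepository); the statistics can be printed via
	 * {@link RepoStatistics#toString()}:
	 *
	 * <pre>
	 * RepoStatistics stats = new GC(repository).getStatistics();
	 * System.out.println(stats);
	 * </pre>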
	 *
	 * @return statistics about loose and packed objects and refs in the
	 *         repository
	 * @throws IOException
	 */
	public RepoStatistics getStatistics() throws IOException {
		RepoStatistics ret = new RepoStatistics();
		Collection<PackFile> packs = repo.getObjectDatabase().getPacks();
		for (PackFile f : packs) {
			ret.numberOfPackedObjects += f.getIndex().getObjectCount();
			ret.numberOfPackFiles++;
			ret.sizeOfPackedObjects += f.getPackFile().length();
		}
		File objDir = repo.getObjectsDirectory();
		String[] fanout = objDir.list();
		if (fanout != null && fanout.length > 0) {
			for (String d : fanout) {
				if (d.length() != 2)
					continue;
				File[] entries = new File(objDir, d).listFiles();
				if (entries == null)
					continue;
				for (File f : entries) {
					if (f.getName().length() != Constants.OBJECT_ID_STRING_LENGTH - 2)
						continue;
					ret.numberOfLooseObjects++;
					ret.sizeOfLooseObjects += f.length();
				}
			}
		}

		RefDatabase refDb = repo.getRefDatabase();
		for (Ref r : refDb.getRefs(RefDatabase.ALL).values()) {
			Storage storage = r.getStorage();
			if (storage == Storage.LOOSE || storage == Storage.LOOSE_PACKED)
				ret.numberOfLooseRefs++;
			if (storage == Storage.PACKED || storage == Storage.LOOSE_PACKED)
				ret.numberOfPackedRefs++;
		}

		return ret;
	}

	/**
	 * Set the progress monitor used for garbage collection methods.
	 *
	 * @param pm
	 * @return this
	 */
	public GC setProgressMonitor(ProgressMonitor pm) {
		this.pm = (pm == null) ? NullProgressMonitor.INSTANCE : pm;
		return this;
	}

	/**
	 * During gc() or prune() each unreferenced, loose object which has been
	 * created or modified in the last <code>expireAgeMillis</code> milliseconds
	 * will not be pruned. Only older objects may be pruned. If set to 0 then
	 * every object is a candidate for pruning.
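	 *
	 * A usage sketch ({@code gc} is a placeholder for a configured GC
	 * instance):
	 *
	 * <pre>
	 * gc.setExpireAgeMillis(0); // make every loose object a pruning candidate
	 * </pre>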
	 *
	 * @param expireAgeMillis
	 *            minimal age of objects to be pruned in milliseconds.
	 */
	public void setExpireAgeMillis(long expireAgeMillis) {
		this.expireAgeMillis = expireAgeMillis;
		expire = null;
	}

	/**
	 * Set the PackConfig used when (re-)writing packfiles. This makes it
	 * possible to influence how packs are written and to implement something
	 * similar to "git gc --aggressive".
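	 *
	 * A configuration sketch (the delta search window shown is only an
	 * illustrative value, roughly mirroring what "git gc --aggressive" uses;
	 * {@code repository} and {@code gc} are placeholders):
	 *
	 * <pre>
	 * PackConfig aggressive = new PackConfig(repository);
	 * aggressive.setDeltaSearchWindowSize(250);
	 * gc.setPackConfig(aggressive);
	 * </pre>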
	 *
	 * @since 3.6
	 * @param pconfig
	 *            the {@link PackConfig} used when writing packs
	 */
	public void setPackConfig(PackConfig pconfig) {
		this.pconfig = pconfig;
	}

	/**
	 * During gc() or prune() each unreferenced, loose object which has been
	 * created or modified after or at <code>expire</code> will not be pruned.
	 * Only older objects may be pruned. If set to null then every object is a
	 * candidate for pruning.
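	 *
	 * A usage sketch which expires everything older than roughly one day (the
	 * cutoff is only an example; {@code gc} is a placeholder):
	 *
	 * <pre>
	 * gc.setExpire(new Date(System.currentTimeMillis() - 24L * 60 * 60 * 1000));
	 * </pre>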
	 *
	 * @param expire
	 *            instant in time which defines object expiration: objects with
	 *            modification time before this instant are expired, objects
	 *            with modification time newer than or equal to this instant
	 *            are not expired
	 */
	public void setExpire(Date expire) {
		this.expire = expire;
		expireAgeMillis = -1;
	}

	private static ObjectIdSet objectIdSet(final PackIndex idx) {
		return new ObjectIdSet() {
			public boolean contains(AnyObjectId objectId) {
				return idx.hasObject(objectId);
			}
		};
	}
}