/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.internal.storage.dfs;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;

import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ObjectDatabase;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectReader;

/**
 * Manages objects stored in
 * {@link org.eclipse.jgit.internal.storage.dfs.DfsPackFile} on a storage
 * system.
 */
public abstract class DfsObjDatabase extends ObjectDatabase {
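	/**
	 * Sentinel pack list used before the first scan. It always reports itself
	 * as dirty, so any lookup against it forces a fresh call to scanPacks().
	 */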
	private static final PackList NO_PACKS = new PackList(
			new DfsPackFile[0],
			new DfsReftable[0]) {
		@Override
		boolean dirty() {
			return true;
		}

		@Override
		void clearDirty() {
			// Always dirty.
		}

		@Override
		public void markDirty() {
			// Always dirty.
		}
	};

	/** Sources for a pack file. */
	public enum PackSource {
		/** The pack is created by ObjectInserter due to local activity. */
		INSERT(0),

		/**
		 * The pack is created by PackParser due to a network event.
		 * <p>
		 * A received pack can be from either a push into the repository or a
		 * fetch into the repository; the direction doesn't matter. A received
		 * pack was built by the remote Git implementation and may not match the
		 * storage layout preferred by this version. Received packs are likely
		 * to be either compacted or garbage collected in the future.
		 */
		RECEIVE(0),

		/**
		 * The pack was created by compacting multiple packs together.
		 * <p>
		 * Packs created by compacting multiple packs together aren't nearly as
		 * efficient as a fully garbage collected repository, but may save disk
		 * space by reducing redundant copies of base objects.
		 *
		 * @see DfsPackCompactor
		 */
		COMPACT(1),

		/**
		 * Pack was created by Git garbage collection by this implementation.
		 * <p>
		 * This source is only used by the {@link DfsGarbageCollector} when it
		 * builds a pack file by traversing the object graph and copying all
		 * reachable objects into a new pack stream.
		 *
		 * @see DfsGarbageCollector
		 */
		GC(2),

		/** Created from non-heads by {@link DfsGarbageCollector}. */
		GC_REST(3),

		/**
		 * RefTreeGraph pack was created by Git garbage collection.
		 *
		 * @see DfsGarbageCollector
		 */
		GC_TXN(4),

		/**
		 * Pack was created by Git garbage collection.
		 * <p>
		 * This pack contains only unreachable garbage that was found during the
		 * last GC pass. It is retained in a new pack until it is safe to prune
		 * these objects from the repository.
		 */
		UNREACHABLE_GARBAGE(5);

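		/**
		 * Relative priority of this source; larger values sort earlier when
		 * reftables are ordered by reftableComparator().
		 */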
		final int category;

		PackSource(int category) {
			this.category = category;
		}
	}

	private final AtomicReference<PackList> packList;

	private final DfsRepository repository;

	private DfsReaderOptions readerOptions;

	/**
	 * Initialize an object database for our repository.
	 *
	 * @param repository
	 *            repository owning this object database.
	 * @param options
	 *            how readers should access the object database.
	 */
	protected DfsObjDatabase(DfsRepository repository,
			DfsReaderOptions options) {
		this.repository = repository;
		this.packList = new AtomicReference<>(NO_PACKS);
		this.readerOptions = options;
	}

	/**
	 * Get configured reader options, such as read-ahead.
	 *
	 * @return configured reader options, such as read-ahead.
	 */
	public DfsReaderOptions getReaderOptions() {
		return readerOptions;
	}

	/** {@inheritDoc} */
	@Override
	public DfsReader newReader() {
		return new DfsReader(this);
	}

	/** {@inheritDoc} */
	@Override
	public ObjectInserter newInserter() {
		return new DfsInserter(this);
	}

	/**
	 * Scan and list all available pack files in the repository.
	 *
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 * @throws java.io.IOException
	 *             the pack list cannot be initialized.
	 */
	public DfsPackFile[] getPacks() throws IOException {
		return getPackList().packs;
	}

	/**
	 * Scan and list all available reftable files in the repository.
	 *
	 * @return list of available reftables. The returned array is shared with
	 *         the implementation and must not be modified by the caller.
	 * @throws java.io.IOException
	 *             the pack list cannot be initialized.
	 */
	public DfsReftable[] getReftables() throws IOException {
		return getPackList().reftables;
	}

	/**
	 * Scan and list all available pack files in the repository.
	 *
	 * @return list of available packs, with some additional metadata. The
	 *         returned array is shared with the implementation and must not be
	 *         modified by the caller.
	 * @throws java.io.IOException
	 *             the pack list cannot be initialized.
	 */
	public PackList getPackList() throws IOException {
		return scanPacks(NO_PACKS);
	}

	/**
	 * Get repository owning this object database.
	 *
	 * @return repository owning this object database.
	 */
	protected DfsRepository getRepository() {
		return repository;
	}

	/**
	 * List currently known pack files in the repository, without scanning.
	 *
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 */
	public DfsPackFile[] getCurrentPacks() {
		return getCurrentPackList().packs;
	}

	/**
	 * List currently known reftable files in the repository, without scanning.
	 *
	 * @return list of available reftables. The returned array is shared with
	 *         the implementation and must not be modified by the caller.
	 */
	public DfsReftable[] getCurrentReftables() {
		return getCurrentPackList().reftables;
	}

	/**
	 * List currently known pack files in the repository, without scanning.
	 *
	 * @return list of available packs, with some additional metadata. The
	 *         returned array is shared with the implementation and must not be
	 *         modified by the caller.
	 */
	public PackList getCurrentPackList() {
		return packList.get();
	}

	/**
	 * Does the requested object exist in this database?
	 * <p>
	 * This differs from ObjectDatabase's implementation in that we can
	 * selectively ignore unreachable (garbage) objects.
	 *
	 * @param objectId
	 *            identity of the object to test for existence of.
	 * @param avoidUnreachableObjects
	 *            if true, ignore objects that are unreachable.
	 * @return true if the specified object is stored in this database.
	 * @throws java.io.IOException
	 *             the object store cannot be accessed.
	 */
	public boolean has(AnyObjectId objectId, boolean avoidUnreachableObjects)
			throws IOException {
		try (ObjectReader or = newReader()) {
			or.setAvoidUnreachableObjects(avoidUnreachableObjects);
			return or.has(objectId);
		}
	}

	/**
	 * Generate a new unique name for a pack file.
	 *
	 * @param source
	 *            where the pack stream is created.
	 * @return a unique name for the pack file. Must not collide with any other
	 *         pack file name in the same DFS.
	 * @throws java.io.IOException
	 *             a new unique pack description cannot be generated.
	 */
	protected abstract DfsPackDescription newPack(PackSource source)
			throws IOException;

	/**
	 * Generate a new unique name for a pack file.
	 *
	 * <p>
	 * The default implementation of this method is equivalent to
	 * {@code newPack(source).setEstimatedPackSize(estimatedPackSize)}, but
	 * clients can override it to make better use of the given
	 * {@code estimatedPackSize} while creating the new
	 * {@link org.eclipse.jgit.internal.storage.dfs.DfsPackDescription} object.
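	 * <p>
	 * A minimal sketch of such an override; the storage-tier hook
	 * {@code chooseStorageTier} is purely illustrative and not part of JGit:
	 *
	 * <pre>
	 * {@code
	 * protected DfsPackDescription newPack(PackSource source,
	 * 		long estimatedPackSize) throws IOException {
	 * 	DfsPackDescription pack = newPack(source);
	 * 	pack.setEstimatedPackSize(estimatedPackSize);
	 * 	// hypothetical: pick a storage tier based on the expected size
	 * 	chooseStorageTier(pack, estimatedPackSize);
	 * 	return pack;
	 * }
	 * }
	 * </pre>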
	 *
	 * @param source
	 *            where the pack stream is created.
	 * @param estimatedPackSize
	 *            the estimated size of the pack.
	 * @return a unique name for the pack file. Must not collide with any other
	 *         pack file name in the same DFS.
	 * @throws java.io.IOException
	 *             a new unique pack description cannot be generated.
	 */
	protected DfsPackDescription newPack(PackSource source,
			long estimatedPackSize) throws IOException {
		DfsPackDescription pack = newPack(source);
		pack.setEstimatedPackSize(estimatedPackSize);
		return pack;
	}

	/**
	 * Commit a pack and index pair that was written to the DFS.
	 * <p>
	 * Committing the pack/index pair makes them visible to readers. The JGit
	 * DFS code always writes the pack, then the index. This allows a simple
	 * commit process to do nothing if readers always look for both files to
	 * exist and the DFS performs atomic creation of the file (e.g. stream to a
	 * temporary file and rename to target on close).
	 * <p>
	 * During pack compaction or GC the new pack file may be replacing other
	 * older files. Implementations should remove those older files (if any) as
	 * part of the commit of the new file.
	 * <p>
	 * This method is a trivial wrapper around
	 * {@link #commitPackImpl(Collection, Collection)} that calls the
	 * implementation and fires events.
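	 * <p>
	 * A sketch of what an implementation might do, assuming a hypothetical
	 * {@code blobStore} with publish/delete operations (the names are
	 * illustrative only):
	 *
	 * <pre>
	 * {@code
	 * protected void commitPackImpl(Collection<DfsPackDescription> desc,
	 * 		Collection<DfsPackDescription> replaces) throws IOException {
	 * 	for (DfsPackDescription d : desc)
	 * 		blobStore.publish(d); // make pack and index visible together
	 * 	if (replaces != null)
	 * 		for (DfsPackDescription d : replaces)
	 * 			blobStore.delete(d); // retire packs replaced by compact/GC
	 * }
	 * }
	 * </pre>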
	 *
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws java.io.IOException
	 *             the packs cannot be committed. On failure a rollback must
	 *             also be attempted by the caller.
	 */
	protected void commitPack(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException {
		commitPackImpl(desc, replaces);
		getRepository().fireEvent(new DfsPacksChangedEvent());
	}

	/**
	 * Implementation of pack commit.
	 *
	 * @see #commitPack(Collection, Collection)
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws java.io.IOException
	 *             the packs cannot be committed.
	 */
	protected abstract void commitPackImpl(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException;

	/**
	 * Try to rollback a pack creation.
	 * <p>
	 * JGit DFS always writes the pack first, then the index. If the pack does
	 * not yet exist, then neither does the index. A safe DFS implementation
	 * would try to remove both files to ensure they are really gone.
	 * <p>
	 * A rollback does not support failures, as it only occurs when there is
	 * already a failure in progress. A DFS implementor may wish to log
	 * warnings/error messages when a rollback fails, but should not send new
	 * exceptions up the Java callstack.
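	 * <p>
	 * A sketch of the intended best-effort behavior; {@code deleteIfPresent}
	 * and {@code log} are hypothetical names, not JGit API:
	 *
	 * <pre>
	 * {@code
	 * protected void rollbackPack(Collection<DfsPackDescription> desc) {
	 * 	for (DfsPackDescription d : desc) {
	 * 		try {
	 * 			deleteIfPresent(d); // remove pack and index, if written
	 * 		} catch (Exception e) {
	 * 			log.warn("rollback failed: " + d, e); // log, never rethrow
	 * 		}
	 * 	}
	 * }
	 * }
	 * </pre>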
	 *
	 * @param desc
	 *            pack to delete.
	 */
	protected abstract void rollbackPack(Collection<DfsPackDescription> desc);

	/**
	 * List the available pack files.
	 * <p>
	 * The returned list must support random access and must be mutable by the
	 * caller. It is sorted in place using the natural sorting of the returned
	 * DfsPackDescription objects.
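	 * <p>
	 * A minimal sketch of an in-memory implementation, assuming the subclass
	 * tracks its packs in a hypothetical {@code descriptions} collection:
	 *
	 * <pre>
	 * {@code
	 * protected List<DfsPackDescription> listPacks() {
	 * 	return new ArrayList<>(descriptions); // mutable copy; sorted by caller
	 * }
	 * }
	 * </pre>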
	 *
	 * @return available packs. May be empty if there are no packs.
	 * @throws java.io.IOException
	 *             the packs cannot be listed and the object database is not
	 *             functional to the caller.
	 */
	protected abstract List<DfsPackDescription> listPacks() throws IOException;

	/**
	 * Open a pack, pack index, or other related file for reading.
	 *
	 * @param desc
	 *            description of pack related to the data that will be read.
	 *            This is an instance previously obtained from
	 *            {@link #listPacks()}, but not necessarily from the same
	 *            DfsObjDatabase instance.
	 * @param ext
	 *            file extension that will be read, e.g. "pack" or "idx".
	 * @return channel to read the file.
	 * @throws java.io.FileNotFoundException
	 *             the file does not exist.
	 * @throws java.io.IOException
	 *             the file cannot be opened.
	 */
	protected abstract ReadableChannel openFile(
			DfsPackDescription desc, PackExt ext)
			throws FileNotFoundException, IOException;

	/**
	 * Open a pack, pack index, or other related file for writing.
	 *
	 * @param desc
	 *            description of pack related to the data that will be written.
	 *            This is an instance previously obtained from
	 *            {@link #newPack(PackSource)}.
	 * @param ext
	 *            file extension that will be written, e.g. "pack" or "idx".
	 * @return channel to write the file.
	 * @throws java.io.IOException
	 *             the file cannot be opened.
	 */
	protected abstract DfsOutputStream writeFile(
			DfsPackDescription desc, PackExt ext) throws IOException;

	void addPack(DfsPackFile newPack) throws IOException {
		PackList o, n;
		do {
			o = packList.get();
			if (o == NO_PACKS) {
				// The repository may not have needed any existing objects to
				// complete the current task of creating a pack (e.g. push of a
				// pack with no external deltas). Because we don't scan for
				// newly added packs on missed object lookups, scan now to
				// make sure all older packs are available in the packList.
				o = scanPacks(o);

				// It's possible the scan identified the pack we were asked to
				// add, as the pack was already committed via commitPack().
				// If this is the case, return without changing the list.
				for (DfsPackFile p : o.packs) {
					if (p.key.equals(newPack.key)) {
						return;
					}
				}
			}

			DfsPackFile[] packs = new DfsPackFile[1 + o.packs.length];
			packs[0] = newPack;
			System.arraycopy(o.packs, 0, packs, 1, o.packs.length);
			n = new PackListImpl(packs, o.reftables);
		} while (!packList.compareAndSet(o, n));
	}

	void addReftable(DfsPackDescription add, Set<DfsPackDescription> remove)
			throws IOException {
		PackList o, n;
		do {
			o = packList.get();
			if (o == NO_PACKS) {
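				// As in addPack(): scan now so older files are present in the
				// list, and bail out if the new reftable was already committed.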
				o = scanPacks(o);
				for (DfsReftable t : o.reftables) {
					if (t.getPackDescription().equals(add)) {
						return;
					}
				}
			}

			List<DfsReftable> tables = new ArrayList<>(1 + o.reftables.length);
			for (DfsReftable t : o.reftables) {
				if (!remove.contains(t.getPackDescription())) {
					tables.add(t);
				}
			}
			tables.add(new DfsReftable(add));
			n = new PackListImpl(o.packs, tables.toArray(new DfsReftable[0]));
		} while (!packList.compareAndSet(o, n));
	}

	PackList scanPacks(PackList original) throws IOException {
		PackList o, n;
		synchronized (packList) {
			do {
				o = packList.get();
				if (o != original) {
					// Another thread did the scan for us, while we
					// were blocked on the monitor above.
					//
					return o;
				}
				n = scanPacksImpl(o);
				if (n == o)
					return n;
			} while (!packList.compareAndSet(o, n));
		}
		getRepository().fireEvent(new DfsPacksChangedEvent());
		return n;
	}

	private PackList scanPacksImpl(PackList old) throws IOException {
		DfsBlockCache cache = DfsBlockCache.getInstance();
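		// Index the previous list so existing DfsPackFile/DfsReftable
		// instances (and their cached data) are reused across scans.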
		Map<DfsPackDescription, DfsPackFile> packs = packMap(old);
		Map<DfsPackDescription, DfsReftable> reftables = reftableMap(old);

		List<DfsPackDescription> scanned = listPacks();
		Collections.sort(scanned);

		List<DfsPackFile> newPacks = new ArrayList<>(scanned.size());
		List<DfsReftable> newReftables = new ArrayList<>(scanned.size());
		boolean foundNew = false;
		for (DfsPackDescription dsc : scanned) {
			DfsPackFile oldPack = packs.remove(dsc);
			if (oldPack != null) {
				newPacks.add(oldPack);
			} else if (dsc.hasFileExt(PackExt.PACK)) {
				newPacks.add(new DfsPackFile(cache, dsc));
				foundNew = true;
			}

			DfsReftable oldReftable = reftables.remove(dsc);
			if (oldReftable != null) {
				newReftables.add(oldReftable);
			} else if (dsc.hasFileExt(PackExt.REFTABLE)) {
				newReftables.add(new DfsReftable(cache, dsc));
				foundNew = true;
			}
		}

		if (newPacks.isEmpty() && newReftables.isEmpty())
			return new PackListImpl(NO_PACKS.packs, NO_PACKS.reftables);
		if (!foundNew) {
			old.clearDirty();
			return old;
		}
		Collections.sort(newReftables, reftableComparator());
		return new PackListImpl(
				newPacks.toArray(new DfsPackFile[0]),
				newReftables.toArray(new DfsReftable[0]));
	}

	private static Map<DfsPackDescription, DfsPackFile> packMap(PackList old) {
		Map<DfsPackDescription, DfsPackFile> forReuse = new HashMap<>();
		for (DfsPackFile p : old.packs) {
			if (!p.invalid()) {
				forReuse.put(p.desc, p);
			}
		}
		return forReuse;
	}

	private static Map<DfsPackDescription, DfsReftable> reftableMap(PackList old) {
		Map<DfsPackDescription, DfsReftable> forReuse = new HashMap<>();
		for (DfsReftable p : old.reftables) {
			if (!p.invalid()) {
				forReuse.put(p.desc, p);
			}
		}
		return forReuse;
	}

	/**
	 * Get comparator to sort {@link DfsReftable} by priority.
	 *
	 * @return comparator to sort {@link DfsReftable} by priority.
	 */
	protected Comparator<DfsReftable> reftableComparator() {
		return (fa, fb) -> {
			DfsPackDescription a = fa.getPackDescription();
			DfsPackDescription b = fb.getPackDescription();

			// GC, COMPACT reftables first by higher category.
			int c = category(b) - category(a);
			if (c != 0) {
				return c;
			}

			// Lower maxUpdateIndex first.
			c = Long.signum(a.getMaxUpdateIndex() - b.getMaxUpdateIndex());
			if (c != 0) {
				return c;
			}

			// Older reftable first.
			return Long.signum(a.getLastModified() - b.getLastModified());
		};
	}

	static int category(DfsPackDescription d) {
		PackSource s = d.getPackSource();
		return s != null ? s.category : 0;
	}

	/**
	 * Clears the cached list of packs, forcing them to be scanned again.
	 */
	protected void clearCache() {
		packList.set(NO_PACKS);
	}

	/** {@inheritDoc} */
	@Override
	public void close() {
		packList.set(NO_PACKS);
	}

	/** Snapshot of packs scanned in a single pass. */
	public static abstract class PackList {
		/** All known packs, sorted. */
		public final DfsPackFile[] packs;

		/** All known reftables, sorted. */
		public final DfsReftable[] reftables;

		private long lastModified = -1;

		PackList(DfsPackFile[] packs, DfsReftable[] reftables) {
			this.packs = packs;
			this.reftables = reftables;
		}

		/** @return last modified time of all packs, in milliseconds. */
		public long getLastModified() {
			if (lastModified < 0) {
				long max = 0;
				for (DfsPackFile pack : packs) {
					max = Math.max(max, pack.getPackDescription().getLastModified());
				}
				lastModified = max;
			}
			return lastModified;
		}

		abstract boolean dirty();
		abstract void clearDirty();

		/**
		 * Mark pack list as dirty.
		 * <p>
		 * Used when the caller knows that new data might have been written to
		 * the repository that could invalidate open readers depending on this
		 * pack list, for example if refs are newly scanned.
		 */
		public abstract void markDirty();
	}

	private static final class PackListImpl extends PackList {
		private volatile boolean dirty;

		PackListImpl(DfsPackFile[] packs, DfsReftable[] reftables) {
			super(packs, reftables);
		}

		@Override
		boolean dirty() {
			return dirty;
		}

		@Override
		void clearDirty() {
			dirty = false;
		}

		@Override
		public void markDirty() {
			dirty = true;
		}
	}
}