/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.internal.storage.dfs;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;

import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.lib.ObjectDatabase;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectReader;

/**
 * Manages objects stored in {@link DfsPackFile} on a storage system.
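 * <p>
 * For illustration only, a caller typically reaches this database through its
 * owning {@code DfsRepository} (a minimal sketch; {@code repository} is
 * supplied by the caller, and error handling is omitted):
 *
 * <pre>
 * // getPacks() may throw IOException if the pack list cannot be read.
 * DfsObjDatabase odb = repository.getObjectDatabase();
 * for (DfsPackFile pack : odb.getPacks())
 * 	System.out.println(pack.getPackDescription());
 * </pre>
 */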
public abstract class DfsObjDatabase extends ObjectDatabase {
	private static final PackList NO_PACKS = new PackList(new DfsPackFile[0]);

	/** Sources for a pack file. */
	public enum PackSource {
		/** The pack is created by ObjectInserter due to local activity. */
		INSERT(0),

		/**
		 * The pack is created by PackParser due to a network event.
		 * <p>
		 * A received pack can come from either a push into the repository or
		 * a fetch into it; the direction doesn't matter. A received
		 * pack was built by the remote Git implementation and may not match the
		 * storage layout preferred by this version. Received packs are likely
		 * to be either compacted or garbage collected in the future.
		 */
		RECEIVE(0),

		/**
		 * Pack was created by Git garbage collection by this implementation.
		 * <p>
		 * This source is only used by the {@link DfsGarbageCollector} when it
		 * builds a pack file by traversing the object graph and copying all
		 * reachable objects into a new pack stream.
		 *
		 * @see DfsGarbageCollector
		 */
		GC(1),

		/**
		 * The pack was created by compacting multiple packs together.
		 * <p>
		 * Packs created by compacting multiple packs together aren't nearly as
		 * efficient as those in a fully garbage collected repository, but they
		 * may save disk space by reducing redundant copies of base objects.
		 *
		 * @see DfsPackCompactor
		 */
		COMPACT(1),

		/**
		 * Pack was created by Git garbage collection.
		 * <p>
		 * This pack contains only unreachable garbage that was found during the
		 * last GC pass. It is retained in a new pack until it is safe to prune
		 * these objects from the repository.
		 */
		UNREACHABLE_GARBAGE(2);

		final int category;

		PackSource(int category) {
			this.category = category;
		}
	}

	private final AtomicReference<PackList> packList;

	private final DfsRepository repository;

	private DfsReaderOptions readerOptions;

	/**
	 * Initialize an object database for our repository.
	 *
	 * @param repository
	 *            repository owning this object database.
	 * @param options
	 *            how readers should access the object database.
	 */
	protected DfsObjDatabase(DfsRepository repository,
			DfsReaderOptions options) {
		this.repository = repository;
		this.packList = new AtomicReference<PackList>(NO_PACKS);
		this.readerOptions = options;
	}

	/** @return configured reader options, such as read-ahead. */
	public DfsReaderOptions getReaderOptions() {
		return readerOptions;
	}

	@Override
	public ObjectReader newReader() {
		return new DfsReader(this);
	}

	@Override
	public ObjectInserter newInserter() {
		return new DfsInserter(this);
	}

	/**
	 * Scan and list all available pack files in the repository.
	 *
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 * @throws IOException
	 *             the pack list cannot be initialized.
	 */
	public DfsPackFile[] getPacks() throws IOException {
		return scanPacks(NO_PACKS).packs;
	}

	/** @return repository owning this object database. */
	protected DfsRepository getRepository() {
		return repository;
	}

	/**
	 * List currently known pack files in the repository, without scanning.
	 *
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 */
	public DfsPackFile[] getCurrentPacks() {
		return packList.get().packs;
	}

	/**
	 * Generate a new unique name for a pack file.
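	 * <p>
	 * For illustration only, one possible scheme combines a repository-scoped
	 * counter with the pack source; the {@code packId} field and the
	 * {@code DfsPackDescription} constructor arguments shown here are
	 * assumptions, not requirements of this API:
	 *
	 * <pre>
	 * private final AtomicInteger packId = new AtomicInteger();
	 *
	 * protected DfsPackDescription newPack(PackSource source) {
	 * 	String name = "pack-" + packId.incrementAndGet() + "-" + source.name();
	 * 	return new DfsPackDescription(getRepository().getDescription(), name);
	 * }
	 * </pre>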
	 *
	 * @param source
	 *            where the pack stream is created.
	 * @return a unique name for the pack file. Must not collide with any other
	 *         pack file name in the same DFS.
	 * @throws IOException
	 *             a new unique pack description cannot be generated.
	 */
	protected abstract DfsPackDescription newPack(PackSource source)
			throws IOException;

	/**
	 * Commit a pack and index pair that was written to the DFS.
	 * <p>
	 * Committing the pack/index pair makes them visible to readers. The JGit
	 * DFS code always writes the pack, then the index. This allows a simple
	 * commit process to do nothing if readers always look for both files to
	 * exist and the DFS performs atomic creation of the file (e.g. stream to a
	 * temporary file and rename to target on close).
	 * <p>
	 * During pack compaction or GC the new pack file may be replacing other
	 * older files. Implementations should remove those older files (if any) as
	 * part of the commit of the new file.
	 * <p>
	 * This method is a trivial wrapper around
	 * {@link #commitPackImpl(Collection, Collection)} that calls the
	 * implementation and fires events.
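	 * <p>
	 * As an illustration only, an in-memory implementation might commit by
	 * updating a synchronized list (the {@code packs} field here is
	 * hypothetical, not part of this class):
	 *
	 * <pre>
	 * protected void commitPackImpl(Collection&lt;DfsPackDescription&gt; desc,
	 * 		Collection&lt;DfsPackDescription&gt; replaces) {
	 * 	synchronized (packs) {
	 * 		packs.addAll(desc);
	 * 		if (replaces != null)
	 * 			packs.removeAll(replaces);
	 * 	}
	 * }
	 * </pre>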
	 *
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws IOException
	 *             the packs cannot be committed. On failure a rollback must
	 *             also be attempted by the caller.
	 */
	protected void commitPack(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException {
		commitPackImpl(desc, replaces);
		getRepository().fireEvent(new DfsPacksChangedEvent());
	}

	/**
	 * Implementation of pack commit.
	 *
	 * @see #commitPack(Collection, Collection)
	 *
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws IOException
	 *             the packs cannot be committed.
	 */
	protected abstract void commitPackImpl(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException;

	/**
	 * Try to rollback a pack creation.
	 * <p>
	 * JGit DFS always writes the pack first, then the index. If the pack does
	 * not yet exist, then neither does the index. A safe DFS implementation
	 * would try to remove both files to ensure they are really gone.
	 * <p>
	 * A rollback does not support failures, as it only occurs when there is
	 * already a failure in progress. A DFS implementor may wish to log
	 * warnings/error messages when a rollback fails, but should not send new
	 * exceptions up the Java callstack.
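	 * <p>
	 * A minimal sketch of the expected shape (the {@code deleteQuietly}
	 * helper is hypothetical; any best-effort delete will do):
	 *
	 * <pre>
	 * protected void rollbackPack(Collection&lt;DfsPackDescription&gt; desc) {
	 * 	for (DfsPackDescription d : desc) {
	 * 		// Best effort; log failures instead of throwing.
	 * 		deleteQuietly(d);
	 * 	}
	 * }
	 * </pre>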
	 *
	 * @param desc
	 *            pack to delete.
	 */
	protected abstract void rollbackPack(Collection<DfsPackDescription> desc);

	/**
	 * List the available pack files.
	 * <p>
	 * The returned list must support random access and must be mutable by the
	 * caller. It is sorted in place using the natural sorting of the returned
	 * DfsPackDescription objects.
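	 * <p>
	 * Because the caller sorts and mutates the result, implementations should
	 * return a fresh, mutable copy. For illustration only, an in-memory
	 * implementation with a hypothetical {@code packs} field might do:
	 *
	 * <pre>
	 * protected List&lt;DfsPackDescription&gt; listPacks() {
	 * 	synchronized (packs) {
	 * 		return new ArrayList&lt;DfsPackDescription&gt;(packs);
	 * 	}
	 * }
	 * </pre>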
	 *
	 * @return available packs. May be empty if there are no packs.
	 * @throws IOException
	 *             the packs cannot be listed and the object database is not
	 *             usable by the caller.
	 */
	protected abstract List<DfsPackDescription> listPacks() throws IOException;

	/**
	 * Open a pack, pack index, or other related file for reading.
	 *
	 * @param desc
	 *            description of pack related to the data that will be read.
	 *            This is an instance previously obtained from
	 *            {@link #listPacks()}, but not necessarily from the same
	 *            DfsObjDatabase instance.
	 * @param ext
	 *            file extension that will be read, e.g. "pack" or "idx".
	 * @return channel to read the file.
	 * @throws FileNotFoundException
	 *             the file does not exist.
	 * @throws IOException
	 *             the file cannot be opened.
	 */
	protected abstract ReadableChannel openFile(
			DfsPackDescription desc, PackExt ext)
			throws FileNotFoundException, IOException;

	/**
	 * Open a pack, pack index, or other related file for writing.
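	 * <p>
	 * Callers stream the pack before its index. A caller-side sketch (the
	 * {@code packData} source and the {@code copy} helper are hypothetical):
	 *
	 * <pre>
	 * DfsOutputStream out = writeFile(desc, PackExt.PACK);
	 * try {
	 * 	copy(packData, out);
	 * } finally {
	 * 	out.close();
	 * }
	 * </pre>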
	 *
	 * @param desc
	 *            description of pack related to the data that will be written.
	 *            This is an instance previously obtained from
	 *            {@link #newPack(PackSource)}.
	 * @param ext
	 *            file extension that will be written, e.g. "pack" or "idx".
	 * @return channel to write the file.
	 * @throws IOException
	 *             the file cannot be opened.
	 */
	protected abstract DfsOutputStream writeFile(
			DfsPackDescription desc, PackExt ext) throws IOException;

	void addPack(DfsPackFile newPack) throws IOException {
		PackList o, n;
		do {
			o = packList.get();
			if (o == NO_PACKS) {
				// The repository may not have needed any existing objects to
				// complete the current task of creating a pack (e.g. push of a
				// pack with no external deltas). Because we don't scan for
				// newly added packs on missed object lookups, scan now to
				// make sure all older packs are available in the packList.
				o = scanPacks(o);

				// It's possible the scan identified the pack we were asked to
				// add, as the pack was already committed via commitPack().
				// If this is the case, return without changing the list.
				for (DfsPackFile p : o.packs) {
					if (p == newPack)
						return;
				}
			}

			DfsPackFile[] packs = new DfsPackFile[1 + o.packs.length];
			packs[0] = newPack;
			System.arraycopy(o.packs, 0, packs, 1, o.packs.length);
			n = new PackList(packs);
		} while (!packList.compareAndSet(o, n));
	}

	private PackList scanPacks(final PackList original) throws IOException {
		PackList o, n;
		synchronized (packList) {
			do {
				o = packList.get();
				if (o != original) {
					// Another thread did the scan for us, while we
					// were blocked on the monitor above.
					//
					return o;
				}
				n = scanPacksImpl(o);
				if (n == o)
					return n;
			} while (!packList.compareAndSet(o, n));
		}
		getRepository().fireEvent(new DfsPacksChangedEvent());
		return n;
	}

	private PackList scanPacksImpl(PackList old) throws IOException {
		DfsBlockCache cache = DfsBlockCache.getInstance();
		Map<DfsPackDescription, DfsPackFile> forReuse = reuseMap(old);
		List<DfsPackDescription> scanned = listPacks();
		Collections.sort(scanned);

		List<DfsPackFile> list = new ArrayList<DfsPackFile>(scanned.size());
		boolean foundNew = false;
		for (DfsPackDescription dsc : scanned) {
			DfsPackFile oldPack = forReuse.remove(dsc);
			if (oldPack != null) {
				list.add(oldPack);
			} else {
				list.add(cache.getOrCreate(dsc, null));
				foundNew = true;
			}
		}

		for (DfsPackFile p : forReuse.values())
			p.close();
		if (list.isEmpty())
			return new PackList(NO_PACKS.packs);
		if (!foundNew)
			return old;
		return new PackList(list.toArray(new DfsPackFile[list.size()]));
	}

	private static Map<DfsPackDescription, DfsPackFile> reuseMap(PackList old) {
		Map<DfsPackDescription, DfsPackFile> forReuse
			= new HashMap<DfsPackDescription, DfsPackFile>();
		for (DfsPackFile p : old.packs) {
			if (p.invalid()) {
				// The pack instance is corrupted, and cannot be safely used
				// again. Do not include it in our reuse map.
				//
				p.close();
				continue;
			}

			DfsPackFile prior = forReuse.put(p.getPackDescription(), p);
			if (prior != null) {
				// This should never occur. It should be impossible for us
				// to have two pack files with the same name, as all of them
				// came out of the same directory. If it does, we promised to
				// close any PackFiles we did not reuse, so close the second;
				// readers are likely to be actively using the first.
				//
				forReuse.put(prior.getPackDescription(), prior);
				p.close();
			}
		}
		return forReuse;
	}

	/** Clears the cached list of packs, forcing them to be scanned again. */
	protected void clearCache() {
		packList.set(NO_PACKS);
	}

	@Override
	public void close() {
		// PackList packs = packList.get();
		packList.set(NO_PACKS);

		// TODO Close packs if they aren't cached.
		// for (DfsPackFile p : packs.packs)
		// p.close();
	}

	private static final class PackList {
		/** All known packs, sorted. */
		final DfsPackFile[] packs;

		PackList(final DfsPackFile[] packs) {
			this.packs = packs;
		}
	}
}