/*
 * Copyright (C) 2008-2009, Google Inc.
 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.internal.storage.file;

import java.io.IOException;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.SoftReference;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReferenceArray;
import java.util.concurrent.locks.ReentrantLock;

import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.storage.file.WindowCacheConfig;

/**
 * Caches slices of a {@link PackFile} in memory for faster read access.
 * <p>
 * The WindowCache serves as a Java-based "buffer cache", loading segments of a
 * PackFile into the JVM heap prior to use. As JGit often wants to do reads of
 * only tiny slices of a file, the WindowCache tries to smooth out these tiny
 * reads into larger block-sized IO operations.
 * <p>
 * Whenever a cache miss occurs, {@link #load(PackFile, long)} is invoked by
 * exactly one thread for the given <code>(PackFile,position)</code> key tuple.
 * This is ensured by an array of locks, with the tuple hashed to a lock
 * instance.
 * <p>
 * During a miss, older entries are evicted from the cache so long as
 * {@link #isFull()} returns true.
 * <p>
 * It's too expensive during object access to be 100% accurate with a least
 * recently used (LRU) algorithm. Strictly ordering every read is a lot of
 * overhead that typically doesn't yield a corresponding benefit to the
 * application.
 * <p>
 * This cache implements a loose LRU policy by randomly picking a window
 * comprising roughly 10% of the cache, and evicting the oldest accessed entry
 * within that window.
 * <p>
 * Entities created by the cache are held under SoftReferences, permitting the
 * Java runtime's garbage collector to evict entries when heap memory gets low.
 * Most JREs implement a loose least recently used algorithm for this eviction.
 * <p>
 * The internal hash table does not expand at runtime; instead it is fixed in
 * size at cache creation time. The internal lock table used to gate load
 * invocations is also fixed in size.
 * <p>
 * The key tuple is passed through to methods as a pair of parameters rather
 * than as a single Object, thus reducing the transient memory allocations of
 * callers. It is more efficient to avoid the allocation, as we can't be 100%
 * sure that a JIT would be able to stack-allocate a key tuple.
 * <p>
 * This cache has the following implementation rules:
 * <ul>
 * <li>{@link #load(PackFile, long)} is invoked by at most one thread at a time
 * for a given <code>(PackFile,position)</code> tuple.</li>
 * <li>For every <code>load()</code> invocation there is exactly one
 * {@link #createRef(PackFile, long, ByteWindow)} invocation to wrap a
 * SoftReference around the cached entity.</li>
 * <li>For every Reference created by <code>createRef()</code> there will be
 * exactly one call to {@link #clear(Ref)} to clean up any resources associated
 * with the (now expired) cached entity.</li>
 * </ul>
 * <p>
 * Therefore, it is safe to perform resource accounting increments during the
 * {@link #load(PackFile, long)} or
 * {@link #createRef(PackFile, long, ByteWindow)} methods, and matching
 * decrements during {@link #clear(Ref)}. Implementors may need to override
 * {@link #createRef(PackFile, long, ByteWindow)} in order to embed additional
 * accounting information into an implementation-specific {@link Ref} subclass,
 * as the cached entity may have already been evicted by the JRE's garbage
 * collector.
 * <p>
 * To maintain high concurrency under load, only one thread performs the
 * eviction work while other threads can continue to insert new objects in
 * parallel. This means that the cache can be temporarily over limit,
 * especially if the nominated eviction thread is being starved relative to the
 * other threads.
 */
public class WindowCache {
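	// bits() converts a power-of-two window size into its shift amount. For
	// example, bits(4096) == 12 and bits(8192) == 13, since 1 << 13 == 8192.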
	private static final int bits(int newSize) {
		if (newSize < 4096)
			throw new IllegalArgumentException(JGitText.get().invalidWindowSize);
		if (Integer.bitCount(newSize) != 1)
			throw new IllegalArgumentException(JGitText.get().windowSizeMustBePowerOf2);
		return Integer.numberOfTrailingZeros(newSize);
	}

	private static final Random rng = new Random();

	private static volatile WindowCache cache;

	private static volatile int streamFileThreshold;

	static {
		reconfigure(new WindowCacheConfig());
	}

	/**
	 * Modify the configuration of the window cache.
	 * <p>
	 * The new configuration is applied immediately. If the new limits are
	 * smaller than what is currently cached, older entries will be purged
	 * as soon as possible to allow the cache to meet the new limit.
	 *
	 * @deprecated use {@code cfg.install()} to avoid internal reference.
	 * @param cfg
	 *            the new window cache configuration.
	 * @throws IllegalArgumentException
	 *             the cache configuration contains one or more invalid
	 *             settings, usually a limit that is too low.
	 */
	@Deprecated
	public static void reconfigure(final WindowCacheConfig cfg) {
		final WindowCache nc = new WindowCache(cfg);
		final WindowCache oc = cache;
		if (oc != null)
			oc.removeAll();
		cache = nc;
		streamFileThreshold = cfg.getStreamFileThreshold();
		DeltaBaseCache.reconfigure(cfg);
	}
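
	// A minimal configuration sketch showing the non-deprecated path through
	// WindowCacheConfig.install(). The setter values below are illustrative
	// assumptions, not recommended defaults.
	//
	//   WindowCacheConfig cfg = new WindowCacheConfig();
	//   cfg.setPackedGitLimit(64 * 1024 * 1024); // total bytes of windows to cache
	//   cfg.setPackedGitWindowSize(8 * 1024);    // bytes per window, a power of two
	//   cfg.setPackedGitOpenFiles(128);          // pack files held open concurrently
	//   cfg.install();                           // replaces the active WindowCache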

	static int getStreamFileThreshold() {
		return streamFileThreshold;
	}

	static WindowCache getInstance() {
		return cache;
	}

	static final ByteWindow get(final PackFile pack, final long offset)
			throws IOException {
		final WindowCache c = cache;
		final ByteWindow r = c.getOrLoad(pack, c.toStart(offset));
		if (c != cache) {
			// The cache was reconfigured while we were using the old one
			// to load this window. The window is still valid, but our
			// cache may think it's still live. Ensure the window is removed
			// from the old cache so resources can be released.
			//
			c.removeAll();
		}
		return r;
	}

	static final void purge(final PackFile pack) {
		cache.removeAll(pack);
	}

	/** ReferenceQueue to clean up released and garbage collected windows. */
	private final ReferenceQueue<ByteWindow> queue;

	/** Number of entries in {@link #table}. */
	private final int tableSize;

	/** Access clock for loose LRU. */
	private final AtomicLong clock;

	/** Hash bucket directory; entries are chained below. */
	private final AtomicReferenceArray<Entry> table;

	/** Locks to prevent concurrent loads for same (PackFile,position). */
	private final Lock[] locks;

	/** Lock to elect the eviction thread after a load occurs. */
	private final ReentrantLock evictLock;

	/** Number of {@link #table} buckets to scan for an eviction window. */
	private final int evictBatch;

	private final int maxFiles;

	private final long maxBytes;

	private final boolean mmap;

	private final int windowSizeShift;

	private final int windowSize;

	private final AtomicInteger openFiles;

	private final AtomicLong openBytes;

	private WindowCache(final WindowCacheConfig cfg) {
		tableSize = tableSize(cfg);
		final int lockCount = lockCount(cfg);
		if (tableSize < 1)
			throw new IllegalArgumentException(JGitText.get().tSizeMustBeGreaterOrEqual1);
		if (lockCount < 1)
			throw new IllegalArgumentException(JGitText.get().lockCountMustBeGreaterOrEqual1);

		queue = new ReferenceQueue<ByteWindow>();
		clock = new AtomicLong(1);
		table = new AtomicReferenceArray<Entry>(tableSize);
		locks = new Lock[lockCount];
		for (int i = 0; i < locks.length; i++)
			locks[i] = new Lock();
		evictLock = new ReentrantLock();

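		// Scan roughly 10% of the hash table per eviction pass, clamped to the
		// range [4, 64] buckets and never more than the table itself.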
		int eb = (int) (tableSize * .1);
		if (64 < eb)
			eb = 64;
		else if (eb < 4)
			eb = 4;
		if (tableSize < eb)
			eb = tableSize;
		evictBatch = eb;

		maxFiles = cfg.getPackedGitOpenFiles();
		maxBytes = cfg.getPackedGitLimit();
		mmap = cfg.isPackedGitMMAP();
		windowSizeShift = bits(cfg.getPackedGitWindowSize());
		windowSize = 1 << windowSizeShift;

		openFiles = new AtomicInteger();
		openBytes = new AtomicLong();

		if (maxFiles < 1)
			throw new IllegalArgumentException(JGitText.get().openFilesMustBeAtLeast1);
		if (maxBytes < windowSize)
			throw new IllegalArgumentException(JGitText.get().windowSizeMustBeLesserThanLimit);
	}

	int getOpenFiles() {
		return openFiles.get();
	}

	long getOpenBytes() {
		return openBytes.get();
	}

	private int hash(final int packHash, final long off) {
		return packHash + (int) (off >>> windowSizeShift);
	}

	private ByteWindow load(final PackFile pack, final long offset)
			throws IOException {
		if (pack.beginWindowCache())
			openFiles.incrementAndGet();
		try {
			if (mmap)
				return pack.mmap(offset, windowSize);
			return pack.read(offset, windowSize);
		} catch (IOException e) {
			close(pack);
			throw e;
		} catch (RuntimeException e) {
			close(pack);
			throw e;
		} catch (Error e) {
			close(pack);
			throw e;
		}
	}

	private Ref createRef(final PackFile p, final long o, final ByteWindow v) {
		final Ref ref = new Ref(p, o, v, queue);
		openBytes.addAndGet(ref.size);
		return ref;
	}

	private void clear(final Ref ref) {
		openBytes.addAndGet(-ref.size);
		close(ref.pack);
	}

	private void close(final PackFile pack) {
		if (pack.endWindowCache())
			openFiles.decrementAndGet();
	}

	private boolean isFull() {
		return maxFiles < openFiles.get() || maxBytes < openBytes.get();
	}

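	// Aligns an offset down to the start of its window. For example, with
	// 8 KiB windows (a shift of 13), toStart(10000) returns 8192.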
	private long toStart(final long offset) {
		return (offset >>> windowSizeShift) << windowSizeShift;
	}

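	// Worked example under assumed (not default) settings: a 64 MiB limit with
	// 8 KiB windows gives 5 * (67108864 / 8192) / 2 = 20480 buckets, about 2.5
	// buckets per window that can be resident at the limit.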
	private static int tableSize(final WindowCacheConfig cfg) {
		final int wsz = cfg.getPackedGitWindowSize();
		final long limit = cfg.getPackedGitLimit();
		if (wsz <= 0)
			throw new IllegalArgumentException(JGitText.get().invalidWindowSize);
		if (limit < wsz)
			throw new IllegalArgumentException(JGitText.get().windowSizeMustBeLesserThanLimit);
		return (int) Math.min(5 * (limit / wsz) / 2, 2000000000);
	}

	private static int lockCount(final WindowCacheConfig cfg) {
		return Math.max(cfg.getPackedGitOpenFiles(), 32);
	}

	/**
	 * Look up a cached object, creating and loading it if it doesn't exist.
	 *
	 * @param pack
	 *            the pack that "contains" the cached object.
	 * @param position
	 *            offset within <code>pack</code> of the object.
	 * @return the object reference.
	 * @throws IOException
	 *             the object reference was not in the cache and could not be
	 *             obtained by {@link #load(PackFile, long)}.
	 */
	private ByteWindow getOrLoad(final PackFile pack, final long position)
			throws IOException {
		final int slot = slot(pack, position);
		final Entry e1 = table.get(slot);
		ByteWindow v = scan(e1, pack, position);
		if (v != null)
			return v;

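		// The lock-free scan missed. Serialize loads for this (pack, position)
		// tuple and re-check the bucket in case another thread loaded the
		// window while we were waiting for the lock.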
		synchronized (lock(pack, position)) {
			Entry e2 = table.get(slot);
			if (e2 != e1) {
				v = scan(e2, pack, position);
				if (v != null)
					return v;
			}

			v = load(pack, position);
			final Ref ref = createRef(pack, position, v);
			hit(ref);
			for (;;) {
				final Entry n = new Entry(clean(e2), ref);
				if (table.compareAndSet(slot, e2, n))
					break;
				e2 = table.get(slot);
			}
		}

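		// Only the thread that wins this tryLock performs eviction; the others
		// skip it, so the cache may temporarily exceed its limits.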
		if (evictLock.tryLock()) {
			try {
				gc();
				evict();
			} finally {
				evictLock.unlock();
			}
		}

		return v;
	}

	private ByteWindow scan(Entry n, final PackFile pack, final long position) {
		for (; n != null; n = n.next) {
			final Ref r = n.ref;
			if (r.pack == pack && r.position == position) {
				final ByteWindow v = r.get();
				if (v != null) {
					hit(r);
					return v;
				}
				n.kill();
				break;
			}
		}
		return null;
	}

	private void hit(final Ref r) {
		// We don't need to be 100% accurate here. It's sufficient that at least
		// one thread performs the increment. Any other concurrent access at
		// exactly the same time can simply use the same clock value.
		//
		// Consequently we attempt the set, but we don't try to recover should
		// it fail. This is why we don't use getAndIncrement() here.
		//
		final long c = clock.get();
		clock.compareAndSet(c, c + 1);
		r.lastAccess = c;
	}

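	// Loose LRU eviction: sample a random, wrapping run of evictBatch buckets,
	// kill the least recently used live entry found there, and repeat until
	// the cache is back under its file and byte limits.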
	private void evict() {
		while (isFull()) {
			int ptr = rng.nextInt(tableSize);
			Entry old = null;
			int slot = 0;
			for (int b = evictBatch - 1; b >= 0; b--, ptr++) {
				if (tableSize <= ptr)
					ptr = 0;
				for (Entry e = table.get(ptr); e != null; e = e.next) {
					if (e.dead)
						continue;
					if (old == null || e.ref.lastAccess < old.ref.lastAccess) {
						old = e;
						slot = ptr;
					}
				}
			}
			if (old != null) {
				old.kill();
				gc();
				final Entry e1 = table.get(slot);
				table.compareAndSet(slot, e1, clean(e1));
			}
		}
	}

	/**
	 * Clear every entry from the cache.
	 * <p>
	 * This is a last-ditch effort to clear out the cache, such as before it
	 * gets replaced by another cache that is configured differently. This
	 * method tries to force every cached entry through {@link #clear(Ref)} to
	 * ensure that resources are correctly accounted for and cleaned up by the
	 * subclass. A concurrent reader loading entries while this method is
	 * running may cause resource accounting failures.
	 */
	private void removeAll() {
		for (int s = 0; s < tableSize; s++) {
			Entry e1;
			do {
				e1 = table.get(s);
				for (Entry e = e1; e != null; e = e.next)
					e.kill();
			} while (!table.compareAndSet(s, e1, null));
		}
		gc();
	}

	/**
	 * Clear all entries related to a single file.
	 * <p>
	 * Typically this method is invoked during {@link PackFile#close()}, when we
	 * know the pack is never going to be useful to us again (for example, it no
	 * longer exists on disk). A concurrent reader loading an entry from this
	 * same pack may cause the pack to become stuck in the cache anyway.
	 *
	 * @param pack
	 *            the file to purge all entries of.
	 */
	private void removeAll(final PackFile pack) {
		for (int s = 0; s < tableSize; s++) {
			final Entry e1 = table.get(s);
			boolean hasDead = false;
			for (Entry e = e1; e != null; e = e.next) {
				if (e.ref.pack == pack) {
					e.kill();
					hasDead = true;
				} else if (e.dead)
					hasDead = true;
			}
			if (hasDead)
				table.compareAndSet(s, e1, clean(e1));
		}
		gc();
	}

	private void gc() {
		Ref r;
		while ((r = (Ref) queue.poll()) != null) {
			// Sun's Java 5 and 6 implementations have a bug where a Reference
			// can be enqueued and dequeued twice on the same reference queue
			// due to a race condition within ReferenceQueue.enqueue(Reference).
			//
			// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6837858
			//
			// We CANNOT permit a Reference to come through us twice, as it will
			// skew the resource counters we maintain. Our canClear() check here
			// provides a way to skip the redundant dequeues, if any.
			//
			if (r.canClear()) {
				clear(r);

				boolean found = false;
				final int s = slot(r.pack, r.position);
				final Entry e1 = table.get(s);
				for (Entry n = e1; n != null; n = n.next) {
					if (n.ref == r) {
						n.dead = true;
						found = true;
						break;
					}
				}
				if (found)
					table.compareAndSet(s, e1, clean(e1));
			}
		}
	}

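	// The unsigned shift discards the hash's sign bit so the remainder is
	// always a valid, non-negative index into the table or lock array.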
	private int slot(final PackFile pack, final long position) {
		return (hash(pack.hash, position) >>> 1) % tableSize;
	}

	private Lock lock(final PackFile pack, final long position) {
		return locks[(hash(pack.hash, position) >>> 1) % locks.length];
	}

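	// Rebuilds a bucket chain without its dead entries, enqueueing each dead
	// entry's Ref so gc() can release its resources. Suffixes of the chain that
	// contain no dead entries are reused rather than copied.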
	private static Entry clean(Entry top) {
		while (top != null && top.dead) {
			top.ref.enqueue();
			top = top.next;
		}
		if (top == null)
			return null;
		final Entry n = clean(top.next);
		return n == top.next ? top : new Entry(n, top.ref);
	}

	private static class Entry {
		/** Next entry in the hash table's chain list. */
		final Entry next;

		/** The referenced object. */
		final Ref ref;

		/**
		 * Marked true when ref.get() returns null and the ref is dead.
		 * <p>
		 * A true here indicates that the ref is no longer accessible, and that
		 * we therefore need to eventually purge this Entry object out of the
		 * bucket's chain.
		 */
		volatile boolean dead;

		Entry(final Entry n, final Ref r) {
			next = n;
			ref = r;
		}

		final void kill() {
			dead = true;
			ref.enqueue();
		}
	}

	/** A soft reference wrapped around a cached object. */
	private static class Ref extends SoftReference<ByteWindow> {
		final PackFile pack;

		final long position;

		final int size;

		long lastAccess;

		private boolean cleared;

		protected Ref(final PackFile pack, final long position,
				final ByteWindow v, final ReferenceQueue<ByteWindow> queue) {
			super(v, queue);
			this.pack = pack;
			this.position = position;
			this.size = v.size();
		}

		final synchronized boolean canClear() {
			if (cleared)
				return false;
			cleared = true;
			return true;
		}
	}

	private static final class Lock {
		// Used only for its implicit monitor.
	}
605 }