View Javadoc
1   /*
2    * Copyright (C) 2008-2009, Google Inc.
3    * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4    * and other copyright owners as documented in the project's IP log.
5    *
6    * This program and the accompanying materials are made available
7    * under the terms of the Eclipse Distribution License v1.0 which
8    * accompanies this distribution, is reproduced below, and is
9    * available at http://www.eclipse.org/org/documents/edl-v10.php
10   *
11   * All rights reserved.
12   *
13   * Redistribution and use in source and binary forms, with or
14   * without modification, are permitted provided that the following
15   * conditions are met:
16   *
17   * - Redistributions of source code must retain the above copyright
18   *   notice, this list of conditions and the following disclaimer.
19   *
20   * - Redistributions in binary form must reproduce the above
21   *   copyright notice, this list of conditions and the following
22   *   disclaimer in the documentation and/or other materials provided
23   *   with the distribution.
24   *
25   * - Neither the name of the Eclipse Foundation, Inc. nor the
26   *   names of its contributors may be used to endorse or promote
27   *   products derived from this software without specific prior
28   *   written permission.
29   *
30   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   */
44  
45  package org.eclipse.jgit.internal.storage.file;
46  
47  import java.io.IOException;
48  import java.lang.ref.ReferenceQueue;
49  import java.lang.ref.SoftReference;
50  import java.util.Random;
51  import java.util.concurrent.atomic.AtomicInteger;
52  import java.util.concurrent.atomic.AtomicLong;
53  import java.util.concurrent.atomic.AtomicReferenceArray;
54  import java.util.concurrent.locks.ReentrantLock;
55  
56  import org.eclipse.jgit.internal.JGitText;
57  import org.eclipse.jgit.storage.file.WindowCacheConfig;
58  
59  /**
60   * Caches slices of a {@link org.eclipse.jgit.internal.storage.file.PackFile} in
61   * memory for faster read access.
62   * <p>
63   * The WindowCache serves as a Java based "buffer cache", loading segments of a
64   * PackFile into the JVM heap prior to use. As JGit often wants to do reads of
65   * only tiny slices of a file, the WindowCache tries to smooth out these tiny
66   * reads into larger block-sized IO operations.
67   * <p>
68   * Whenever a cache miss occurs, {@link #load(PackFile, long)} is invoked by
69   * exactly one thread for the given <code>(PackFile,position)</code> key tuple.
70   * This is ensured by an array of locks, with the tuple hashed to a lock
71   * instance.
72   * <p>
73   * During a miss, older entries are evicted from the cache so long as
74   * {@link #isFull()} returns true.
75   * <p>
76   * It's too expensive during object access to be 100% accurate with a least
77   * recently used (LRU) algorithm. Strictly ordering every read is a lot of
78   * overhead that typically doesn't yield a corresponding benefit to the
79   * application.
80   * <p>
81   * This cache implements a loose LRU policy by randomly picking a window
82   * comprised of roughly 10% of the cache, and evicting the oldest accessed entry
83   * within that window.
84   * <p>
85   * Entities created by the cache are held under SoftReferences, permitting the
86   * Java runtime's garbage collector to evict entries when heap memory gets low.
87   * Most JREs implement a loose least recently used algorithm for this eviction.
88   * <p>
89   * The internal hash table does not expand at runtime, instead it is fixed in
90   * size at cache creation time. The internal lock table used to gate load
91   * invocations is also fixed in size.
92   * <p>
93   * The key tuple is passed through to methods as a pair of parameters rather
94   * than as a single Object, thus reducing the transient memory allocations of
95   * callers. It is more efficient to avoid the allocation, as we can't be 100%
96   * sure that a JIT would be able to stack-allocate a key tuple.
97   * <p>
98   * This cache has an implementation rule such that:
99   * <ul>
100  * <li>{@link #load(PackFile, long)} is invoked by at most one thread at a time
101  * for a given <code>(PackFile,position)</code> tuple.</li>
102  * <li>For every <code>load()</code> invocation there is exactly one
103  * {@link #createRef(PackFile, long, ByteWindow)} invocation to wrap a
104  * SoftReference around the cached entity.</li>
105  * <li>For every Reference created by <code>createRef()</code> there will be
106  * exactly one call to {@link #clear(Ref)} to cleanup any resources associated
107  * with the (now expired) cached entity.</li>
108  * </ul>
109  * <p>
110  * Therefore, it is safe to perform resource accounting increments during the
111  * {@link #load(PackFile, long)} or
112  * {@link #createRef(PackFile, long, ByteWindow)} methods, and matching
113  * decrements during {@link #clear(Ref)}. Implementors may need to override
114  * {@link #createRef(PackFile, long, ByteWindow)} in order to embed additional
115  * accounting information into an implementation specific
116  * {@link org.eclipse.jgit.internal.storage.file.WindowCache.Ref} subclass, as
117  * the cached entity may have already been evicted by the JRE's garbage
118  * collector.
119  * <p>
120  * To maintain higher concurrency workloads, during eviction only one thread
121  * performs the eviction work, while other threads can continue to insert new
122  * objects in parallel. This means that the cache can be temporarily over limit,
123  * especially if the nominated eviction thread is being starved relative to the
124  * other threads.
125  */
126 public class WindowCache {
127 	private static final int bits(int newSize) {
128 		if (newSize < 4096)
129 			throw new IllegalArgumentException(JGitText.get().invalidWindowSize);
130 		if (Integer.bitCount(newSize) != 1)
131 			throw new IllegalArgumentException(JGitText.get().windowSizeMustBePowerOf2);
132 		return Integer.numberOfTrailingZeros(newSize);
133 	}
134 
	/** Source of randomness used to pick eviction scan start points. */
	private static final Random rng = new Random();

	/** Currently installed cache; swapped wholesale by {@link #reconfigure(WindowCacheConfig)}. */
	private static volatile WindowCache cache;

	/** Threshold above which objects are streamed instead of fully loaded. */
	private static volatile int streamFileThreshold;

	static {
		// Install a cache built from the default configuration at class load,
		// so the static accessors below always see a non-null instance.
		reconfigure(new WindowCacheConfig());
	}
144 
145 	/**
146 	 * Modify the configuration of the window cache.
147 	 * <p>
148 	 * The new configuration is applied immediately. If the new limits are
149 	 * smaller than what what is currently cached, older entries will be purged
150 	 * as soon as possible to allow the cache to meet the new limit.
151 	 *
152 	 * @deprecated use {@code cfg.install()} to avoid internal reference.
153 	 * @param cfg
154 	 *            the new window cache configuration.
155 	 * @throws java.lang.IllegalArgumentException
156 	 *             the cache configuration contains one or more invalid
157 	 *             settings, usually too low of a limit.
158 	 */
159 	@Deprecated
160 	public static void reconfigure(WindowCacheConfig cfg) {
161 		final WindowCache nc = new WindowCache(cfg);
162 		final WindowCache oc = cache;
163 		if (oc != null)
164 			oc.removeAll();
165 		cache = nc;
166 		streamFileThreshold = cfg.getStreamFileThreshold();
167 		DeltaBaseCache.reconfigure(cfg);
168 	}
169 
	/** @return maximum object size that may be fully loaded; larger objects stream. */
	static int getStreamFileThreshold() {
		return streamFileThreshold;
	}
173 
	/**
	 * Get the currently installed cache instance.
	 *
	 * @return the cached instance.
	 */
	public static WindowCache getInstance() {
		return cache;
	}
180 
181 	static final ByteWindow get(PackFile pack, long offset)
182 			throws IOException {
183 		final WindowCache c = cache;
184 		final ByteWindow r = c.getOrLoad(pack, c.toStart(offset));
185 		if (c != cache) {
186 			// The cache was reconfigured while we were using the old one
187 			// to load this window. The window is still valid, but our
188 			// cache may think its still live. Ensure the window is removed
189 			// from the old cache so resources can be released.
190 			//
191 			c.removeAll();
192 		}
193 		return r;
194 	}
195 
	/** Drop every cached window belonging to {@code pack} from the active cache. */
	static final void purge(PackFile pack) {
		cache.removeAll(pack);
	}
199 
	/** ReferenceQueue to cleanup released and garbage collected windows. */
	private final ReferenceQueue<ByteWindow> queue;

	/** Number of entries in {@link #table}. */
	private final int tableSize;

	/** Access clock for loose LRU. */
	private final AtomicLong clock;

	/** Hash bucket directory; entries are chained below. */
	private final AtomicReferenceArray<Entry> table;

	/** Locks to prevent concurrent loads for same (PackFile,position). */
	private final Lock[] locks;

	/** Lock to elect the eviction thread after a load occurs. */
	private final ReentrantLock evictLock;

	/** Number of {@link #table} buckets to scan for an eviction window. */
	private final int evictBatch;

	/** Maximum number of pack files the cache may hold open at once. */
	private final int maxFiles;

	/** Maximum number of bytes of window data the cache may retain. */
	private final long maxBytes;

	/** True to memory-map windows; false to read them into byte arrays. */
	private final boolean mmap;

	/** log2 of {@link #windowSize}; converts byte offsets to window indices. */
	private final int windowSizeShift;

	/** Size in bytes of a single window; always a power of two. */
	private final int windowSize;

	/** Count of pack files currently held open by cached windows. */
	private final AtomicInteger openFiles;

	/** Total bytes currently retained by cached windows. */
	private final AtomicLong openBytes;
234 
235 	private WindowCache(WindowCacheConfig cfg) {
236 		tableSize = tableSize(cfg);
237 		final int lockCount = lockCount(cfg);
238 		if (tableSize < 1)
239 			throw new IllegalArgumentException(JGitText.get().tSizeMustBeGreaterOrEqual1);
240 		if (lockCount < 1)
241 			throw new IllegalArgumentException(JGitText.get().lockCountMustBeGreaterOrEqual1);
242 
243 		queue = new ReferenceQueue<>();
244 		clock = new AtomicLong(1);
245 		table = new AtomicReferenceArray<>(tableSize);
246 		locks = new Lock[lockCount];
247 		for (int i = 0; i < locks.length; i++)
248 			locks[i] = new Lock();
249 		evictLock = new ReentrantLock();
250 
251 		int eb = (int) (tableSize * .1);
252 		if (64 < eb)
253 			eb = 64;
254 		else if (eb < 4)
255 			eb = 4;
256 		if (tableSize < eb)
257 			eb = tableSize;
258 		evictBatch = eb;
259 
260 		maxFiles = cfg.getPackedGitOpenFiles();
261 		maxBytes = cfg.getPackedGitLimit();
262 		mmap = cfg.isPackedGitMMAP();
263 		windowSizeShift = bits(cfg.getPackedGitWindowSize());
264 		windowSize = 1 << windowSizeShift;
265 
266 		openFiles = new AtomicInteger();
267 		openBytes = new AtomicLong();
268 
269 		if (maxFiles < 1)
270 			throw new IllegalArgumentException(JGitText.get().openFilesMustBeAtLeast1);
271 		if (maxBytes < windowSize)
272 			throw new IllegalArgumentException(JGitText.get().windowSizeMustBeLesserThanLimit);
273 	}
274 
	/**
	 * Get the number of pack files currently held open by cached windows.
	 *
	 * @return the number of open files.
	 */
	public int getOpenFiles() {
		return openFiles.get();
	}
281 
	/**
	 * Get the number of bytes currently retained by cached windows.
	 *
	 * @return the number of open bytes.
	 */
	public long getOpenBytes() {
		return openBytes.get();
	}
288 
	/**
	 * Combine a pack's identity hash with a window index.
	 *
	 * @param packHash
	 *            hash code of the pack file.
	 * @param off
	 *            byte offset; only the window index (off / windowSize)
	 *            contributes, so all offsets within a window hash equally.
	 * @return hash for the (pack, window) tuple; may be negative.
	 */
	private int hash(int packHash, long off) {
		return packHash + (int) (off >>> windowSizeShift);
	}
292 
293 	private ByteWindow load(PackFile pack, long offset)
294 			throws IOException {
295 		if (pack.beginWindowCache())
296 			openFiles.incrementAndGet();
297 		try {
298 			if (mmap)
299 				return pack.mmap(offset, windowSize);
300 			return pack.read(offset, windowSize);
301 		} catch (IOException e) {
302 			close(pack);
303 			throw e;
304 		} catch (RuntimeException e) {
305 			close(pack);
306 			throw e;
307 		} catch (Error e) {
308 			close(pack);
309 			throw e;
310 		}
311 	}
312 
	/**
	 * Wrap a freshly loaded window in a {@link Ref} and account for its bytes.
	 *
	 * @param p
	 *            pack the window came from.
	 * @param o
	 *            window-aligned offset within the pack.
	 * @param v
	 *            the loaded window data.
	 * @return the new reference; its size has been added to {@link #openBytes}.
	 */
	private Ref createRef(PackFile p, long o, ByteWindow v) {
		final Ref ref = new Ref(p, o, v, queue);
		openBytes.addAndGet(ref.size);
		return ref;
	}
318 
	/** Undo the accounting done in {@link #createRef(PackFile, long, ByteWindow)}. */
	private void clear(Ref ref) {
		openBytes.addAndGet(-ref.size);
		close(ref.pack);
	}
323 
	/** Decrement {@link #openFiles} once the last cached window of {@code pack} is gone. */
	private void close(PackFile pack) {
		if (pack.endWindowCache())
			openFiles.decrementAndGet();
	}
328 
329 	private boolean isFull() {
330 		return maxFiles < openFiles.get() || maxBytes < openBytes.get();
331 	}
332 
	/**
	 * Round {@code offset} down to the start of its containing window.
	 *
	 * @param offset
	 *            any byte position within a pack.
	 * @return offset with the low {@link #windowSizeShift} bits cleared.
	 */
	private long toStart(long offset) {
		return (offset >>> windowSizeShift) << windowSizeShift;
	}
336 
337 	private static int tableSize(WindowCacheConfig cfg) {
338 		final int wsz = cfg.getPackedGitWindowSize();
339 		final long limit = cfg.getPackedGitLimit();
340 		if (wsz <= 0)
341 			throw new IllegalArgumentException(JGitText.get().invalidWindowSize);
342 		if (limit < wsz)
343 			throw new IllegalArgumentException(JGitText.get().windowSizeMustBeLesserThanLimit);
344 		return (int) Math.min(5 * (limit / wsz) / 2, 2000000000);
345 	}
346 
347 	private static int lockCount(WindowCacheConfig cfg) {
348 		return Math.max(cfg.getPackedGitOpenFiles(), 32);
349 	}
350 
	/**
	 * Lookup a cached object, creating and loading it if it doesn't exist.
	 *
	 * @param pack
	 *            the pack that "contains" the cached object.
	 * @param position
	 *            offset within <code>pack</code> of the object.
	 * @return the object reference.
	 * @throws IOException
	 *             the object reference was not in the cache and could not be
	 *             obtained by {@link #load(PackFile, long)}.
	 */
	private ByteWindow getOrLoad(PackFile pack, long position)
			throws IOException {
		final int slot = slot(pack, position);
		final Entry e1 = table.get(slot);
		// Optimistic lock-free probe of the bucket chain first.
		ByteWindow v = scan(e1, pack, position);
		if (v != null)
			return v;

		synchronized (lock(pack, position)) {
			// Re-check under the lock: another thread may have completed the
			// load while we waited, but only if the chain head changed.
			Entry e2 = table.get(slot);
			if (e2 != e1) {
				v = scan(e2, pack, position);
				if (v != null)
					return v;
			}

			v = load(pack, position);
			final Ref ref = createRef(pack, position, v);
			hit(ref);
			// CAS-insert at the chain head, pruning dead entries as we go;
			// retry whenever a concurrent writer republished the chain.
			for (;;) {
				final Entry n = new Entry(clean(e2), ref);
				if (table.compareAndSet(slot, e2, n))
					break;
				e2 = table.get(slot);
			}
		}

		// Elect at most one thread to reclaim queued refs and evict back down
		// to the configured limits; everyone else proceeds without blocking.
		if (evictLock.tryLock()) {
			try {
				gc();
				evict();
			} finally {
				evictLock.unlock();
			}
		}

		return v;
	}
401 
402 	private ByteWindow scan(Entry n, PackFile pack, long position) {
403 		for (; n != null; n = n.next) {
404 			final Ref r = n.ref;
405 			if (r.pack == pack && r.position == position) {
406 				final ByteWindow v = r.get();
407 				if (v != null) {
408 					hit(r);
409 					return v;
410 				}
411 				n.kill();
412 				break;
413 			}
414 		}
415 		return null;
416 	}
417 
	/** Record an access to {@code r} using the loose LRU clock. */
	private void hit(Ref r) {
		// We don't need to be 100% accurate here. It's sufficient that at least
		// one thread performs the increment. Any other concurrent access at
		// exactly the same time can simply use the same clock value.
		//
		// Consequently we attempt the set, but we don't try to recover should
		// it fail. This is why we don't use getAndIncrement() here.
		//
		final long c = clock.get();
		clock.compareAndSet(c, c + 1);
		r.lastAccess = c;
	}
430 
	/**
	 * Evict entries until the cache is back under its limits.
	 * <p>
	 * Implements the loose LRU policy: each pass samples {@link #evictBatch}
	 * consecutive buckets starting at a random index and kills the least
	 * recently accessed live entry found within that sample.
	 */
	private void evict() {
		while (isFull()) {
			int ptr = rng.nextInt(tableSize);
			Entry old = null;
			int slot = 0;
			for (int b = evictBatch - 1; b >= 0; b--, ptr++) {
				if (tableSize <= ptr)
					ptr = 0; // wrap around the end of the table
				for (Entry e = table.get(ptr); e != null; e = e.next) {
					if (e.dead)
						continue;
					if (old == null || e.ref.lastAccess < old.ref.lastAccess) {
						old = e;
						slot = ptr;
					}
				}
			}
			if (old != null) {
				old.kill();
				gc();
				// Prune the dead entry from its chain; a failed CAS only
				// means a concurrent writer already republished the chain.
				final Entry e1 = table.get(slot);
				table.compareAndSet(slot, e1, clean(e1));
			}
		}
	}
456 
	/**
	 * Clear every entry from the cache.
	 * <p>
	 * This is a last-ditch effort to clear out the cache, such as before it
	 * gets replaced by another cache that is configured differently. This
	 * method tries to force every cached entry through {@link #clear(Ref)} to
	 * ensure that resources are correctly accounted for and cleaned up by the
	 * subclass. A concurrent reader loading entries while this method is
	 * running may cause resource accounting failures.
	 */
	private void removeAll() {
		for (int s = 0; s < tableSize; s++) {
			Entry e1;
			do {
				// Kill the whole chain, then try to unlink it; retry if a
				// concurrent insert changed the chain head in the meantime.
				e1 = table.get(s);
				for (Entry e = e1; e != null; e = e.next)
					e.kill();
			} while (!table.compareAndSet(s, e1, null));
		}
		// Drain the references enqueued above so accounting is released now.
		gc();
	}
478 
	/**
	 * Clear all entries related to a single file.
	 * <p>
	 * Typically this method is invoked during {@link PackFile#close()}, when we
	 * know the pack is never going to be useful to us again (for example, it no
	 * longer exists on disk). A concurrent reader loading an entry from this
	 * same pack may cause the pack to become stuck in the cache anyway.
	 *
	 * @param pack
	 *            the file to purge all entries of.
	 */
	private void removeAll(PackFile pack) {
		for (int s = 0; s < tableSize; s++) {
			final Entry e1 = table.get(s);
			boolean hasDead = false;
			// Kill entries owned by this pack; also note already-dead entries
			// so the whole chain gets pruned in a single pass below.
			for (Entry e = e1; e != null; e = e.next) {
				if (e.ref.pack == pack) {
					e.kill();
					hasDead = true;
				} else if (e.dead)
					hasDead = true;
			}
			if (hasDead)
				table.compareAndSet(s, e1, clean(e1));
		}
		// Release accounting for the references enqueued above.
		gc();
	}
506 
	/**
	 * Drain the reference queue, releasing accounting for collected windows
	 * and pruning their entries from the hash table.
	 */
	private void gc() {
		Ref r;
		while ((r = (Ref) queue.poll()) != null) {
			clear(r);

			// Flag the matching entry dead and attempt to republish a cleaned
			// chain; a failed CAS is fine — the entry stays flagged and will
			// be pruned by a later update of this bucket.
			final int s = slot(r.pack, r.position);
			final Entry e1 = table.get(s);
			for (Entry n = e1; n != null; n = n.next) {
				if (n.ref == r) {
					n.dead = true;
					table.compareAndSet(s, e1, clean(e1));
					break;
				}
			}
		}
	}
523 
	/** Map a (pack, position) tuple to a bucket index; >>> 1 keeps the hash non-negative. */
	private int slot(PackFile pack, long position) {
		return (hash(pack.hash, position) >>> 1) % tableSize;
	}
527 
	/** Select the load lock that gates loads of the (pack, position) tuple. */
	private Lock lock(PackFile pack, long position) {
		return locks[(hash(pack.hash, position) >>> 1) % locks.length];
	}
531 
	/**
	 * Build a copy of {@code top}'s chain with all dead entries removed.
	 * <p>
	 * Each dead entry's ref is explicitly enqueued so {@link #gc()} can
	 * release its accounting. When no entry needed removal the original
	 * chain object is returned, letting callers' CAS become a no-op.
	 */
	private static Entry clean(Entry top) {
		while (top != null && top.dead) {
			top.ref.enqueue();
			top = top.next;
		}
		if (top == null)
			return null;
		final Entry n = clean(top.next);
		return n == top.next ? top : new Entry(n, top.ref);
	}
542 
	/** Node in a bucket's singly linked chain; immutable except for {@link #dead}. */
	private static class Entry {
		/** Next entry in the hash table's chain list. */
		final Entry next;

		/** The referenced object. */
		final Ref ref;

		/**
		 * Marked true when ref.get() returns null and the ref is dead.
		 * <p>
		 * A true here indicates that the ref is no longer accessible, and that
		 * we therefore need to eventually purge this Entry object out of the
		 * bucket's chain.
		 */
		volatile boolean dead;

		Entry(Entry n, Ref r) {
			next = n;
			ref = r;
		}

		/** Mark this entry dead and push its ref onto the reference queue. */
		final void kill() {
			dead = true;
			ref.enqueue();
		}
	}
569 
	/** A soft reference wrapped around a cached object. */
	private static class Ref extends SoftReference<ByteWindow> {
		/** Pack file the referenced window belongs to. */
		final PackFile pack;

		/** Window-aligned offset of the window within {@link #pack}. */
		final long position;

		/** Size in bytes of the window; cached so accounting survives GC of the window. */
		final int size;

		/** Loose LRU clock value of the most recent access; see WindowCache#hit. */
		long lastAccess;

		protected Ref(final PackFile pack, final long position,
				final ByteWindow v, final ReferenceQueue<ByteWindow> queue) {
			super(v, queue);
			this.pack = pack;
			this.position = position;
			this.size = v.size();
		}
	}
588 
	private static final class Lock {
		// Used only for its implicit monitor; instances carry no state.
	}
592 }