View Javadoc
1   /*
2    * Copyright (C) 2008-2011, Google Inc.
3    * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
4    * and other copyright owners as documented in the project's IP log.
5    *
6    * This program and the accompanying materials are made available
7    * under the terms of the Eclipse Distribution License v1.0 which
8    * accompanies this distribution, is reproduced below, and is
9    * available at http://www.eclipse.org/org/documents/edl-v10.php
10   *
11   * All rights reserved.
12   *
13   * Redistribution and use in source and binary forms, with or
14   * without modification, are permitted provided that the following
15   * conditions are met:
16   *
17   * - Redistributions of source code must retain the above copyright
18   *   notice, this list of conditions and the following disclaimer.
19   *
20   * - Redistributions in binary form must reproduce the above
21   *   copyright notice, this list of conditions and the following
22   *   disclaimer in the documentation and/or other materials provided
23   *   with the distribution.
24   *
25   * - Neither the name of the Eclipse Foundation, Inc. nor the
26   *   names of its contributors may be used to endorse or promote
27   *   products derived from this software without specific prior
28   *   written permission.
29   *
30   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
35   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
39   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
40   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43   */
44  
45  package org.eclipse.jgit.internal.storage.dfs;
46  
47  import java.io.IOException;
48  import java.util.Collection;
49  import java.util.Collections;
50  import java.util.Map;
51  import java.util.concurrent.ConcurrentHashMap;
52  import java.util.concurrent.atomic.AtomicLong;
53  import java.util.concurrent.atomic.AtomicReferenceArray;
54  import java.util.concurrent.locks.ReentrantLock;
55  
56  import org.eclipse.jgit.internal.JGitText;
57  
58  /**
59   * Caches slices of a {@link DfsPackFile} in memory for faster read access.
60   * <p>
61   * The DfsBlockCache serves as a Java based "buffer cache", loading segments of
62   * a DfsPackFile into the JVM heap prior to use. As JGit often wants to do reads
63   * of only tiny slices of a file, the DfsBlockCache tries to smooth out these
64   * tiny reads into larger block-sized IO operations.
65   * <p>
66   * Whenever a cache miss occurs, loading is invoked by exactly one thread for
67   * the given <code>(DfsPackKey,position)</code> key tuple. This is ensured by an
68   * array of locks, with the tuple hashed to a lock instance.
69   * <p>
70   * It's too expensive during object access to be accurate with a least recently
71   * used (LRU) algorithm. Strictly ordering every read is a lot of overhead that
72   * typically doesn't yield a corresponding benefit to the application. This
73   * cache implements a clock replacement algorithm, giving each block one chance
74   * to have been accessed during a sweep of the cache to save itself from
75   * eviction.
76   * <p>
77   * Entities created by the cache are held under hard references, preventing the
78   * Java VM from clearing anything. Blocks are discarded by the replacement
79   * algorithm when adding a new block would cause the cache to exceed its
80   * configured maximum size.
81   * <p>
82   * The key tuple is passed through to methods as a pair of parameters rather
83   * than as a single Object, thus reducing the transient memory allocations of
84   * callers. It is more efficient to avoid the allocation, as we can't be 100%
85   * sure that a JIT would be able to stack-allocate a key tuple.
86   * <p>
87   * The internal hash table does not expand at runtime, instead it is fixed in
88   * size at cache creation time. The internal lock table used to gate load
89   * invocations is also fixed in size.
90   */
91  public final class DfsBlockCache {
92  	private static volatile DfsBlockCache cache;
93  
94  	static {
95  		reconfigure(new DfsBlockCacheConfig());
96  	}
97  
98  	/**
99  	 * Modify the configuration of the window cache.
100 	 * <p>
101 	 * The new configuration is applied immediately, and the existing cache is
102 	 * cleared.
103 	 *
104 	 * @param cfg
105 	 *            the new window cache configuration.
106 	 * @throws IllegalArgumentException
107 	 *             the cache configuration contains one or more invalid
108 	 *             settings, usually too low of a limit.
109 	 */
110 	public static void reconfigure(DfsBlockCacheConfig cfg) {
111 		DfsBlockCache nc = new DfsBlockCache(cfg);
112 		DfsBlockCache oc = cache;
113 		cache = nc;
114 
115 		if (oc != null) {
116 			for (DfsPackFile pack : oc.getPackFiles())
117 				pack.key.cachedSize.set(0);
118 		}
119 	}
120 
121 	/** @return the currently active DfsBlockCache. */
122 	public static DfsBlockCache getInstance() {
123 		return cache;
124 	}
125 
126 	/** Number of entries in {@link #table}. */
127 	private final int tableSize;
128 
129 	/** Hash bucket directory; entries are chained below. */
130 	private final AtomicReferenceArray<HashEntry> table;
131 
132 	/** Locks to prevent concurrent loads for same (PackFile,position). */
133 	private final ReentrantLock[] loadLocks;
134 
135 	/** Maximum number of bytes the cache should hold. */
136 	private final long maxBytes;
137 
138 	/** Pack files smaller than this size can be copied through the cache. */
139 	private final long maxStreamThroughCache;
140 
141 	/**
142 	 * Suggested block size to read from pack files in.
143 	 * <p>
144 	 * If a pack file does not have a native block size, this size will be used.
145 	 * <p>
146 	 * If a pack file has a native size, a whole multiple of the native size
147 	 * will be used until it matches this size.
148 	 */
149 	private final int blockSize;
150 
151 	/** As {@link #blockSize} is a power of 2, bits to shift for a / blockSize. */
152 	private final int blockSizeShift;
153 
154 	/** Cache of pack files, indexed by description. */
155 	private final Map<DfsPackDescription, DfsPackFile> packCache;
156 
157 	/** View of pack files in the pack cache. */
158 	private final Collection<DfsPackFile> packFiles;
159 
160 	/** Number of times a block was found in the cache. */
161 	private final AtomicLong statHit;
162 
163 	/** Number of times a block was not found, and had to be loaded. */
164 	private final AtomicLong statMiss;
165 
166 	/** Number of blocks evicted due to cache being full. */
167 	private volatile long statEvict;
168 
169 	/** Protects the clock and its related data. */
170 	private final ReentrantLock clockLock;
171 
172 	/** Current position of the clock. */
173 	private Ref clockHand;
174 
175 	/** Number of bytes currently loaded in the cache. */
176 	private volatile long liveBytes;
177 
178 	private DfsBlockCache(final DfsBlockCacheConfig cfg) {
179 		tableSize = tableSize(cfg);
180 		if (tableSize < 1)
181 			throw new IllegalArgumentException(JGitText.get().tSizeMustBeGreaterOrEqual1);
182 
183 		table = new AtomicReferenceArray<HashEntry>(tableSize);
184 		loadLocks = new ReentrantLock[32];
185 		for (int i = 0; i < loadLocks.length; i++)
186 			loadLocks[i] = new ReentrantLock(true /* fair */);
187 
188 		int eb = (int) (tableSize * .1);
189 		if (64 < eb)
190 			eb = 64;
191 		else if (eb < 4)
192 			eb = 4;
193 		if (tableSize < eb)
194 			eb = tableSize;
195 
196 		maxBytes = cfg.getBlockLimit();
197 		maxStreamThroughCache = (long) (maxBytes * cfg.getStreamRatio());
198 		blockSize = cfg.getBlockSize();
199 		blockSizeShift = Integer.numberOfTrailingZeros(blockSize);
200 
201 		clockLock = new ReentrantLock(true /* fair */);
202 		clockHand = new Ref<Object>(new DfsPackKey(), -1, 0, null);
203 		clockHand.next = clockHand;
204 
205 		packCache = new ConcurrentHashMap<DfsPackDescription, DfsPackFile>(
206 				16, 0.75f, 1);
207 		packFiles = Collections.unmodifiableCollection(packCache.values());
208 
209 		statHit = new AtomicLong();
210 		statMiss = new AtomicLong();
211 	}
212 
213 	boolean shouldCopyThroughCache(long length) {
214 		return length <= maxStreamThroughCache;
215 	}
216 
217 	/** @return total number of bytes in the cache. */
218 	public long getCurrentSize() {
219 		return liveBytes;
220 	}
221 
222 	/** @return 0..100, defining how full the cache is. */
223 	public long getFillPercentage() {
224 		return getCurrentSize() * 100 / maxBytes;
225 	}
226 
227 	/** @return number of requests for items in the cache. */
228 	public long getHitCount() {
229 		return statHit.get();
230 	}
231 
232 	/** @return number of requests for items not in the cache. */
233 	public long getMissCount() {
234 		return statMiss.get();
235 	}
236 
237 	/** @return total number of requests (hit + miss). */
238 	public long getTotalRequestCount() {
239 		return getHitCount() + getMissCount();
240 	}
241 
242 	/** @return 0..100, defining number of cache hits. */
243 	public long getHitRatio() {
244 		long hits = statHit.get();
245 		long miss = statMiss.get();
246 		long total = hits + miss;
247 		if (total == 0)
248 			return 0;
249 		return hits * 100 / total;
250 	}
251 
252 	/** @return number of evictions performed due to cache being full. */
253 	public long getEvictions() {
254 		return statEvict;
255 	}
256 
257 	/**
258 	 * Get the pack files stored in this cache.
259 	 *
260 	 * @return a collection of pack files, some of which may not actually be
261 	 *             present; the caller should check the pack's cached size.
262 	 */
263 	public Collection<DfsPackFile> getPackFiles() {
264 		return packFiles;
265 	}
266 
267 	DfsPackFile getOrCreate(DfsPackDescription dsc, DfsPackKey key) {
268 		// TODO This table grows without bound. It needs to clean up
269 		// entries that aren't in cache anymore, and aren't being used
270 		// by a live DfsObjDatabase reference.
271 		synchronized (packCache) {
272 			DfsPackFile pack = packCache.get(dsc);
273 			if (pack != null && pack.invalid()) {
274 				packCache.remove(dsc);
275 				pack = null;
276 			}
277 			if (pack == null) {
278 				if (key == null)
279 					key = new DfsPackKey();
280 				pack = new DfsPackFile(this, dsc, key);
281 				packCache.put(dsc, pack);
282 			}
283 			return pack;
284 		}
285 	}
286 
287 	private int hash(int packHash, long off) {
288 		return packHash + (int) (off >>> blockSizeShift);
289 	}
290 
291 	int getBlockSize() {
292 		return blockSize;
293 	}
294 
295 	private static int tableSize(final DfsBlockCacheConfig cfg) {
296 		final int wsz = cfg.getBlockSize();
297 		final long limit = cfg.getBlockLimit();
298 		if (wsz <= 0)
299 			throw new IllegalArgumentException(JGitText.get().invalidWindowSize);
300 		if (limit < wsz)
301 			throw new IllegalArgumentException(JGitText.get().windowSizeMustBeLesserThanLimit);
302 		return (int) Math.min(5 * (limit / wsz) / 2, Integer.MAX_VALUE);
303 	}
304 
305 	/**
306 	 * Lookup a cached object, creating and loading it if it doesn't exist.
307 	 *
308 	 * @param pack
309 	 *            the pack that "contains" the cached object.
310 	 * @param position
311 	 *            offset within <code>pack</code> of the object.
312 	 * @param ctx
313 	 *            current thread's reader.
314 	 * @return the object reference.
315 	 * @throws IOException
316 	 *             the reference was not in the cache and could not be loaded.
317 	 */
318 	DfsBlock getOrLoad(DfsPackFile pack, long position, DfsReader ctx)
319 			throws IOException {
320 		final long requestedPosition = position;
321 		position = pack.alignToBlock(position);
322 
323 		DfsPackKey key = pack.key;
324 		int slot = slot(key, position);
325 		HashEntry e1 = table.get(slot);
326 		DfsBlock v = scan(e1, key, position);
327 		if (v != null) {
328 			statHit.incrementAndGet();
329 			return v;
330 		}
331 
332 		reserveSpace(blockSize);
333 		ReentrantLock regionLock = lockFor(key, position);
334 		regionLock.lock();
335 		try {
336 			HashEntry e2 = table.get(slot);
337 			if (e2 != e1) {
338 				v = scan(e2, key, position);
339 				if (v != null) {
340 					statHit.incrementAndGet();
341 					creditSpace(blockSize);
342 					return v;
343 				}
344 			}
345 
346 			statMiss.incrementAndGet();
347 			boolean credit = true;
348 			try {
349 				v = pack.readOneBlock(position, ctx);
350 				credit = false;
351 			} finally {
352 				if (credit)
353 					creditSpace(blockSize);
354 			}
355 			if (position != v.start) {
356 				// The file discovered its blockSize and adjusted.
357 				position = v.start;
358 				slot = slot(key, position);
359 				e2 = table.get(slot);
360 			}
361 
362 			key.cachedSize.addAndGet(v.size());
363 			Ref<DfsBlock> ref = new Ref<DfsBlock>(key, position, v.size(), v);
364 			ref.hot = true;
365 			for (;;) {
366 				HashEntry n = new HashEntry(clean(e2), ref);
367 				if (table.compareAndSet(slot, e2, n))
368 					break;
369 				e2 = table.get(slot);
370 			}
371 			addToClock(ref, blockSize - v.size());
372 		} finally {
373 			regionLock.unlock();
374 		}
375 
376 		// If the block size changed from the default, it is possible the block
377 		// that was loaded is the wrong block for the requested position.
378 		if (v.contains(pack.key, requestedPosition))
379 			return v;
380 		return getOrLoad(pack, requestedPosition, ctx);
381 	}
382 
383 	@SuppressWarnings("unchecked")
384 	private void reserveSpace(int reserve) {
385 		clockLock.lock();
386 		try {
387 			long live = liveBytes + reserve;
388 			if (maxBytes < live) {
389 				Ref prev = clockHand;
390 				Ref hand = clockHand.next;
391 				do {
392 					if (hand.hot) {
393 						// Value was recently touched. Clear
394 						// hot and give it another chance.
395 						hand.hot = false;
396 						prev = hand;
397 						hand = hand.next;
398 						continue;
399 					} else if (prev == hand)
400 						break;
401 
402 					// No recent access since last scan, kill
403 					// value and remove from clock.
404 					Ref dead = hand;
405 					hand = hand.next;
406 					prev.next = hand;
407 					dead.next = null;
408 					dead.value = null;
409 					live -= dead.size;
410 					dead.pack.cachedSize.addAndGet(-dead.size);
411 					statEvict++;
412 				} while (maxBytes < live);
413 				clockHand = prev;
414 			}
415 			liveBytes = live;
416 		} finally {
417 			clockLock.unlock();
418 		}
419 	}
420 
421 	private void creditSpace(int credit) {
422 		clockLock.lock();
423 		liveBytes -= credit;
424 		clockLock.unlock();
425 	}
426 
427 	private void addToClock(Ref ref, int credit) {
428 		clockLock.lock();
429 		try {
430 			if (credit != 0)
431 				liveBytes -= credit;
432 			Ref ptr = clockHand;
433 			ref.next = ptr.next;
434 			ptr.next = ref;
435 			clockHand = ref;
436 		} finally {
437 			clockLock.unlock();
438 		}
439 	}
440 
441 	void put(DfsBlock v) {
442 		put(v.pack, v.start, v.size(), v);
443 	}
444 
445 	<T> Ref<T> put(DfsPackKey key, long pos, int size, T v) {
446 		int slot = slot(key, pos);
447 		HashEntry e1 = table.get(slot);
448 		Ref<T> ref = scanRef(e1, key, pos);
449 		if (ref != null)
450 			return ref;
451 
452 		reserveSpace(size);
453 		ReentrantLock regionLock = lockFor(key, pos);
454 		regionLock.lock();
455 		try {
456 			HashEntry e2 = table.get(slot);
457 			if (e2 != e1) {
458 				ref = scanRef(e2, key, pos);
459 				if (ref != null) {
460 					creditSpace(size);
461 					return ref;
462 				}
463 			}
464 
465 			key.cachedSize.addAndGet(size);
466 			ref = new Ref<T>(key, pos, size, v);
467 			ref.hot = true;
468 			for (;;) {
469 				HashEntry n = new HashEntry(clean(e2), ref);
470 				if (table.compareAndSet(slot, e2, n))
471 					break;
472 				e2 = table.get(slot);
473 			}
474 			addToClock(ref, 0);
475 		} finally {
476 			regionLock.unlock();
477 		}
478 		return ref;
479 	}
480 
481 	boolean contains(DfsPackKey key, long position) {
482 		return scan(table.get(slot(key, position)), key, position) != null;
483 	}
484 
485 	@SuppressWarnings("unchecked")
486 	<T> T get(DfsPackKey key, long position) {
487 		T val = (T) scan(table.get(slot(key, position)), key, position);
488 		if (val == null)
489 			statMiss.incrementAndGet();
490 		else
491 			statHit.incrementAndGet();
492 		return val;
493 	}
494 
495 	private <T> T scan(HashEntry n, DfsPackKey pack, long position) {
496 		Ref<T> r = scanRef(n, pack, position);
497 		return r != null ? r.get() : null;
498 	}
499 
500 	@SuppressWarnings("unchecked")
501 	private <T> Ref<T> scanRef(HashEntry n, DfsPackKey pack, long position) {
502 		for (; n != null; n = n.next) {
503 			Ref<T> r = n.ref;
504 			if (r.pack == pack && r.position == position)
505 				return r.get() != null ? r : null;
506 		}
507 		return null;
508 	}
509 
510 	void remove(DfsPackFile pack) {
511 		synchronized (packCache) {
512 			packCache.remove(pack.getPackDescription());
513 		}
514 	}
515 
516 	private int slot(DfsPackKey pack, long position) {
517 		return (hash(pack.hash, position) >>> 1) % tableSize;
518 	}
519 
520 	private ReentrantLock lockFor(DfsPackKey pack, long position) {
521 		return loadLocks[(hash(pack.hash, position) >>> 1) % loadLocks.length];
522 	}
523 
524 	private static HashEntry clean(HashEntry top) {
525 		while (top != null && top.ref.next == null)
526 			top = top.next;
527 		if (top == null)
528 			return null;
529 		HashEntry n = clean(top.next);
530 		return n == top.next ? top : new HashEntry(n, top.ref);
531 	}
532 
533 	private static final class HashEntry {
534 		/** Next entry in the hash table's chain list. */
535 		final HashEntry next;
536 
537 		/** The referenced object. */
538 		final Ref ref;
539 
540 		HashEntry(HashEntry n, Ref r) {
541 			next = n;
542 			ref = r;
543 		}
544 	}
545 
546 	static final class Ref<T> {
547 		final DfsPackKey pack;
548 		final long position;
549 		final int size;
550 		volatile T value;
551 		Ref next;
552 		volatile boolean hot;
553 
554 		Ref(DfsPackKey pack, long position, int size, T v) {
555 			this.pack = pack;
556 			this.position = position;
557 			this.size = size;
558 			this.value = v;
559 		}
560 
561 		T get() {
562 			T v = value;
563 			if (v != null)
564 				hot = true;
565 			return v;
566 		}
567 
568 		boolean has() {
569 			return value != null;
570 		}
571 	}
572 }