View Javadoc
1   /*
2    * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  
44  package org.eclipse.jgit.internal.storage.file;
45  
46  import java.text.MessageFormat;
47  
48  import org.eclipse.jgit.errors.CorruptObjectException;
49  import org.eclipse.jgit.internal.JGitText;
50  import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry;
51  import org.eclipse.jgit.lib.ObjectId;
52  
53  /**
54   * <p>
55   * Reverse index for forward pack index. Provides operations based on offset
56   * instead of object id. Such offset-based reverse lookups are performed in
57   * O(log n) time.
58   * </p>
59   *
60   * @see PackIndex
61   * @see PackFile
62   */
63  public class PackReverseIndex {
64  	/** Index we were created from, and that has our ObjectId data. */
65  	private final PackIndex index;
66  
67  	/** The number of bytes per entry in the offsetIndex. */
68  	private final long bucketSize;
69  
70  	/**
71  	 * An index into the nth mapping, where the value is the position after the
72  	 * the last index that contains the values of the bucket. For example given
73  	 * offset o (and bucket = o / bucketSize), the offset will be contained in
74  	 * the range nth[offsetIndex[bucket - 1]] inclusive to
75  	 * nth[offsetIndex[bucket]] exclusive.
76  	 *
77  	 * See {@link #binarySearch}
78  	 */
79  	private final int[] offsetIndex;
80  
81  	/** Mapping from indices in offset order to indices in SHA-1 order. */
82  	private final int[] nth;
83  
84  	/**
85  	 * Create reverse index from straight/forward pack index, by indexing all
86  	 * its entries.
87  	 *
88  	 * @param packIndex
89  	 *            forward index - entries to (reverse) index.
90  	 */
91  	public PackReverseIndex(PackIndex packIndex) {
92  		index = packIndex;
93  
94  		final long cnt = index.getObjectCount();
95  		if (cnt + 1 > Integer.MAX_VALUE)
96  			throw new IllegalArgumentException(
97  					JGitText.get().hugeIndexesAreNotSupportedByJgitYet);
98  
99  		if (cnt == 0) {
100 			bucketSize = Long.MAX_VALUE;
101 			offsetIndex = new int[1];
102 			nth = new int[0];
103 			return;
104 		}
105 
106 		final long[] offsetsBySha1 = new long[(int) cnt];
107 
108 		long maxOffset = 0;
109 		int ith = 0;
110 		for (MutableEntry me : index) {
111 			final long o = me.getOffset();
112 			offsetsBySha1[ith++] = o;
113 			if (o > maxOffset)
114 				maxOffset = o;
115 		}
116 
117 		bucketSize = maxOffset / cnt + 1;
118 		int[] bucketIndex = new int[(int) cnt];
119 		int[] bucketValues = new int[(int) cnt + 1];
120 		for (int oi = 0; oi < offsetsBySha1.length; oi++) {
121 			final long o = offsetsBySha1[oi];
122 			final int bucket = (int) (o / bucketSize);
123 			final int bucketValuesPos = oi + 1;
124 			final int current = bucketIndex[bucket];
125 			bucketIndex[bucket] = bucketValuesPos;
126 			bucketValues[bucketValuesPos] = current;
127 		}
128 
129 		int nthByOffset = 0;
130 		nth = new int[offsetsBySha1.length];
131 		offsetIndex = bucketIndex; // Reuse the allocation
132 		for (int bi = 0; bi < bucketIndex.length; bi++) {
133 			final int start = nthByOffset;
134 			// Insertion sort of the values in the bucket.
135 			for (int vi = bucketIndex[bi]; vi > 0; vi = bucketValues[vi]) {
136 				final int nthBySha1 = vi - 1;
137 				final long o = offsetsBySha1[nthBySha1];
138 				int insertion = nthByOffset++;
139 				for (; start < insertion; insertion--) {
140 					if (o > offsetsBySha1[nth[insertion - 1]])
141 						break;
142 					nth[insertion] = nth[insertion - 1];
143 				}
144 				nth[insertion] = nthBySha1;
145 			}
146 			offsetIndex[bi] = nthByOffset;
147 		}
148 	}
149 
150 	/**
151 	 * Search for object id with the specified start offset in this pack
152 	 * (reverse) index.
153 	 *
154 	 * @param offset
155 	 *            start offset of object to find.
156 	 * @return object id for this offset, or null if no object was found.
157 	 */
158 	public ObjectId findObject(long offset) {
159 		final int ith = binarySearch(offset);
160 		if (ith < 0)
161 			return null;
162 		return index.getObjectId(nth[ith]);
163 	}
164 
165 	/**
166 	 * Search for the next offset to the specified offset in this pack (reverse)
167 	 * index.
168 	 *
169 	 * @param offset
170 	 *            start offset of previous object (must be valid-existing
171 	 *            offset).
172 	 * @param maxOffset
173 	 *            maximum offset in a pack (returned when there is no next
174 	 *            offset).
175 	 * @return offset of the next object in a pack or maxOffset if provided
176 	 *         offset was the last one.
177 	 * @throws org.eclipse.jgit.errors.CorruptObjectException
178 	 *             when there is no object with the provided offset.
179 	 */
180 	public long findNextOffset(long offset, long maxOffset)
181 			throws CorruptObjectException {
182 		final int ith = binarySearch(offset);
183 		if (ith < 0)
184 			throw new CorruptObjectException(
185 					MessageFormat.format(
186 							JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset,
187 							Long.valueOf(offset)));
188 
189 		if (ith + 1 == nth.length)
190 			return maxOffset;
191 		return index.getOffset(nth[ith + 1]);
192 	}
193 
194 	int findPostion(long offset) {
195 		return binarySearch(offset);
196 	}
197 
198 	private int binarySearch(long offset) {
199 		int bucket = (int) (offset / bucketSize);
200 		int low = bucket == 0 ? 0 : offsetIndex[bucket - 1];
201 		int high = offsetIndex[bucket];
202 		while (low < high) {
203 			final int mid = (low + high) >>> 1;
204 			final long o = index.getOffset(nth[mid]);
205 			if (offset < o)
206 				high = mid;
207 			else if (offset == o)
208 				return mid;
209 			else
210 				low = mid + 1;
211 		}
212 		return -1;
213 	}
214 
215 	ObjectId findObjectByPosition(int nthPosition) {
216 		return index.getObjectId(nth[nthPosition]);
217 	}
218 }