View Javadoc
1   /*
2    * Copyright (C) 2008, Google Inc.
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  
44  package org.eclipse.jgit.treewalk;
45  
46  import java.io.IOException;
47  
48  import org.eclipse.jgit.annotations.Nullable;
49  import org.eclipse.jgit.errors.CorruptObjectException;
50  import org.eclipse.jgit.lib.FileMode;
51  import org.eclipse.jgit.lib.ObjectReader;
52  import org.eclipse.jgit.lib.Repository;
53  
54  /**
55   * Specialized TreeWalk to detect directory-file (D/F) name conflicts.
56   * <p>
57   * Due to the way a Git tree is organized the standard
58   * {@link org.eclipse.jgit.treewalk.TreeWalk} won't easily find a D/F conflict
59   * when merging two or more trees together. In the standard TreeWalk the file
60   * will be returned first, and then much later the directory will be returned.
61   * This makes it impossible for the application to efficiently detect and handle
62   * the conflict.
63   * <p>
64   * Using this walk implementation causes the directory to report earlier than
65   * usual, at the same time as the non-directory entry. This permits the
66   * application to handle the D/F conflict in a single step. The directory is
67   * returned only once, so it does not get returned later in the iteration.
68   * <p>
69   * When a D/F conflict is detected
70   * {@link org.eclipse.jgit.treewalk.TreeWalk#isSubtree()} will return true and
71   * {@link org.eclipse.jgit.treewalk.TreeWalk#enterSubtree()} will recurse into
72   * the subtree, no matter which iterator originally supplied the subtree.
73   * <p>
74   * Because conflicted directories report early, using this walk implementation
75   * to populate a {@link org.eclipse.jgit.dircache.DirCacheBuilder} may cause the
76   * automatic resorting to run and fix the entry ordering.
77   * <p>
78   * This walk implementation requires more CPU to implement a look-ahead and a
79   * look-behind to merge a D/F pair together, or to skip a previously reported
80   * directory. In typical Git repositories the look-ahead cost is 0 and the
81   * look-behind doesn't trigger, as users tend not to create trees which contain
82   * both "foo" as a directory and "foo.c" as a file.
83   * <p>
84   * In the worst-case however several thousand look-ahead steps per walk step may
85   * be necessary, making the overhead quite significant. Since this worst-case
86   * should never happen this walk implementation has made the time/space tradeoff
87   * in favor of more-time/less-space, as that better suits the typical case.
88   */
89  public class NameConflictTreeWalk extends TreeWalk {
90  	private static final int TREE_MODE = FileMode.TREE.getBits();
91  
92  	private boolean fastMinHasMatch;
93  
94  	private AbstractTreeIterator dfConflict;
95  
96  	/**
97  	 * Create a new tree walker for a given repository.
98  	 *
99  	 * @param repo
100 	 *            the repository the walker will obtain data from.
101 	 */
102 	public NameConflictTreeWalk(Repository repo) {
103 		super(repo);
104 	}
105 
106 	/**
107 	 * Create a new tree walker for a given repository.
108 	 *
109 	 * @param repo
110 	 *            the repository the walker will obtain data from.
111 	 * @param or
112 	 *            the reader the walker will obtain tree data from.
113 	 * @since 4.3
114 	 */
115 	public NameConflictTreeWalk(@Nullable Repository repo, ObjectReader or) {
116 		super(repo, or);
117 	}
118 
119 	/**
120 	 * Create a new tree walker for a given repository.
121 	 *
122 	 * @param or
123 	 *            the reader the walker will obtain tree data from.
124 	 */
125 	public NameConflictTreeWalk(ObjectReader or) {
126 		super(or);
127 	}
128 
129 	@Override
130 	AbstractTreeIterator min() throws CorruptObjectException {
131 		for (;;) {
132 			final AbstractTreeIterator minRef = fastMin();
133 			if (fastMinHasMatch)
134 				return minRef;
135 
136 			if (isTree(minRef)) {
137 				if (skipEntry(minRef)) {
138 					for (AbstractTreeIterator t : trees) {
139 						if (t.matches == minRef) {
140 							t.next(1);
141 							t.matches = null;
142 						}
143 					}
144 					continue;
145 				}
146 				return minRef;
147 			}
148 
149 			return combineDF(minRef);
150 		}
151 	}
152 
153 	private AbstractTreeIterator fastMin() {
154 		fastMinHasMatch = true;
155 
156 		int i = 0;
157 		AbstractTreeIterator minRef = trees[i];
158 		while (minRef.eof() && ++i < trees.length)
159 			minRef = trees[i];
160 		if (minRef.eof())
161 			return minRef;
162 
163 		boolean hasConflict = false;
164 		minRef.matches = minRef;
165 		while (++i < trees.length) {
166 			final AbstractTreeIterator t = trees[i];
167 			if (t.eof())
168 				continue;
169 
170 			final int cmp = t.pathCompare(minRef);
171 			if (cmp < 0) {
172 				if (fastMinHasMatch && isTree(minRef) && !isTree(t)
173 						&& nameEqual(minRef, t)) {
174 					// We used to be at a tree, but now we are at a file
175 					// with the same name. Allow the file to match the
176 					// tree anyway.
177 					//
178 					t.matches = minRef;
179 					hasConflict = true;
180 				} else {
181 					fastMinHasMatch = false;
182 					t.matches = t;
183 					minRef = t;
184 				}
185 			} else if (cmp == 0) {
186 				// Exact name/mode match is best.
187 				//
188 				t.matches = minRef;
189 			} else if (fastMinHasMatch && isTree(t) && !isTree(minRef)
190 					&& !isGitlink(minRef) && nameEqual(t, minRef)) {
191 				// The minimum is a file (non-tree) but the next entry
192 				// of this iterator is a tree whose name matches our file.
193 				// This is a classic D/F conflict and commonly occurs like
194 				// this, with no gaps in between the file and directory.
195 				//
196 				// Use the tree as the minimum instead (see combineDF).
197 				//
198 
199 				for (int k = 0; k < i; k++) {
200 					final AbstractTreeIterator p = trees[k];
201 					if (p.matches == minRef)
202 						p.matches = t;
203 				}
204 				t.matches = t;
205 				minRef = t;
206 				hasConflict = true;
207 			} else
208 				fastMinHasMatch = false;
209 		}
210 
211 		if (hasConflict && fastMinHasMatch && dfConflict == null)
212 			dfConflict = minRef;
213 		return minRef;
214 	}
215 
216 	private static boolean nameEqual(final AbstractTreeIterator a,
217 			final AbstractTreeIterator b) {
218 		return a.pathCompare(b, TREE_MODE) == 0;
219 	}
220 
221 	private boolean isGitlink(AbstractTreeIterator p) {
222 		return FileMode.GITLINK.equals(p.mode);
223 	}
224 
225 	private static boolean isTree(AbstractTreeIterator p) {
226 		return FileMode.TREE.equals(p.mode);
227 	}
228 
229 	private boolean skipEntry(AbstractTreeIterator minRef)
230 			throws CorruptObjectException {
231 		// A tree D/F may have been handled earlier. We need to
232 		// not report this path if it has already been reported.
233 		//
234 		for (AbstractTreeIterator t : trees) {
235 			if (t.matches == minRef || t.first())
236 				continue;
237 
238 			int stepsBack = 0;
239 			for (;;) {
240 				stepsBack++;
241 				t.back(1);
242 
243 				final int cmp = t.pathCompare(minRef, 0);
244 				if (cmp == 0) {
245 					// We have already seen this "$path" before. Skip it.
246 					//
247 					t.next(stepsBack);
248 					return true;
249 				} else if (cmp < 0 || t.first()) {
250 					// We cannot find "$path" in t; it will never appear.
251 					//
252 					t.next(stepsBack);
253 					break;
254 				}
255 			}
256 		}
257 
258 		// We have never seen the current path before.
259 		//
260 		return false;
261 	}
262 
263 	private AbstractTreeIterator combineDF(AbstractTreeIterator minRef)
264 			throws CorruptObjectException {
265 		// Look for a possible D/F conflict forward in the tree(s)
266 		// as there may be a "$path/" which matches "$path". Make
267 		// such entries match this entry.
268 		//
269 		AbstractTreeIterator treeMatch = null;
270 		for (AbstractTreeIterator t : trees) {
271 			if (t.matches == minRef || t.eof())
272 				continue;
273 
274 			for (;;) {
275 				final int cmp = t.pathCompare(minRef, TREE_MODE);
276 				if (cmp < 0) {
277 					// The "$path/" may still appear later.
278 					//
279 					t.matchShift++;
280 					t.next(1);
281 					if (t.eof()) {
282 						t.back(t.matchShift);
283 						t.matchShift = 0;
284 						break;
285 					}
286 				} else if (cmp == 0) {
287 					// We have a conflict match here.
288 					//
289 					t.matches = minRef;
290 					treeMatch = t;
291 					break;
292 				} else {
293 					// A conflict match is not possible.
294 					//
295 					if (t.matchShift != 0) {
296 						t.back(t.matchShift);
297 						t.matchShift = 0;
298 					}
299 					break;
300 				}
301 			}
302 		}
303 
304 		if (treeMatch != null) {
305 			// If we do have a conflict use one of the directory
306 			// matching iterators instead of the file iterator.
307 			// This way isSubtree is true and isRecursive works.
308 			//
309 			for (AbstractTreeIterator t : trees)
310 				if (t.matches == minRef)
311 					t.matches = treeMatch;
312 
313 			if (dfConflict == null && !isGitlink(minRef)) {
314 				dfConflict = treeMatch;
315 			}
316 
317 			return treeMatch;
318 		}
319 
320 		return minRef;
321 	}
322 
323 	@Override
324 	void popEntriesEqual() throws CorruptObjectException {
325 		final AbstractTreeIterator ch = currentHead;
326 		for (int i = 0; i < trees.length; i++) {
327 			final AbstractTreeIterator t = trees[i];
328 			if (t.matches == ch) {
329 				if (t.matchShift == 0)
330 					t.next(1);
331 				else {
332 					t.back(t.matchShift);
333 					t.matchShift = 0;
334 				}
335 				t.matches = null;
336 			}
337 		}
338 
339 		if (ch == dfConflict)
340 			dfConflict = null;
341 	}
342 
343 	@Override
344 	void skipEntriesEqual() throws CorruptObjectException {
345 		final AbstractTreeIterator ch = currentHead;
346 		for (int i = 0; i < trees.length; i++) {
347 			final AbstractTreeIterator t = trees[i];
348 			if (t.matches == ch) {
349 				if (t.matchShift == 0)
350 					t.skip();
351 				else {
352 					t.back(t.matchShift);
353 					t.matchShift = 0;
354 				}
355 				t.matches = null;
356 			}
357 		}
358 
359 		if (ch == dfConflict)
360 			dfConflict = null;
361 	}
362 
363 	@Override
364 	void stopWalk() throws IOException {
365 		if (!needsStopWalk()) {
366 			return;
367 		}
368 
369 		// Name conflicts make aborting early difficult. Multiple paths may
370 		// exist between the file and directory versions of a name. To ensure
371 		// the directory version is skipped over (as it was previously visited
372 		// during the file version step) requires popping up the stack and
373 		// finishing out each subtree that the walker dove into. Siblings in
374 		// parents do not need to be recursed into, bounding the cost.
375 		for (;;) {
376 			AbstractTreeIterator t = min();
377 			if (t.eof()) {
378 				if (depth > 0) {
379 					exitSubtree();
380 					popEntriesEqual();
381 					continue;
382 				}
383 				return;
384 			}
385 			currentHead = t;
386 			skipEntriesEqual();
387 		}
388 	}
389 
390 	private boolean needsStopWalk() {
391 		for (AbstractTreeIterator t : trees) {
392 			if (t.needsStopWalk()) {
393 				return true;
394 			}
395 		}
396 		return false;
397 	}
398 
399 	/**
400 	 * True if the current entry is covered by a directory/file conflict.
401 	 *
402 	 * This means that for some prefix of the current entry's path, this walk
403 	 * has detected a directory/file conflict. Also true if the current entry
404 	 * itself is a directory/file conflict.
405 	 *
406 	 * Example: If this TreeWalk points to foo/bar/a.txt and this method returns
407 	 * true then you know that either for path foo or for path foo/bar files and
408 	 * folders were detected.
409 	 *
410 	 * @return <code>true</code> if the current entry is covered by a
411 	 *         directory/file conflict, <code>false</code> otherwise
412 	 */
413 	public boolean isDirectoryFileConflict() {
414 		return dfConflict != null;
415 	}
416 }