View Javadoc
1   /*
2    * Copyright (C) 2008, Google Inc. and others
3    *
4    * This program and the accompanying materials are made available under the
5    * terms of the Eclipse Distribution License v. 1.0 which is available at
6    * https://www.eclipse.org/org/documents/edl-v10.php.
7    *
8    * SPDX-License-Identifier: BSD-3-Clause
9    */
10  
11  package org.eclipse.jgit.treewalk;
12  
13  import java.io.IOException;
14  
15  import org.eclipse.jgit.annotations.Nullable;
16  import org.eclipse.jgit.errors.CorruptObjectException;
17  import org.eclipse.jgit.lib.FileMode;
18  import org.eclipse.jgit.lib.ObjectReader;
19  import org.eclipse.jgit.lib.Repository;
20  
21  /**
22   * Specialized TreeWalk to detect directory-file (D/F) name conflicts.
23   * <p>
24   * Due to the way a Git tree is organized the standard
25   * {@link org.eclipse.jgit.treewalk.TreeWalk} won't easily find a D/F conflict
26   * when merging two or more trees together. In the standard TreeWalk the file
27   * will be returned first, and then much later the directory will be returned.
28   * This makes it impossible for the application to efficiently detect and handle
29   * the conflict.
30   * <p>
31   * Using this walk implementation causes the directory to report earlier than
32   * usual, at the same time as the non-directory entry. This permits the
33   * application to handle the D/F conflict in a single step. The directory is
34   * returned only once, so it does not get returned later in the iteration.
35   * <p>
36   * When a D/F conflict is detected
37   * {@link org.eclipse.jgit.treewalk.TreeWalk#isSubtree()} will return true and
38   * {@link org.eclipse.jgit.treewalk.TreeWalk#enterSubtree()} will recurse into
39   * the subtree, no matter which iterator originally supplied the subtree.
40   * <p>
41   * Because conflicted directories report early, using this walk implementation
42   * to populate a {@link org.eclipse.jgit.dircache.DirCacheBuilder} may cause the
43   * automatic resorting to run and fix the entry ordering.
44   * <p>
45   * This walk implementation requires more CPU to implement a look-ahead and a
46   * look-behind to merge a D/F pair together, or to skip a previously reported
47   * directory. In typical Git repositories the look-ahead cost is 0 and the
48   * look-behind doesn't trigger, as users tend not to create trees which contain
49   * both "foo" as a directory and "foo.c" as a file.
50   * <p>
51   * In the worst-case however several thousand look-ahead steps per walk step may
52   * be necessary, making the overhead quite significant. Since this worst-case
53   * should never happen this walk implementation has made the time/space tradeoff
54   * in favor of more-time/less-space, as that better suits the typical case.
55   */
56  public class NameConflictTreeWalk extends TreeWalk {
57  	private static final int TREE_MODE = FileMode.TREE.getBits();
58  
59  	private boolean fastMinHasMatch;
60  
61  	private AbstractTreeIterator dfConflict;
62  
63  	/**
64  	 * Create a new tree walker for a given repository.
65  	 *
66  	 * @param repo
67  	 *            the repository the walker will obtain data from.
68  	 */
69  	public NameConflictTreeWalk(Repository repo) {
70  		super(repo);
71  	}
72  
73  	/**
74  	 * Create a new tree walker for a given repository.
75  	 *
76  	 * @param repo
77  	 *            the repository the walker will obtain data from.
78  	 * @param or
79  	 *            the reader the walker will obtain tree data from.
80  	 * @since 4.3
81  	 */
82  	public NameConflictTreeWalk(@Nullable Repository repo, ObjectReader or) {
83  		super(repo, or);
84  	}
85  
86  	/**
87  	 * Create a new tree walker for a given repository.
88  	 *
89  	 * @param or
90  	 *            the reader the walker will obtain tree data from.
91  	 */
92  	public NameConflictTreeWalk(ObjectReader or) {
93  		super(or);
94  	}
95  
96  	@Override
97  	AbstractTreeIterator min() throws CorruptObjectException {
98  		for (;;) {
99  			final AbstractTreeIterator minRef = fastMin();
100 			if (fastMinHasMatch)
101 				return minRef;
102 
103 			if (isTree(minRef)) {
104 				if (skipEntry(minRef)) {
105 					for (AbstractTreeIterator t : trees) {
106 						if (t.matches == minRef) {
107 							t.next(1);
108 							t.matches = null;
109 						}
110 					}
111 					continue;
112 				}
113 				return minRef;
114 			}
115 
116 			return combineDF(minRef);
117 		}
118 	}
119 
120 	private AbstractTreeIterator fastMin() {
121 		fastMinHasMatch = true;
122 
123 		int i = 0;
124 		AbstractTreeIterator minRef = trees[i];
125 		while (minRef.eof() && ++i < trees.length)
126 			minRef = trees[i];
127 		if (minRef.eof())
128 			return minRef;
129 
130 		boolean hasConflict = false;
131 		minRef.matches = minRef;
132 		while (++i < trees.length) {
133 			final AbstractTreeIterator t = trees[i];
134 			if (t.eof())
135 				continue;
136 
137 			final int cmp = t.pathCompare(minRef);
138 			if (cmp < 0) {
139 				if (fastMinHasMatch && isTree(minRef) && !isTree(t)
140 						&& nameEqual(minRef, t)) {
141 					// We used to be at a tree, but now we are at a file
142 					// with the same name. Allow the file to match the
143 					// tree anyway.
144 					//
145 					t.matches = minRef;
146 					hasConflict = true;
147 				} else {
148 					fastMinHasMatch = false;
149 					t.matches = t;
150 					minRef = t;
151 				}
152 			} else if (cmp == 0) {
153 				// Exact name/mode match is best.
154 				//
155 				t.matches = minRef;
156 			} else if (fastMinHasMatch && isTree(t) && !isTree(minRef)
157 					&& !isGitlink(minRef) && nameEqual(t, minRef)) {
158 				// The minimum is a file (non-tree) but the next entry
159 				// of this iterator is a tree whose name matches our file.
160 				// This is a classic D/F conflict and commonly occurs like
161 				// this, with no gaps in between the file and directory.
162 				//
163 				// Use the tree as the minimum instead (see combineDF).
164 				//
165 
166 				for (int k = 0; k < i; k++) {
167 					final AbstractTreeIterator p = trees[k];
168 					if (p.matches == minRef)
169 						p.matches = t;
170 				}
171 				t.matches = t;
172 				minRef = t;
173 				hasConflict = true;
174 			} else
175 				fastMinHasMatch = false;
176 		}
177 
178 		if (hasConflict && fastMinHasMatch && dfConflict == null)
179 			dfConflict = minRef;
180 		return minRef;
181 	}
182 
183 	private static boolean nameEqual(final AbstractTreeIterator a,
184 			final AbstractTreeIterator b) {
185 		return a.pathCompare(b, TREE_MODE) == 0;
186 	}
187 
188 	private boolean isGitlink(AbstractTreeIterator p) {
189 		return FileMode.GITLINK.equals(p.mode);
190 	}
191 
192 	private static boolean isTree(AbstractTreeIterator p) {
193 		return FileMode.TREE.equals(p.mode);
194 	}
195 
196 	private boolean skipEntry(AbstractTreeIterator minRef)
197 			throws CorruptObjectException {
198 		// A tree D/F may have been handled earlier. We need to
199 		// not report this path if it has already been reported.
200 		//
201 		for (AbstractTreeIterator t : trees) {
202 			if (t.matches == minRef || t.first())
203 				continue;
204 
205 			int stepsBack = 0;
206 			for (;;) {
207 				stepsBack++;
208 				t.back(1);
209 
210 				final int cmp = t.pathCompare(minRef, 0);
211 				if (cmp == 0) {
212 					// We have already seen this "$path" before. Skip it.
213 					//
214 					t.next(stepsBack);
215 					return true;
216 				} else if (cmp < 0 || t.first()) {
217 					// We cannot find "$path" in t; it will never appear.
218 					//
219 					t.next(stepsBack);
220 					break;
221 				}
222 			}
223 		}
224 
225 		// We have never seen the current path before.
226 		//
227 		return false;
228 	}
229 
230 	private AbstractTreeIteratorg/eclipse/jgit/treewalk/AbstractTreeIterator.html#AbstractTreeIterator">AbstractTreeIterator combineDF(AbstractTreeIterator minRef)
231 			throws CorruptObjectException {
232 		// Look for a possible D/F conflict forward in the tree(s)
233 		// as there may be a "$path/" which matches "$path". Make
234 		// such entries match this entry.
235 		//
236 		AbstractTreeIterator treeMatch = null;
237 		for (AbstractTreeIterator t : trees) {
238 			if (t.matches == minRef || t.eof())
239 				continue;
240 
241 			for (;;) {
242 				final int cmp = t.pathCompare(minRef, TREE_MODE);
243 				if (cmp < 0) {
244 					// The "$path/" may still appear later.
245 					//
246 					t.matchShift++;
247 					t.next(1);
248 					if (t.eof()) {
249 						t.back(t.matchShift);
250 						t.matchShift = 0;
251 						break;
252 					}
253 				} else if (cmp == 0) {
254 					// We have a conflict match here.
255 					//
256 					t.matches = minRef;
257 					treeMatch = t;
258 					break;
259 				} else {
260 					// A conflict match is not possible.
261 					//
262 					if (t.matchShift != 0) {
263 						t.back(t.matchShift);
264 						t.matchShift = 0;
265 					}
266 					break;
267 				}
268 			}
269 		}
270 
271 		if (treeMatch != null) {
272 			// If we do have a conflict use one of the directory
273 			// matching iterators instead of the file iterator.
274 			// This way isSubtree is true and isRecursive works.
275 			//
276 			for (AbstractTreeIterator t : trees)
277 				if (t.matches == minRef)
278 					t.matches = treeMatch;
279 
280 			if (dfConflict == null && !isGitlink(minRef)) {
281 				dfConflict = treeMatch;
282 			}
283 
284 			return treeMatch;
285 		}
286 
287 		return minRef;
288 	}
289 
290 	@Override
291 	void popEntriesEqual() throws CorruptObjectException {
292 		final AbstractTreeIterator ch = currentHead;
293 		for (AbstractTreeIterator t : trees) {
294 			if (t.matches == ch) {
295 				if (t.matchShift == 0)
296 					t.next(1);
297 				else {
298 					t.back(t.matchShift);
299 					t.matchShift = 0;
300 				}
301 				t.matches = null;
302 			}
303 		}
304 
305 		if (ch == dfConflict)
306 			dfConflict = null;
307 	}
308 
309 	@Override
310 	void skipEntriesEqual() throws CorruptObjectException {
311 		final AbstractTreeIterator ch = currentHead;
312 		for (AbstractTreeIterator t : trees) {
313 			if (t.matches == ch) {
314 				if (t.matchShift == 0)
315 					t.skip();
316 				else {
317 					t.back(t.matchShift);
318 					t.matchShift = 0;
319 				}
320 				t.matches = null;
321 			}
322 		}
323 
324 		if (ch == dfConflict)
325 			dfConflict = null;
326 	}
327 
328 	@Override
329 	void stopWalk() throws IOException {
330 		if (!needsStopWalk()) {
331 			return;
332 		}
333 
334 		// Name conflicts make aborting early difficult. Multiple paths may
335 		// exist between the file and directory versions of a name. To ensure
336 		// the directory version is skipped over (as it was previously visited
337 		// during the file version step) requires popping up the stack and
338 		// finishing out each subtree that the walker dove into. Siblings in
339 		// parents do not need to be recursed into, bounding the cost.
340 		for (;;) {
341 			AbstractTreeIterator t = min();
342 			if (t.eof()) {
343 				if (depth > 0) {
344 					exitSubtree();
345 					popEntriesEqual();
346 					continue;
347 				}
348 				return;
349 			}
350 			currentHead = t;
351 			skipEntriesEqual();
352 		}
353 	}
354 
355 	private boolean needsStopWalk() {
356 		for (AbstractTreeIterator t : trees) {
357 			if (t.needsStopWalk()) {
358 				return true;
359 			}
360 		}
361 		return false;
362 	}
363 
364 	/**
365 	 * True if the current entry is covered by a directory/file conflict.
366 	 *
367 	 * This means that for some prefix of the current entry's path, this walk
368 	 * has detected a directory/file conflict. Also true if the current entry
369 	 * itself is a directory/file conflict.
370 	 *
371 	 * Example: If this TreeWalk points to foo/bar/a.txt and this method returns
372 	 * true then you know that either for path foo or for path foo/bar files and
373 	 * folders were detected.
374 	 *
375 	 * @return <code>true</code> if the current entry is covered by a
376 	 *         directory/file conflict, <code>false</code> otherwise
377 	 */
378 	public boolean isDirectoryFileConflict() {
379 		return dfConflict != null;
380 	}
381 }