View Javadoc
1   /*
2    * Copyright (C) 2008, Google Inc.
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  
44  package org.eclipse.jgit.treewalk;
45  
46  import org.eclipse.jgit.dircache.DirCacheBuilder;
47  import org.eclipse.jgit.errors.CorruptObjectException;
48  import org.eclipse.jgit.lib.FileMode;
49  import org.eclipse.jgit.lib.ObjectReader;
50  import org.eclipse.jgit.lib.Repository;
51  
52  /**
53   * Specialized TreeWalk to detect directory-file (D/F) name conflicts.
54   * <p>
55   * Due to the way a Git tree is organized the standard {@link TreeWalk} won't
56   * easily find a D/F conflict when merging two or more trees together. In the
57   * standard TreeWalk the file will be returned first, and then much later the
58   * directory will be returned. This makes it impossible for the application to
59   * efficiently detect and handle the conflict.
60   * <p>
61   * Using this walk implementation causes the directory to report earlier than
62   * usual, at the same time as the non-directory entry. This permits the
63   * application to handle the D/F conflict in a single step. The directory is
64   * returned only once, so it does not get returned later in the iteration.
65   * <p>
66   * When a D/F conflict is detected {@link TreeWalk#isSubtree()} will return true
67   * and {@link TreeWalk#enterSubtree()} will recurse into the subtree, no matter
68   * which iterator originally supplied the subtree.
69   * <p>
70   * Because conflicted directories report early, using this walk implementation
71   * to populate a {@link DirCacheBuilder} may cause the automatic resorting to
72   * run and fix the entry ordering.
73   * <p>
74   * This walk implementation requires more CPU to implement a look-ahead and a
75   * look-behind to merge a D/F pair together, or to skip a previously reported
76   * directory. In typical Git repositories the look-ahead cost is 0 and the
77   * look-behind doesn't trigger, as users tend not to create trees which contain
78   * both "foo" as a directory and "foo.c" as a file.
79   * <p>
80   * In the worst-case however several thousand look-ahead steps per walk step may
81   * be necessary, making the overhead quite significant. Since this worst-case
82   * should never happen this walk implementation has made the time/space tradeoff
83   * in favor of more-time/less-space, as that better suits the typical case.
84   */
85  public class NameConflictTreeWalk extends TreeWalk {
86  	private static final int TREE_MODE = FileMode.TREE.getBits();
87  
88  	private boolean fastMinHasMatch;
89  
90  	private AbstractTreeIterator dfConflict;
91  
92  	/**
93  	 * Create a new tree walker for a given repository.
94  	 *
95  	 * @param repo
96  	 *            the repository the walker will obtain data from.
97  	 */
98  	public NameConflictTreeWalk(final Repository repo) {
99  		this(repo.newObjectReader());
100 	}
101 
102 	/**
103 	 * Create a new tree walker for a given repository.
104 	 *
105 	 * @param or
106 	 *            the reader the walker will obtain tree data from.
107 	 */
108 	public NameConflictTreeWalk(final ObjectReader or) {
109 		super(or);
110 	}
111 
112 	@Override
113 	AbstractTreeIterator min() throws CorruptObjectException {
114 		for (;;) {
115 			final AbstractTreeIterator minRef = fastMin();
116 			if (fastMinHasMatch)
117 				return minRef;
118 
119 			if (isTree(minRef)) {
120 				if (skipEntry(minRef)) {
121 					for (final AbstractTreeIterator t : trees) {
122 						if (t.matches == minRef) {
123 							t.next(1);
124 							t.matches = null;
125 						}
126 					}
127 					continue;
128 				}
129 				return minRef;
130 			}
131 
132 			return combineDF(minRef);
133 		}
134 	}
135 
136 	private AbstractTreeIterator fastMin() {
137 		fastMinHasMatch = true;
138 
139 		int i = 0;
140 		AbstractTreeIterator minRef = trees[i];
141 		while (minRef.eof() && ++i < trees.length)
142 			minRef = trees[i];
143 		if (minRef.eof())
144 			return minRef;
145 
146 		boolean hasConflict = false;
147 		minRef.matches = minRef;
148 		while (++i < trees.length) {
149 			final AbstractTreeIterator t = trees[i];
150 			if (t.eof())
151 				continue;
152 
153 			final int cmp = t.pathCompare(minRef);
154 			if (cmp < 0) {
155 				if (fastMinHasMatch && isTree(minRef) && !isTree(t)
156 						&& nameEqual(minRef, t)) {
157 					// We used to be at a tree, but now we are at a file
158 					// with the same name. Allow the file to match the
159 					// tree anyway.
160 					//
161 					t.matches = minRef;
162 					hasConflict = true;
163 				} else {
164 					fastMinHasMatch = false;
165 					t.matches = t;
166 					minRef = t;
167 				}
168 			} else if (cmp == 0) {
169 				// Exact name/mode match is best.
170 				//
171 				t.matches = minRef;
172 			} else if (fastMinHasMatch && isTree(t) && !isTree(minRef)
173 					&& nameEqual(t, minRef)) {
174 				// The minimum is a file (non-tree) but the next entry
175 				// of this iterator is a tree whose name matches our file.
176 				// This is a classic D/F conflict and commonly occurs like
177 				// this, with no gaps in between the file and directory.
178 				//
179 				// Use the tree as the minimum instead (see combineDF).
180 				//
181 
182 				for (int k = 0; k < i; k++) {
183 					final AbstractTreeIterator p = trees[k];
184 					if (p.matches == minRef)
185 						p.matches = t;
186 				}
187 				t.matches = t;
188 				minRef = t;
189 				hasConflict = true;
190 			} else
191 				fastMinHasMatch = false;
192 		}
193 
194 		if (hasConflict && fastMinHasMatch && dfConflict == null)
195 			dfConflict = minRef;
196 		return minRef;
197 	}
198 
199 	private static boolean nameEqual(final AbstractTreeIterator a,
200 			final AbstractTreeIterator b) {
201 		return a.pathCompare(b, TREE_MODE) == 0;
202 	}
203 
204 	private static boolean isTree(final AbstractTreeIterator p) {
205 		return FileMode.TREE.equals(p.mode);
206 	}
207 
208 	private boolean skipEntry(final AbstractTreeIterator minRef)
209 			throws CorruptObjectException {
210 		// A tree D/F may have been handled earlier. We need to
211 		// not report this path if it has already been reported.
212 		//
213 		for (final AbstractTreeIterator t : trees) {
214 			if (t.matches == minRef || t.first())
215 				continue;
216 
217 			int stepsBack = 0;
218 			for (;;) {
219 				stepsBack++;
220 				t.back(1);
221 
222 				final int cmp = t.pathCompare(minRef, 0);
223 				if (cmp == 0) {
224 					// We have already seen this "$path" before. Skip it.
225 					//
226 					t.next(stepsBack);
227 					return true;
228 				} else if (cmp < 0 || t.first()) {
229 					// We cannot find "$path" in t; it will never appear.
230 					//
231 					t.next(stepsBack);
232 					break;
233 				}
234 			}
235 		}
236 
237 		// We have never seen the current path before.
238 		//
239 		return false;
240 	}
241 
242 	private AbstractTreeIterator combineDF(final AbstractTreeIterator minRef)
243 			throws CorruptObjectException {
244 		// Look for a possible D/F conflict forward in the tree(s)
245 		// as there may be a "$path/" which matches "$path". Make
246 		// such entries match this entry.
247 		//
248 		AbstractTreeIterator treeMatch = null;
249 		for (final AbstractTreeIterator t : trees) {
250 			if (t.matches == minRef || t.eof())
251 				continue;
252 
253 			for (;;) {
254 				final int cmp = t.pathCompare(minRef, TREE_MODE);
255 				if (cmp < 0) {
256 					// The "$path/" may still appear later.
257 					//
258 					t.matchShift++;
259 					t.next(1);
260 					if (t.eof()) {
261 						t.back(t.matchShift);
262 						t.matchShift = 0;
263 						break;
264 					}
265 				} else if (cmp == 0) {
266 					// We have a conflict match here.
267 					//
268 					t.matches = minRef;
269 					treeMatch = t;
270 					break;
271 				} else {
272 					// A conflict match is not possible.
273 					//
274 					if (t.matchShift != 0) {
275 						t.back(t.matchShift);
276 						t.matchShift = 0;
277 					}
278 					break;
279 				}
280 			}
281 		}
282 
283 		if (treeMatch != null) {
284 			// If we do have a conflict use one of the directory
285 			// matching iterators instead of the file iterator.
286 			// This way isSubtree is true and isRecursive works.
287 			//
288 			for (final AbstractTreeIterator t : trees)
289 				if (t.matches == minRef)
290 					t.matches = treeMatch;
291 
292 			if (dfConflict == null)
293 				dfConflict = treeMatch;
294 
295 			return treeMatch;
296 		}
297 
298 		return minRef;
299 	}
300 
301 	@Override
302 	void popEntriesEqual() throws CorruptObjectException {
303 		final AbstractTreeIterator ch = currentHead;
304 		for (int i = 0; i < trees.length; i++) {
305 			final AbstractTreeIterator t = trees[i];
306 			if (t.matches == ch) {
307 				if (t.matchShift == 0)
308 					t.next(1);
309 				else {
310 					t.back(t.matchShift);
311 					t.matchShift = 0;
312 				}
313 				t.matches = null;
314 			}
315 		}
316 
317 		if (ch == dfConflict)
318 			dfConflict = null;
319 	}
320 
321 	@Override
322 	void skipEntriesEqual() throws CorruptObjectException {
323 		final AbstractTreeIterator ch = currentHead;
324 		for (int i = 0; i < trees.length; i++) {
325 			final AbstractTreeIterator t = trees[i];
326 			if (t.matches == ch) {
327 				if (t.matchShift == 0)
328 					t.skip();
329 				else {
330 					t.back(t.matchShift);
331 					t.matchShift = 0;
332 				}
333 				t.matches = null;
334 			}
335 		}
336 
337 		if (ch == dfConflict)
338 			dfConflict = null;
339 	}
340 
341 	/**
342 	 * True if the current entry is covered by a directory/file conflict.
343 	 *
344 	 * This means that for some prefix of the current entry's path, this walk
345 	 * has detected a directory/file conflict. Also true if the current entry
346 	 * itself is a directory/file conflict.
347 	 *
348 	 * Example: If this TreeWalk points to foo/bar/a.txt and this method returns
349 	 * true then you know that either for path foo or for path foo/bar files and
350 	 * folders were detected.
351 	 *
352 	 * @return <code>true</code> if the current entry is covered by a
353 	 *         directory/file conflict, <code>false</code> otherwise
354 	 */
355 	public boolean isDirectoryFileConflict() {
356 		return dfConflict != null;
357 	}
358 }