View Javadoc
1   /*
2    * Copyright (C) 2010, Christian Halstrick <christian.halstrick@sap.com> and others
3    *
4    * This program and the accompanying materials are made available under the
5    * terms of the Eclipse Distribution License v. 1.0 which is available at
6    * https://www.eclipse.org/org/documents/edl-v10.php.
7    *
8    * SPDX-License-Identifier: BSD-3-Clause
9    */
10  package org.eclipse.jgit.treewalk.filter;
11  
12  import java.io.IOException;
13  import java.util.HashSet;
14  import java.util.LinkedList;
15  import java.util.List;
16  import java.util.Set;
17  
18  import org.eclipse.jgit.dircache.DirCacheEntry;
19  import org.eclipse.jgit.dircache.DirCacheIterator;
20  import org.eclipse.jgit.errors.IncorrectObjectTypeException;
21  import org.eclipse.jgit.errors.MissingObjectException;
22  import org.eclipse.jgit.lib.FileMode;
23  import org.eclipse.jgit.lib.ObjectReader;
24  import org.eclipse.jgit.treewalk.TreeWalk;
25  import org.eclipse.jgit.treewalk.WorkingTreeIterator;
26  
27  /**
28   * A performance optimized variant of
29   * {@link org.eclipse.jgit.treewalk.filter.TreeFilter#ANY_DIFF} which should be
30   * used when among the walked trees there is a
31   * {@link org.eclipse.jgit.dircache.DirCacheIterator} and a
32   * {@link org.eclipse.jgit.treewalk.WorkingTreeIterator}. Please see the
33   * documentation of {@link org.eclipse.jgit.treewalk.filter.TreeFilter#ANY_DIFF}
34   * for a basic description of the semantics.
35   * <p>
36   * This filter tries to avoid computing content ids of the files in the
37   * working-tree. In contrast to
38   * {@link org.eclipse.jgit.treewalk.filter.TreeFilter#ANY_DIFF} this filter
39   * takes care to first compare the entry from the
40   * {@link org.eclipse.jgit.dircache.DirCacheIterator} with the entries from all
41   * other iterators besides the
42   * {@link org.eclipse.jgit.treewalk.WorkingTreeIterator}. Since all those
43   * entries have fast access to content ids that is very fast. If a difference is
44   * detected in this step this filter decides to include that path before even
45   * looking at the working-tree entry.
46   * <p>
47   * If no difference is found then we have to compare index and working-tree as
48   * the last step. By making use of
49   * {@link org.eclipse.jgit.treewalk.WorkingTreeIterator#isModified(org.eclipse.jgit.dircache.DirCacheEntry, boolean, ObjectReader)}
50   * we can avoid the computation of the content id if the file is not dirty.
51   * <p>
52   * Instances of this filter should not be used for multiple
53   * {@link org.eclipse.jgit.treewalk.TreeWalk}s. Always construct a new instance
54   * of this filter for each TreeWalk.
55   */
56  public class IndexDiffFilter extends TreeFilter {
57  	private final int dirCache;
58  
59  	private final int workingTree;
60  
61  	private final boolean honorIgnores;
62  
63  	private final Set<String> ignoredPaths = new HashSet<>();
64  
65  	private final LinkedList<String> untrackedParentFolders = new LinkedList<>();
66  
67  	private final LinkedList<String> untrackedFolders = new LinkedList<>();
68  
69  	/**
70  	 * Creates a new instance of this filter. Do not use an instance of this
71  	 * filter in multiple treewalks.
72  	 *
73  	 * @param dirCacheIndex
74  	 *            the index of the
75  	 *            {@link org.eclipse.jgit.dircache.DirCacheIterator} in the
76  	 *            associated treewalk
77  	 * @param workingTreeIndex
78  	 *            the index of the
79  	 *            {@link org.eclipse.jgit.treewalk.WorkingTreeIterator} in the
80  	 *            associated treewalk
81  	 */
82  	public IndexDiffFilter(int dirCacheIndex, int workingTreeIndex) {
83  		this(dirCacheIndex, workingTreeIndex, true /* honor ignores */);
84  	}
85  
86  	/**
87  	 * Creates a new instance of this filter. Do not use an instance of this
88  	 * filter in multiple treewalks.
89  	 *
90  	 * @param dirCacheIndex
91  	 *            the index of the
92  	 *            {@link org.eclipse.jgit.dircache.DirCacheIterator} in the
93  	 *            associated treewalk
94  	 * @param workingTreeIndex
95  	 *            the index of the
96  	 *            {@link org.eclipse.jgit.treewalk.WorkingTreeIterator} in the
97  	 *            associated treewalk
98  	 * @param honorIgnores
99  	 *            true if the filter should skip working tree files that are
100 	 *            declared as ignored by the standard exclude mechanisms.
101 	 */
102 	public IndexDiffFilter(int dirCacheIndex, int workingTreeIndex,
103 			boolean honorIgnores) {
104 		this.dirCache = dirCacheIndex;
105 		this.workingTree = workingTreeIndex;
106 		this.honorIgnores = honorIgnores;
107 	}
108 
109 	/** {@inheritDoc} */
110 	@Override
111 	public boolean include(TreeWalk tw) throws MissingObjectException,
112 			IncorrectObjectTypeException, IOException {
113 		final int cnt = tw.getTreeCount();
114 		final int wm = tw.getRawMode(workingTree);
115 		WorkingTreeIterator wi = workingTree(tw);
116 		String path = tw.getPathString();
117 
118 		DirCacheIterator di = tw.getTree(dirCache, DirCacheIterator.class);
119 		if (di != null) {
120 			DirCacheEntry dce = di.getDirCacheEntry();
121 			if (dce != null) {
122 				if (dce.isAssumeValid())
123 					return false;
124 				// Never filter index entries with a stage different from 0
125 				if (dce.getStage() != 0)
126 					return true;
127 			}
128 		}
129 
130 		if (!tw.isPostOrderTraversal()) {
131 			// detect untracked Folders
132 			// Whenever we enter a folder in the workingtree assume it will
133 			// contain only untracked files and add it to
134 			// untrackedParentFolders. If we later find tracked files we will
135 			// remove it from this list
136 			if (FileMode.TREE.equals(wm)
137 					&& !(honorIgnores && wi.isEntryIgnored())) {
138 				// Clean untrackedParentFolders. This potentially moves entries
139 				// from untrackedParentFolders to untrackedFolders
140 				copyUntrackedFolders(path);
141 				// add the folder we just entered to untrackedParentFolders
142 				untrackedParentFolders.addFirst(path);
143 			}
144 
145 			// detect untracked Folders
146 			// Whenever we see a tracked file we know that all of its parent
147 			// folders do not belong into untrackedParentFolders anymore. Clean
148 			// it.
149 			for (int i = 0; i < cnt; i++) {
150 				int rmode = tw.getRawMode(i);
151 				if (i != workingTree && rmode != FileMode.TYPE_MISSING
152 						&& FileMode.TREE.equals(rmode)) {
153 					untrackedParentFolders.clear();
154 					break;
155 				}
156 			}
157 		}
158 
159 		// If the working tree file doesn't exist, it does exist for at least
160 		// one other so include this difference.
161 		if (wm == 0)
162 			return true;
163 
164 		// If the path does not appear in the DirCache and its ignored
165 		// we can avoid returning a result here, but only if its not in any
166 		// other tree.
167 		final int dm = tw.getRawMode(dirCache);
168 		if (dm == FileMode.TYPE_MISSING) {
169 			if (honorIgnores && wi.isEntryIgnored()) {
170 				ignoredPaths.add(wi.getEntryPathString());
171 				int i = 0;
172 				for (; i < cnt; i++) {
173 					if (i == dirCache || i == workingTree)
174 						continue;
175 					if (tw.getRawMode(i) != FileMode.TYPE_MISSING)
176 						break;
177 				}
178 
179 				// If i is cnt then the path does not appear in any other tree,
180 				// and this working tree entry can be safely ignored.
181 				return i != cnt;
182 			}
183 			// In working tree and not ignored, and not in DirCache.
184 			return true;
185 		}
186 
187 		// Always include subtrees as WorkingTreeIterator cannot provide
188 		// efficient elimination of unmodified subtrees.
189 		if (tw.isSubtree())
190 			return true;
191 
192 		// Try the inexpensive comparisons between index and all real trees
193 		// first. Only if we don't find a diff here we have to bother with
194 		// the working tree
195 		for (int i = 0; i < cnt; i++) {
196 			if (i == dirCache || i == workingTree)
197 				continue;
198 			if (tw.getRawMode(i) != dm || !tw.idEqual(i, dirCache))
199 				return true;
200 		}
201 
202 		// Only one chance left to detect a diff: between index and working
203 		// tree. Make use of the WorkingTreeIterator#isModified() method to
204 		// avoid computing SHA1 on filesystem content if not really needed.
205 		return wi.isModified(di == null ? null : di.getDirCacheEntry(), true,
206 				tw.getObjectReader());
207 	}
208 
209 	/**
210 	 * Copy all entries which are still in untrackedParentFolders and which
211 	 * belong to a path this treewalk has left into untrackedFolders. It is sure
212 	 * that we will not find any tracked files underneath these paths. Therefore
213 	 * these paths definitely belong to untracked folders.
214 	 *
215 	 * @param currentPath
216 	 *            the current path of the treewalk
217 	 */
218 	private void copyUntrackedFolders(String currentPath) {
219 		String pathToBeSaved = null;
220 		while (!untrackedParentFolders.isEmpty() && !currentPath
221 				.startsWith(untrackedParentFolders.getFirst() + '/')) {
222 			pathToBeSaved = untrackedParentFolders.removeFirst();
223 		}
224 		if (pathToBeSaved != null) {
225 			while (!untrackedFolders.isEmpty() && untrackedFolders.getLast()
226 					.startsWith(pathToBeSaved + '/')) {
227 				untrackedFolders.removeLast();
228 			}
229 			untrackedFolders.addLast(pathToBeSaved);
230 		}
231 	}
232 
233 	private WorkingTreeIterator workingTree(TreeWalk tw) {
234 		return tw.getTree(workingTree, WorkingTreeIterator.class);
235 	}
236 
237 	/** {@inheritDoc} */
238 	@Override
239 	public boolean shouldBeRecursive() {
240 		// We cannot compare subtrees in the working tree, so encourage
241 		// use of recursive walks where the subtrees are always dived into.
242 		return true;
243 	}
244 
245 	/** {@inheritDoc} */
246 	@Override
247 	public TreeFilter clone() {
248 		return this;
249 	}
250 
251 	/** {@inheritDoc} */
252 	@Override
253 	public String toString() {
254 		return "INDEX_DIFF_FILTER"; //$NON-NLS-1$
255 	}
256 
257 	/**
258 	 * The method returns the list of ignored files and folders. Only the root
259 	 * folder of an ignored folder hierarchy is reported. If a/b/c is listed in
260 	 * the .gitignore then you should not expect a/b/c/d/e/f to be reported
261 	 * here. Only a/b/c will be reported. Furthermore only ignored files /
262 	 * folders are returned that are NOT in the index.
263 	 *
264 	 * @return ignored paths
265 	 */
266 	public Set<String> getIgnoredPaths() {
267 		return ignoredPaths;
268 	}
269 
270 	/**
271 	 * <p>Getter for the field <code>untrackedFolders</code>.</p>
272 	 *
273 	 * @return all paths of folders which contain only untracked files/folders.
274 	 *         If on the associated treewalk postorder traversal was turned on
275 	 *         (see {@link org.eclipse.jgit.treewalk.TreeWalk#setPostOrderTraversal(boolean)}) then an
276 	 *         empty list will be returned.
277 	 */
278 	public List<String> getUntrackedFolders() {
279 		LinkedList<String> ret = new LinkedList<>(untrackedFolders);
280 		if (!untrackedParentFolders.isEmpty()) {
281 			String toBeAdded = untrackedParentFolders.getLast();
282 			while (!ret.isEmpty() && ret.getLast().startsWith(toBeAdded))
283 				ret.removeLast();
284 			ret.addLast(toBeAdded);
285 		}
286 		return ret;
287 	}
288 }