View Javadoc
1   /*
2    * Copyright (C) 2010, Google Inc. and others
3    *
4    * This program and the accompanying materials are made available under the
5    * terms of the Eclipse Distribution License v. 1.0 which is available at
6    * https://www.eclipse.org/org/documents/edl-v10.php.
7    *
8    * SPDX-License-Identifier: BSD-3-Clause
9    */
10  
11  package org.eclipse.jgit.diff;
12  
13  /**
14   * Compares two {@link org.eclipse.jgit.diff.Sequence}s to create an
15   * {@link org.eclipse.jgit.diff.EditList} of changes.
16   * <p>
17   * An algorithm's {@code diff} method must be callable from concurrent threads
18   * without data collisions. This permits some algorithms to use a singleton
19   * pattern, with concurrent invocations using the same singleton. Other
20   * algorithms may support parameterization, in which case the caller can create
21   * a unique instance per thread.
22   */
23  public abstract class DiffAlgorithm {
24  	/**
25  	 * Supported diff algorithm
26  	 */
27  	public enum SupportedAlgorithm {
28  		/**
29  		 * Myers diff algorithm
30  		 */
31  		MYERS,
32  
33  		/**
34  		 * Histogram diff algorithm
35  		 */
36  		HISTOGRAM
37  	}
38  
39  	/**
40  	 * Get diff algorithm
41  	 *
42  	 * @param alg
43  	 *            the diff algorithm for which an implementation should be
44  	 *            returned
45  	 * @return an implementation of the specified diff algorithm
46  	 */
47  	public static DiffAlgorithm getAlgorithm(SupportedAlgorithm alg) {
48  		switch (alg) {
49  		case MYERS:
50  			return MyersDiff.INSTANCE;
51  		case HISTOGRAM:
52  			return new HistogramDiff();
53  		default:
54  			throw new IllegalArgumentException();
55  		}
56  	}
57  
58  	/**
59  	 * Compare two sequences and identify a list of edits between them.
60  	 *
61  	 * @param cmp
62  	 *            the comparator supplying the element equivalence function.
63  	 * @param a
64  	 *            the first (also known as old or pre-image) sequence. Edits
65  	 *            returned by this algorithm will reference indexes using the
66  	 *            'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
67  	 *            {@link org.eclipse.jgit.diff.Edit#getEndA()}.
68  	 * @param b
69  	 *            the second (also known as new or post-image) sequence. Edits
70  	 *            returned by this algorithm will reference indexes using the
71  	 *            'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
72  	 *            {@link org.eclipse.jgit.diff.Edit#getEndB()}.
73  	 * @return a modifiable edit list comparing the two sequences. If empty, the
74  	 *         sequences are identical according to {@code cmp}'s rules. The
75  	 *         result list is never null.
76  	 */
77  	public <S extends Sequence> EditList diff(
78  			SequenceComparator<? super S> cmp, S a, S b) {
79  		Edit region = cmp.reduceCommonStartEnd(a, b, coverEdit(a, b));
80  
81  		switch (region.getType()) {
82  		case INSERT:
83  		case DELETE:
84  			return EditList.singleton(region);
85  
86  		case REPLACE: {
87  			if (region.getLengthA() == 1 && region.getLengthB() == 1)
88  				return EditList.singleton(region);
89  
90  			SubsequenceComparator<S> cs = new SubsequenceComparator<>(cmp);
91  			Subsequence<S> as = Subsequence.a(a, region);
92  			Subsequence<S> bs = Subsequence.b(b, region);
93  			EditList e = Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs);
94  			return normalize(cmp, e, a, b);
95  		}
96  
97  		case EMPTY:
98  			return new EditList(0);
99  
100 		default:
101 			throw new IllegalStateException();
102 		}
103 	}
104 
105 	private static <S extends Sequence> Edit coverEdit(S a, S b) {
106 		return new Edit(0, a.size(), 0, b.size());
107 	}
108 
109 	/**
110 	 * Reorganize an {@link EditList} for better diff consistency.
111 	 * <p>
112 	 * {@code DiffAlgorithms} may return {@link Edit.Type#INSERT} or
113 	 * {@link Edit.Type#DELETE} edits that can be "shifted". For
114 	 * example, the deleted section
115 	 * <pre>
116 	 * -a
117 	 * -b
118 	 * -c
119 	 *  a
120 	 *  b
121 	 *  c
122 	 * </pre>
123 	 * can be shifted down by 1, 2 or 3 locations.
124 	 * <p>
125 	 * To avoid later merge issues, we shift such edits to a
126 	 * consistent location. {@code normalize} uses a simple strategy of
127 	 * shifting such edits to their latest possible location.
128 	 * <p>
129 	 * This strategy may not always produce an aesthetically pleasing
130 	 * diff. For instance, it works well with
131 	 * <pre>
132 	 *  function1 {
133 	 *   ...
134 	 *  }
135 	 *
136 	 * +function2 {
137 	 * + ...
138 	 * +}
139 	 * +
140 	 * function3 {
141 	 * ...
142 	 * }
143 	 * </pre>
144 	 * but less so for
145 	 * <pre>
146 	 *  #
147 	 *  # comment1
148 	 *  #
149 	 *  function1() {
150 	 *  }
151 	 *
152 	 *  #
153 	 * +# comment3
154 	 * +#
155 	 * +function3() {
156 	 * +}
157 	 * +
158 	 * +#
159 	 *  # comment2
160 	 *  #
161 	 *  function2() {
162 	 *  }
163 	 * </pre>
164 	 * <a href="https://github.com/mhagger/diff-slider-tools">More
165 	 * sophisticated strategies</a> are possible, say by calculating a
166 	 * suitable "aesthetic cost" for each possible position and using
167 	 * the lowest cost, but {@code normalize} just shifts edits
168 	 * to the end as much as possible.
169 	 *
170 	 * @param <S>
171 	 *            type of sequence being compared.
172 	 * @param cmp
173 	 *            the comparator supplying the element equivalence function.
174 	 * @param e
175 	 *            a modifiable edit list comparing the provided sequences.
176 	 * @param a
177 	 *            the first (also known as old or pre-image) sequence.
178 	 * @param b
179 	 *            the second (also known as new or post-image) sequence.
180 	 * @return a modifiable edit list with edit regions shifted to their
181 	 *         latest possible location. The result list is never null.
182 	 * @since 4.7
183 	 */
184 	private static <S extends Sequence> EditList normalize(
185 		SequenceComparator<? super S> cmp, EditList e, S a, S b) {
186 		Edit prev = null;
187 		for (int i = e.size() - 1; i >= 0; i--) {
188 			Edit cur = e.get(i);
189 			Edit.Type curType = cur.getType();
190 
191 			int maxA = (prev == null) ? a.size() : prev.beginA;
192 			int maxB = (prev == null) ? b.size() : prev.beginB;
193 
194 			if (curType == Edit.Type.INSERT) {
195 				while (cur.endA < maxA && cur.endB < maxB
196 					&& cmp.equals(b, cur.beginB, b, cur.endB)) {
197 					cur.shift(1);
198 				}
199 			} else if (curType == Edit.Type.DELETE) {
200 				while (cur.endA < maxA && cur.endB < maxB
201 					&& cmp.equals(a, cur.beginA, a, cur.endA)) {
202 					cur.shift(1);
203 				}
204 			}
205 			prev = cur;
206 		}
207 		return e;
208 	}
209 
210 	/**
211 	 * Compare two sequences and identify a list of edits between them.
212 	 *
213 	 * This method should be invoked only after the two sequences have been
214 	 * proven to have no common starting or ending elements. The expected
215 	 * elimination of common starting and ending elements is automatically
216 	 * performed by the {@link #diff(SequenceComparator, Sequence, Sequence)}
217 	 * method, which invokes this method using
218 	 * {@link org.eclipse.jgit.diff.Subsequence}s.
219 	 *
220 	 * @param cmp
221 	 *            the comparator supplying the element equivalence function.
222 	 * @param a
223 	 *            the first (also known as old or pre-image) sequence. Edits
224 	 *            returned by this algorithm will reference indexes using the
225 	 *            'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
226 	 *            {@link org.eclipse.jgit.diff.Edit#getEndA()}.
227 	 * @param b
228 	 *            the second (also known as new or post-image) sequence. Edits
229 	 *            returned by this algorithm will reference indexes using the
230 	 *            'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
231 	 *            {@link org.eclipse.jgit.diff.Edit#getEndB()}.
232 	 * @return a modifiable edit list comparing the two sequences.
233 	 */
234 	public abstract <S extends Sequence> EditList diffNonCommon(
235 			SequenceComparator<? super S> cmp, S a, S b);
236 }