View Javadoc
1   /*
2    * Copyright (C) 2008-2009, Google Inc. and others
3    *
4    * This program and the accompanying materials are made available under the
5    * terms of the Eclipse Distribution License v. 1.0 which is available at
6    * https://www.eclipse.org/org/documents/edl-v10.php.
7    *
8    * SPDX-License-Identifier: BSD-3-Clause
9    */
10  
11  package org.eclipse.jgit.patch;
12  
13  import static org.eclipse.jgit.lib.Constants.encodeASCII;
14  import static org.eclipse.jgit.patch.FileHeader.NEW_NAME;
15  import static org.eclipse.jgit.patch.FileHeader.OLD_NAME;
16  import static org.eclipse.jgit.patch.FileHeader.isHunkHdr;
17  import static org.eclipse.jgit.util.RawParseUtils.match;
18  import static org.eclipse.jgit.util.RawParseUtils.nextLF;
19  
20  import java.io.IOException;
21  import java.io.InputStream;
22  import java.util.ArrayList;
23  import java.util.List;
24  
25  import org.eclipse.jgit.internal.JGitText;
26  import org.eclipse.jgit.util.TemporaryBuffer;
27  
28  /**
29   * A parsed collection of {@link org.eclipse.jgit.patch.FileHeader}s from a
30   * unified diff patch file
31   */
32  public class Patch {
33  	static final byte[] DIFF_GIT = encodeASCII("diff --git "); //$NON-NLS-1$
34  
35  	private static final byte[] DIFF_CC = encodeASCII("diff --cc "); //$NON-NLS-1$
36  
37  	private static final byte[] DIFF_COMBINED = encodeASCII("diff --combined "); //$NON-NLS-1$
38  
39  	private static final byte[][] BIN_HEADERS = new byte[][] {
40  			encodeASCII("Binary files "), encodeASCII("Files "), }; //$NON-NLS-1$ //$NON-NLS-2$
41  
42  	private static final byte[] BIN_TRAILER = encodeASCII(" differ\n"); //$NON-NLS-1$
43  
44  	private static final byte[] GIT_BINARY = encodeASCII("GIT binary patch\n"); //$NON-NLS-1$
45  
46  	static final byte[] SIG_FOOTER = encodeASCII("-- \n"); //$NON-NLS-1$
47  
48  	/** The files, in the order they were parsed out of the input. */
49  	private final List<FileHeader> files;
50  
51  	/** Formatting errors, if any were identified. */
52  	private final List<FormatError> errors;
53  
54  	/**
55  	 * Create an empty patch.
56  	 */
57  	public Patch() {
58  		files = new ArrayList<>();
59  		errors = new ArrayList<>(0);
60  	}
61  
62  	/**
63  	 * Add a single file to this patch.
64  	 * <p>
65  	 * Typically files should be added by parsing the text through one of this
66  	 * class's parse methods.
67  	 *
68  	 * @param fh
69  	 *            the header of the file.
70  	 */
71  	public void addFile(FileHeader fh) {
72  		files.add(fh);
73  	}
74  
75  	/**
76  	 * Get list of files described in the patch, in occurrence order.
77  	 *
78  	 * @return list of files described in the patch, in occurrence order.
79  	 */
80  	public List<? extends FileHeader> getFiles() {
81  		return files;
82  	}
83  
84  	/**
85  	 * Add a formatting error to this patch script.
86  	 *
87  	 * @param err
88  	 *            the error description.
89  	 */
90  	public void addError(FormatError err) {
91  		errors.add(err);
92  	}
93  
94  	/**
95  	 * Get collection of formatting errors.
96  	 *
97  	 * @return collection of formatting errors, if any.
98  	 */
99  	public List<FormatError> getErrors() {
100 		return errors;
101 	}
102 
103 	/**
104 	 * Parse a patch received from an InputStream.
105 	 * <p>
106 	 * Multiple parse calls on the same instance will concatenate the patch
107 	 * data, but each parse input must start with a valid file header (don't
108 	 * split a single file across parse calls).
109 	 *
110 	 * @param is
111 	 *            the stream to read the patch data from. The stream is read
112 	 *            until EOF is reached.
113 	 * @throws java.io.IOException
114 	 *             there was an error reading from the input stream.
115 	 */
116 	public void parse(InputStream is) throws IOException {
117 		final byte[] buf = readFully(is);
118 		parse(buf, 0, buf.length);
119 	}
120 
121 	private static byte[] readFully(InputStream is) throws IOException {
122 		try (TemporaryBuffer b = new TemporaryBuffer.Heap(Integer.MAX_VALUE)) {
123 			b.copy(is);
124 			return b.toByteArray();
125 		}
126 	}
127 
128 	/**
129 	 * Parse a patch stored in a byte[].
130 	 * <p>
131 	 * Multiple parse calls on the same instance will concatenate the patch
132 	 * data, but each parse input must start with a valid file header (don't
133 	 * split a single file across parse calls).
134 	 *
135 	 * @param buf
136 	 *            the buffer to parse.
137 	 * @param ptr
138 	 *            starting position to parse from.
139 	 * @param end
140 	 *            1 past the last position to end parsing. The total length to
141 	 *            be parsed is <code>end - ptr</code>.
142 	 */
143 	public void parse(byte[] buf, int ptr, int end) {
144 		while (ptr < end)
145 			ptr = parseFile(buf, ptr, end);
146 	}
147 
148 	private int parseFile(byte[] buf, int c, int end) {
149 		while (c < end) {
150 			if (isHunkHdr(buf, c, end) >= 1) {
151 				// If we find a disconnected hunk header we might
152 				// have missed a file header previously. The hunk
153 				// isn't valid without knowing where it comes from.
154 				//
155 				error(buf, c, JGitText.get().hunkDisconnectedFromFile);
156 				c = nextLF(buf, c);
157 				continue;
158 			}
159 
160 			// Valid git style patch?
161 			//
162 			if (match(buf, c, DIFF_GIT) >= 0)
163 				return parseDiffGit(buf, c, end);
164 			if (match(buf, c, DIFF_CC) >= 0)
165 				return parseDiffCombined(DIFF_CC, buf, c, end);
166 			if (match(buf, c, DIFF_COMBINED) >= 0)
167 				return parseDiffCombined(DIFF_COMBINED, buf, c, end);
168 
169 			// Junk between files? Leading junk? Traditional
170 			// (non-git generated) patch?
171 			//
172 			final int n = nextLF(buf, c);
173 			if (n >= end) {
174 				// Patches cannot be only one line long. This must be
175 				// trailing junk that we should ignore.
176 				//
177 				return end;
178 			}
179 
180 			if (n - c < 6) {
181 				// A valid header must be at least 6 bytes on the
182 				// first line, e.g. "--- a/b\n".
183 				//
184 				c = n;
185 				continue;
186 			}
187 
188 			if (match(buf, c, OLD_NAME) >= 0 && match(buf, n, NEW_NAME) >= 0) {
189 				// Probably a traditional patch. Ensure we have at least
190 				// a "@@ -0,0" smelling line next. We only check the "@@ -".
191 				//
192 				final int f = nextLF(buf, n);
193 				if (f >= end)
194 					return end;
195 				if (isHunkHdr(buf, f, end) == 1)
196 					return parseTraditionalPatch(buf, c, end);
197 			}
198 
199 			c = n;
200 		}
201 		return c;
202 	}
203 
204 	private int parseDiffGit(byte[] buf, int start, int end) {
205 		final FileHeader fh = new FileHeader(buf, start);
206 		int ptr = fh.parseGitFileName(start + DIFF_GIT.length, end);
207 		if (ptr < 0)
208 			return skipFile(buf, start);
209 
210 		ptr = fh.parseGitHeaders(ptr, end);
211 		ptr = parseHunks(fh, ptr, end);
212 		fh.endOffset = ptr;
213 		addFile(fh);
214 		return ptr;
215 	}
216 
217 	private int parseDiffCombined(final byte[] hdr, final byte[] buf,
218 			final int start, final int end) {
219 		final CombinedFileHeader fh = new CombinedFileHeader(buf, start);
220 		int ptr = fh.parseGitFileName(start + hdr.length, end);
221 		if (ptr < 0)
222 			return skipFile(buf, start);
223 
224 		ptr = fh.parseGitHeaders(ptr, end);
225 		ptr = parseHunks(fh, ptr, end);
226 		fh.endOffset = ptr;
227 		addFile(fh);
228 		return ptr;
229 	}
230 
231 	private int parseTraditionalPatch(final byte[] buf, final int start,
232 			final int end) {
233 		final FileHeader fh = new FileHeader(buf, start);
234 		int ptr = fh.parseTraditionalHeaders(start, end);
235 		ptr = parseHunks(fh, ptr, end);
236 		fh.endOffset = ptr;
237 		addFile(fh);
238 		return ptr;
239 	}
240 
241 	private static int skipFile(byte[] buf, int ptr) {
242 		ptr = nextLF(buf, ptr);
243 		if (match(buf, ptr, OLD_NAME) >= 0)
244 			ptr = nextLF(buf, ptr);
245 		return ptr;
246 	}
247 
248 	private int parseHunks(FileHeader fh, int c, int end) {
249 		final byte[] buf = fh.buf;
250 		while (c < end) {
251 			// If we see a file header at this point, we have all of the
252 			// hunks for our current file. We should stop and report back
253 			// with this position so it can be parsed again later.
254 			//
255 			if (match(buf, c, DIFF_GIT) >= 0)
256 				break;
257 			if (match(buf, c, DIFF_CC) >= 0)
258 				break;
259 			if (match(buf, c, DIFF_COMBINED) >= 0)
260 				break;
261 			if (match(buf, c, OLD_NAME) >= 0)
262 				break;
263 			if (match(buf, c, NEW_NAME) >= 0)
264 				break;
265 
266 			if (isHunkHdr(buf, c, end) == fh.getParentCount()) {
267 				final HunkHeader h = fh.newHunkHeader(c);
268 				h.parseHeader();
269 				c = h.parseBody(this, end);
270 				h.endOffset = c;
271 				fh.addHunk(h);
272 				if (c < end) {
273 					switch (buf[c]) {
274 					case '@':
275 					case 'd':
276 					case '\n':
277 						break;
278 					default:
279 						if (match(buf, c, SIG_FOOTER) < 0)
280 							warn(buf, c, JGitText.get().unexpectedHunkTrailer);
281 					}
282 				}
283 				continue;
284 			}
285 
286 			final int eol = nextLF(buf, c);
287 			if (fh.getHunks().isEmpty() && match(buf, c, GIT_BINARY) >= 0) {
288 				fh.patchType = FileHeader.PatchType.GIT_BINARY;
289 				return parseGitBinary(fh, eol, end);
290 			}
291 
292 			if (fh.getHunks().isEmpty() && BIN_TRAILER.length < eol - c
293 					&& match(buf, eol - BIN_TRAILER.length, BIN_TRAILER) >= 0
294 					&& matchAny(buf, c, BIN_HEADERS)) {
295 				// The patch is a binary file diff, with no deltas.
296 				//
297 				fh.patchType = FileHeader.PatchType.BINARY;
298 				return eol;
299 			}
300 
301 			// Skip this line and move to the next. Its probably garbage
302 			// after the last hunk of a file.
303 			//
304 			c = eol;
305 		}
306 
307 		if (fh.getHunks().isEmpty()
308 				&& fh.getPatchType() == FileHeader.PatchType.UNIFIED
309 				&& !fh.hasMetaDataChanges()) {
310 			// Hmm, an empty patch? If there is no metadata here we
311 			// really have a binary patch that we didn't notice above.
312 			//
313 			fh.patchType = FileHeader.PatchType.BINARY;
314 		}
315 
316 		return c;
317 	}
318 
319 	private int parseGitBinary(FileHeader fh, int c, int end) {
320 		final BinaryHunk postImage = new BinaryHunk(fh, c);
321 		final int nEnd = postImage.parseHunk(c, end);
322 		if (nEnd < 0) {
323 			// Not a binary hunk.
324 			//
325 			error(fh.buf, c, JGitText.get().missingForwardImageInGITBinaryPatch);
326 			return c;
327 		}
328 		c = nEnd;
329 		postImage.endOffset = c;
330 		fh.forwardBinaryHunk = postImage;
331 
332 		final BinaryHunk preImage = new BinaryHunk(fh, c);
333 		final int oEnd = preImage.parseHunk(c, end);
334 		if (oEnd >= 0) {
335 			c = oEnd;
336 			preImage.endOffset = c;
337 			fh.reverseBinaryHunk = preImage;
338 		}
339 
340 		return c;
341 	}
342 
343 	void warn(byte[] buf, int ptr, String msg) {
344 		addError(new FormatError(buf, ptr, FormatError.Severity.WARNING, msg));
345 	}
346 
347 	void error(byte[] buf, int ptr, String msg) {
348 		addError(new FormatError(buf, ptr, FormatError.Severity.ERROR, msg));
349 	}
350 
351 	private static boolean matchAny(final byte[] buf, final int c,
352 			final byte[][] srcs) {
353 		for (byte[] s : srcs) {
354 			if (match(buf, c, s) >= 0)
355 				return true;
356 		}
357 		return false;
358 	}
359 }