View Javadoc
1   /*
2    * Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
3    *
4    * This program and the accompanying materials are made available under the
5    * terms of the Eclipse Distribution License v. 1.0 which is available at
6    * https://www.eclipse.org/org/documents/edl-v10.php.
7    *
8    * SPDX-License-Identifier: BSD-3-Clause
9    */
10  package org.eclipse.jgit.util.io;
11  
12  import java.io.EOFException;
13  import java.io.IOException;
14  import java.io.InputStream;
15  import java.io.StreamCorruptedException;
16  import java.text.MessageFormat;
17  
18  import org.eclipse.jgit.internal.JGitText;
19  
20  /**
21   * An {@link InputStream} that applies a binary delta to a base on the fly.
22   * <p>
23   * Delta application to a base needs random access to the base data. The delta
24   * is expressed as a sequence of copy and insert instructions. A copy
25   * instruction has the form "COPY fromOffset length" and says "copy length bytes
26   * from the base, starting at offset fromOffset, to the result". An insert
27   * instruction has the form "INSERT length" followed by length bytes and says
28   * "copy the next length bytes from the delta to the result".
29   * </p>
30   * <p>
31   * These instructions are generated using a content-defined chunking algorithm
32   * (currently C git uses the standard Rabin variant; but there are others that
33   * could be used) that identifies equal chunks. It is entirely possible that a
34   * later copy instruction has a fromOffset that is before the fromOffset of an
35   * earlier copy instruction.
36   * </p>
37   * <p>
38   * This makes it impossible to stream the base.
39   * </p>
40   * <p>
41   * JGit is limited to 2GB maximum size for the base since array indices are
42   * signed 32bit values.
43   *
44   * @since 5.12
45   */
46  public class BinaryDeltaInputStream extends InputStream {
47  
48  	private final byte[] base;
49  
50  	private final InputStream delta;
51  
52  	private long resultLength;
53  
54  	private long toDeliver = -1;
55  
56  	private int fromBase;
57  
58  	private int fromDelta;
59  
60  	private int baseOffset = -1;
61  
62  	/**
63  	 * Creates a new {@link BinaryDeltaInputStream} that applies {@code delta}
64  	 * to {@code base}.
65  	 *
66  	 * @param base
67  	 *            data to apply the delta to
68  	 * @param delta
69  	 *            {@link InputStream} delivering the delta to apply
70  	 */
71  	public BinaryDeltaInputStream(byte[] base, InputStream delta) {
72  		this.base = base;
73  		this.delta = delta;
74  	}
75  
76  	@Override
77  	public int read() throws IOException {
78  		int b = readNext();
79  		if (b >= 0) {
80  			toDeliver--;
81  		}
82  		return b;
83  	}
84  
85  	@Override
86  	public int read(byte[] b, int off, int len) throws IOException {
87  		return super.read(b, off, len);
88  	}
89  
90  	private void initialize() throws IOException {
91  		long baseSize = readVarInt(delta);
92  		if (baseSize > Integer.MAX_VALUE || baseSize < 0
93  				|| (int) baseSize != base.length) {
94  			throw new IOException(MessageFormat.format(
95  					JGitText.get().binaryDeltaBaseLengthMismatch,
96  					Integer.valueOf(base.length), Long.valueOf(baseSize)));
97  		}
98  		resultLength = readVarInt(delta);
99  		if (resultLength < 0) {
100 			throw new StreamCorruptedException(
101 					JGitText.get().binaryDeltaInvalidResultLength);
102 		}
103 		toDeliver = resultLength;
104 		baseOffset = 0;
105 	}
106 
107 	private int readNext() throws IOException {
108 		if (baseOffset < 0) {
109 			initialize();
110 		}
111 		if (fromBase > 0) {
112 			fromBase--;
113 			return base[baseOffset++] & 0xFF;
114 		} else if (fromDelta > 0) {
115 			fromDelta--;
116 			return delta.read();
117 		}
118 		int command = delta.read();
119 		if (command < 0) {
120 			return -1;
121 		}
122 		if ((command & 0x80) != 0) {
123 			// Decode offset and length to read from base
124 			long copyOffset = 0;
125 			for (int i = 1, shift = 0; i < 0x10; i *= 2, shift += 8) {
126 				if ((command & i) != 0) {
127 					copyOffset |= ((long) next(delta)) << shift;
128 				}
129 			}
130 			int copySize = 0;
131 			for (int i = 0x10, shift = 0; i < 0x80; i *= 2, shift += 8) {
132 				if ((command & i) != 0) {
133 					copySize |= next(delta) << shift;
134 				}
135 			}
136 			if (copySize == 0) {
137 				copySize = 0x10000;
138 			}
139 			if (copyOffset > base.length - copySize) {
140 				throw new StreamCorruptedException(MessageFormat.format(
141 						JGitText.get().binaryDeltaInvalidOffset,
142 						Long.valueOf(copyOffset), Integer.valueOf(copySize)));
143 			}
144 			baseOffset = (int) copyOffset;
145 			fromBase = copySize;
146 			return readNext();
147 		} else if (command != 0) {
148 			// The next 'command' bytes come from the delta
149 			fromDelta = command - 1;
150 			return delta.read();
151 		} else {
152 			// Zero is reserved
153 			throw new StreamCorruptedException(
154 					JGitText.get().unsupportedCommand0);
155 		}
156 	}
157 
158 	private int next(InputStream in) throws IOException {
159 		int b = in.read();
160 		if (b < 0) {
161 			throw new EOFException();
162 		}
163 		return b;
164 	}
165 
166 	private long readVarInt(InputStream in) throws IOException {
167 		long val = 0;
168 		int shift = 0;
169 		int b;
170 		do {
171 			b = next(in);
172 			val |= ((long) (b & 0x7f)) << shift;
173 			shift += 7;
174 		} while ((b & 0x80) != 0);
175 		return val;
176 	}
177 
178 	/**
179 	 * Tells the expected size of the final result.
180 	 *
181 	 * @return the size
182 	 * @throws IOException
183 	 *             if the size cannot be determined from {@code delta}
184 	 */
185 	public long getExpectedResultSize() throws IOException {
186 		if (baseOffset < 0) {
187 			initialize();
188 		}
189 		return resultLength;
190 	}
191 
192 	/**
193 	 * Tells whether the delta has been fully consumed, and the expected number
194 	 * of bytes for the combined result have been read from this
195 	 * {@link BinaryDeltaInputStream}.
196 	 *
197 	 * @return whether delta application was successful
198 	 */
199 	public boolean isFullyConsumed() {
200 		try {
201 			return toDeliver == 0 && delta.read() < 0;
202 		} catch (IOException e) {
203 			return toDeliver == 0;
204 		}
205 	}
206 
207 	@Override
208 	public void close() throws IOException {
209 		delta.close();
210 	}
211 }