View Javadoc
1   /*
2    * Copyright (C) 2017, Google Inc.
3    * and other copyright owners as documented in the project's IP log.
4    *
5    * This program and the accompanying materials are made available
6    * under the terms of the Eclipse Distribution License v1.0 which
7    * accompanies this distribution, is reproduced below, and is
8    * available at http://www.eclipse.org/org/documents/edl-v10.php
9    *
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or
13   * without modification, are permitted provided that the following
14   * conditions are met:
15   *
16   * - Redistributions of source code must retain the above copyright
17   *   notice, this list of conditions and the following disclaimer.
18   *
19   * - Redistributions in binary form must reproduce the above
20   *   copyright notice, this list of conditions and the following
21   *   disclaimer in the documentation and/or other materials provided
22   *   with the distribution.
23   *
24   * - Neither the name of the Eclipse Foundation, Inc. nor the
25   *   names of its contributors may be used to endorse or promote
26   *   products derived from this software without specific prior
27   *   written permission.
28   *
29   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42   */
43  
44  package org.eclipse.jgit.internal.storage.reftable;
45  
46  import java.io.IOException;
47  import java.io.OutputStream;
48  import java.util.ArrayDeque;
49  import java.util.ArrayList;
50  import java.util.List;
51  
52  import org.eclipse.jgit.internal.storage.reftable.ReftableWriter.Stats;
53  import org.eclipse.jgit.lib.PersonIdent;
54  import org.eclipse.jgit.lib.ReflogEntry;
55  
56  /**
57   * Merges reftables and compacts them into a single output.
58   * <p>
59   * For a partial compaction callers should {@link #setIncludeDeletes(boolean)}
60   * to {@code true} to ensure the new reftable continues to use a delete marker
61   * to shadow any lower reftable that may have the reference present.
62   * <p>
63   * By default all log entries within the range defined by
64   * {@link #setMinUpdateIndex(long)} and {@link #setMaxUpdateIndex(long)} are
65   * copied, even if no references in the output file match the log records.
66   * Callers may truncate the log to a more recent time horizon with
67   * {@link #setOldestReflogTimeMillis(long)}, or disable the log altogether with
68   * {@code setOldestReflogTimeMillis(Long.MAX_VALUE)}.
69   */
70  public class ReftableCompactor {
71  	private final ReftableWriter writer;
72  	private final ArrayDeque<ReftableReader> tables = new ArrayDeque<>();
73  
74  	private long compactBytesLimit;
75  	private long bytesToCompact;
76  	private boolean includeDeletes;
77  	private long minUpdateIndex = -1;
78  	private long maxUpdateIndex;
79  	private long oldestReflogTimeMillis;
80  	private Stats stats;
81  
82  	/**
83  	 * Creates a new compactor.
84  	 *
85  	 * @param out
86  	 *            stream to write the compacted tables to. Caller is responsible
87  	 *            for closing {@code out}.
88  	 */
89  	public ReftableCompactor(OutputStream out) {
90  		writer = new ReftableWriter(out);
91  	}
92  
93  	/**
94  	 * Set configuration for the reftable.
95  	 *
96  	 * @param cfg
97  	 *            configuration for the reftable.
98  	 * @return {@code this}
99  	 */
100 	public ReftableCompactor setConfig(ReftableConfig cfg) {
101 		writer.setConfig(cfg);
102 		return this;
103 	}
104 
105 	/**
106 	 * Set limit on number of bytes from source tables to compact.
107 	 *
108 	 * @param bytes
109 	 *            limit on number of bytes from source tables to compact.
110 	 * @return {@code this}
111 	 */
112 	public ReftableCompactor setCompactBytesLimit(long bytes) {
113 		compactBytesLimit = bytes;
114 		return this;
115 	}
116 
117 	/**
118 	 * Whether to include deletions in the output, which may be necessary for
119 	 * partial compaction.
120 	 *
121 	 * @param deletes
122 	 *            {@code true} to include deletions in the output, which may be
123 	 *            necessary for partial compaction.
124 	 * @return {@code this}
125 	 */
126 	public ReftableCompactor setIncludeDeletes(boolean deletes) {
127 		includeDeletes = deletes;
128 		return this;
129 	}
130 
131 	/**
132 	 * Set the minimum update index for log entries that appear in the compacted
133 	 * reftable.
134 	 *
135 	 * @param min
136 	 *            the minimum update index for log entries that appear in the
137 	 *            compacted reftable. This should be 1 higher than the prior
138 	 *            reftable's {@code maxUpdateIndex} if this table will be used
139 	 *            in a stack.
140 	 * @return {@code this}
141 	 */
142 	public ReftableCompactor setMinUpdateIndex(long min) {
143 		minUpdateIndex = min;
144 		return this;
145 	}
146 
147 	/**
148 	 * Set the maximum update index for log entries that appear in the compacted
149 	 * reftable.
150 	 *
151 	 * @param max
152 	 *            the maximum update index for log entries that appear in the
153 	 *            compacted reftable. This should be at least 1 higher than the
154 	 *            prior reftable's {@code maxUpdateIndex} if this table will be
155 	 *            used in a stack.
156 	 * @return {@code this}
157 	 */
158 	public ReftableCompactor setMaxUpdateIndex(long max) {
159 		maxUpdateIndex = max;
160 		return this;
161 	}
162 
163 	/**
164 	 * Set oldest reflog time to preserve.
165 	 *
166 	 * @param timeMillis
167 	 *            oldest log time to preserve. Entries whose timestamps are
168 	 *            {@code >= timeMillis} will be copied into the output file. Log
169 	 *            entries that predate {@code timeMillis} will be discarded.
170 	 *            Specified in Java standard milliseconds since the epoch.
171 	 * @return {@code this}
172 	 */
173 	public ReftableCompactor setOldestReflogTimeMillis(long timeMillis) {
174 		oldestReflogTimeMillis = timeMillis;
175 		return this;
176 	}
177 
178 	/**
179 	 * Add all of the tables, in the specified order.
180 	 * <p>
181 	 * Unconditionally adds all tables, ignoring the
182 	 * {@link #setCompactBytesLimit(long)}.
183 	 *
184 	 * @param readers
185 	 *            tables to compact. Tables should be ordered oldest first/most
186 	 *            recent last so that the more recent tables can shadow the
187 	 *            older results. Caller is responsible for closing the readers.
188 	 * @throws java.io.IOException
189 	 *             update indexes of a reader cannot be accessed.
190 	 */
191 	public void addAll(List<ReftableReader> readers) throws IOException {
192 		for (ReftableReader r : readers) {
193 			tables.add(r);
194 			adjustUpdateIndexes(r);
195 		}
196 	}
197 
198 	/**
199 	 * Try to add this reader at the bottom of the stack.
200 	 * <p>
201 	 * A reader may be rejected by returning {@code false} if the compactor is
202 	 * already rewriting its {@link #setCompactBytesLimit(long)}. When this
203 	 * happens the caller should stop trying to add tables, and execute the
204 	 * compaction.
205 	 *
206 	 * @param reader
207 	 *            the reader to insert at the bottom of the stack. Caller is
208 	 *            responsible for closing the reader.
209 	 * @return {@code true} if the compactor accepted this table; {@code false}
210 	 *         if the compactor has reached its limit.
211 	 * @throws java.io.IOException
212 	 *             if size of {@code reader}, or its update indexes cannot be read.
213 	 */
214 	public boolean tryAddFirst(ReftableReader reader) throws IOException {
215 		long sz = reader.size();
216 		if (compactBytesLimit > 0 && bytesToCompact + sz > compactBytesLimit) {
217 			return false;
218 		}
219 		bytesToCompact += sz;
220 		adjustUpdateIndexes(reader);
221 		tables.addFirst(reader);
222 		return true;
223 	}
224 
225 	private void adjustUpdateIndexes(ReftableReader reader) throws IOException {
226 		if (minUpdateIndex == -1) {
227 			minUpdateIndex = reader.minUpdateIndex();
228 		} else {
229 			minUpdateIndex = Math.min(minUpdateIndex, reader.minUpdateIndex());
230 		}
231 		maxUpdateIndex = Math.max(maxUpdateIndex, reader.maxUpdateIndex());
232 	}
233 
234 	/**
235 	 * Write a compaction to {@code out}.
236 	 *
237 	 * @throws java.io.IOException
238 	 *             if tables cannot be read, or cannot be written.
239 	 */
240 	public void compact() throws IOException {
241 		MergedReftable mr = new MergedReftable(new ArrayList<>(tables));
242 		mr.setIncludeDeletes(includeDeletes);
243 
244 		writer.setMinUpdateIndex(Math.max(minUpdateIndex, 0));
245 		writer.setMaxUpdateIndex(maxUpdateIndex);
246 		writer.begin();
247 		mergeRefs(mr);
248 		mergeLogs(mr);
249 		writer.finish();
250 		stats = writer.getStats();
251 	}
252 
253 	/**
254 	 * Get statistics of the last written reftable.
255 	 *
256 	 * @return statistics of the last written reftable.
257 	 */
258 	public Stats getStats() {
259 		return stats;
260 	}
261 
262 	private void mergeRefs(MergedReftable mr) throws IOException {
263 		try (RefCursor rc = mr.allRefs()) {
264 			while (rc.next()) {
265 				writer.writeRef(rc.getRef(), rc.getRef().getUpdateIndex());
266 			}
267 		}
268 	}
269 
270 	private void mergeLogs(MergedReftable mr) throws IOException {
271 		if (oldestReflogTimeMillis == Long.MAX_VALUE) {
272 			return;
273 		}
274 
275 		try (LogCursor lc = mr.allLogs()) {
276 			while (lc.next()) {
277 				long updateIndex = lc.getUpdateIndex();
278 				if (updateIndex < minUpdateIndex
279 						|| updateIndex > maxUpdateIndex) {
280 					// Cannot merge log records outside the header's range.
281 					continue;
282 				}
283 
284 				String refName = lc.getRefName();
285 				ReflogEntry log = lc.getReflogEntry();
286 				if (log == null) {
287 					if (includeDeletes) {
288 						writer.deleteLog(refName, updateIndex);
289 					}
290 					continue;
291 				}
292 
293 				PersonIdent who = log.getWho();
294 				if (who.getWhen().getTime() >= oldestReflogTimeMillis) {
295 					writer.writeLog(
296 							refName,
297 							updateIndex,
298 							who,
299 							log.getOldId(),
300 							log.getNewId(),
301 							log.getComment());
302 				}
303 			}
304 		}
305 	}
306 }