1 /* 2 * Copyright (C) 2017, Google Inc. 3 * and other copyright owners as documented in the project's IP log. 4 * 5 * This program and the accompanying materials are made available 6 * under the terms of the Eclipse Distribution License v1.0 which 7 * accompanies this distribution, is reproduced below, and is 8 * available at http://www.eclipse.org/org/documents/edl-v10.php 9 * 10 * All rights reserved. 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials provided 22 * with the distribution. 23 * 24 * - Neither the name of the Eclipse Foundation, Inc. nor the 25 * names of its contributors may be used to endorse or promote 26 * products derived from this software without specific prior 27 * written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 */ 43 44 package org.eclipse.jgit.internal.storage.reftable; 45 46 import java.io.IOException; 47 import java.io.OutputStream; 48 import java.util.ArrayDeque; 49 import java.util.ArrayList; 50 import java.util.List; 51 52 import org.eclipse.jgit.internal.storage.reftable.ReftableWriter.Stats; 53 import org.eclipse.jgit.lib.PersonIdent; 54 import org.eclipse.jgit.lib.ReflogEntry; 55 56 /** 57 * Merges reftables and compacts them into a single output. 58 * <p> 59 * For a partial compaction callers should {@link #setIncludeDeletes(boolean)} 60 * to {@code true} to ensure the new reftable continues to use a delete marker 61 * to shadow any lower reftable that may have the reference present. 62 * <p> 63 * By default all log entries within the range defined by 64 * {@link #setMinUpdateIndex(long)} and {@link #setMaxUpdateIndex(long)} are 65 * copied, even if no references in the output file match the log records. 66 * Callers may truncate the log to a more recent time horizon with 67 * {@link #setOldestReflogTimeMillis(long)}, or disable the log altogether with 68 * {@code setOldestReflogTimeMillis(Long.MAX_VALUE)}. 69 */ 70 public class ReftableCompactor { 71 private final ReftableWriter writer; 72 private final ArrayDeque<ReftableReader> tables = new ArrayDeque<>(); 73 74 private long compactBytesLimit; 75 private long bytesToCompact; 76 private boolean includeDeletes; 77 private long minUpdateIndex = -1; 78 private long maxUpdateIndex; 79 private long oldestReflogTimeMillis; 80 private Stats stats; 81 82 /** 83 * Creates a new compactor. 84 * 85 * @param out 86 * stream to write the compacted tables to. Caller is responsible 87 * for closing {@code out}. 88 */ 89 public ReftableCompactor(OutputStream out) { 90 writer = new ReftableWriter(out); 91 } 92 93 /** 94 * Set configuration for the reftable. 95 * 96 * @param cfg 97 * configuration for the reftable. 98 * @return {@code this} 99 */ 100 public ReftableCompactor setConfig(ReftableConfig cfg) { 101 writer.setConfig(cfg); 102 return this; 103 } 104 105 /** 106 * Set limit on number of bytes from source tables to compact. 107 * 108 * @param bytes 109 * limit on number of bytes from source tables to compact. 110 * @return {@code this} 111 */ 112 public ReftableCompactor setCompactBytesLimit(long bytes) { 113 compactBytesLimit = bytes; 114 return this; 115 } 116 117 /** 118 * Whether to include deletions in the output, which may be necessary for 119 * partial compaction. 120 * 121 * @param deletes 122 * {@code true} to include deletions in the output, which may be 123 * necessary for partial compaction. 124 * @return {@code this} 125 */ 126 public ReftableCompactor setIncludeDeletes(boolean deletes) { 127 includeDeletes = deletes; 128 return this; 129 } 130 131 /** 132 * Set the minimum update index for log entries that appear in the compacted 133 * reftable. 134 * 135 * @param min 136 * the minimum update index for log entries that appear in the 137 * compacted reftable. This should be 1 higher than the prior 138 * reftable's {@code maxUpdateIndex} if this table will be used 139 * in a stack. 140 * @return {@code this} 141 */ 142 public ReftableCompactor setMinUpdateIndex(long min) { 143 minUpdateIndex = min; 144 return this; 145 } 146 147 /** 148 * Set the maximum update index for log entries that appear in the compacted 149 * reftable. 150 * 151 * @param max 152 * the maximum update index for log entries that appear in the 153 * compacted reftable. This should be at least 1 higher than the 154 * prior reftable's {@code maxUpdateIndex} if this table will be 155 * used in a stack. 156 * @return {@code this} 157 */ 158 public ReftableCompactor setMaxUpdateIndex(long max) { 159 maxUpdateIndex = max; 160 return this; 161 } 162 163 /** 164 * Set oldest reflog time to preserve. 165 * 166 * @param timeMillis 167 * oldest log time to preserve. Entries whose timestamps are 168 * {@code >= timeMillis} will be copied into the output file. Log 169 * entries that predate {@code timeMillis} will be discarded. 170 * Specified in Java standard milliseconds since the epoch. 171 * @return {@code this} 172 */ 173 public ReftableCompactor setOldestReflogTimeMillis(long timeMillis) { 174 oldestReflogTimeMillis = timeMillis; 175 return this; 176 } 177 178 /** 179 * Add all of the tables, in the specified order. 180 * <p> 181 * Unconditionally adds all tables, ignoring the 182 * {@link #setCompactBytesLimit(long)}. 183 * 184 * @param readers 185 * tables to compact. Tables should be ordered oldest first/most 186 * recent last so that the more recent tables can shadow the 187 * older results. Caller is responsible for closing the readers. 188 * @throws java.io.IOException 189 * update indexes of a reader cannot be accessed. 190 */ 191 public void addAll(List<ReftableReader> readers) throws IOException { 192 for (ReftableReader r : readers) { 193 tables.add(r); 194 adjustUpdateIndexes(r); 195 } 196 } 197 198 /** 199 * Try to add this reader at the bottom of the stack. 200 * <p> 201 * A reader may be rejected by returning {@code false} if the compactor is 202 * already rewriting its {@link #setCompactBytesLimit(long)}. When this 203 * happens the caller should stop trying to add tables, and execute the 204 * compaction. 205 * 206 * @param reader 207 * the reader to insert at the bottom of the stack. Caller is 208 * responsible for closing the reader. 209 * @return {@code true} if the compactor accepted this table; {@code false} 210 * if the compactor has reached its limit. 211 * @throws java.io.IOException 212 * if size of {@code reader}, or its update indexes cannot be read. 213 */ 214 public boolean tryAddFirst(ReftableReader reader) throws IOException { 215 long sz = reader.size(); 216 if (compactBytesLimit > 0 && bytesToCompact + sz > compactBytesLimit) { 217 return false; 218 } 219 bytesToCompact += sz; 220 adjustUpdateIndexes(reader); 221 tables.addFirst(reader); 222 return true; 223 } 224 225 private void adjustUpdateIndexes(ReftableReader reader) throws IOException { 226 if (minUpdateIndex == -1) { 227 minUpdateIndex = reader.minUpdateIndex(); 228 } else { 229 minUpdateIndex = Math.min(minUpdateIndex, reader.minUpdateIndex()); 230 } 231 maxUpdateIndex = Math.max(maxUpdateIndex, reader.maxUpdateIndex()); 232 } 233 234 /** 235 * Write a compaction to {@code out}. 236 * 237 * @throws java.io.IOException 238 * if tables cannot be read, or cannot be written. 239 */ 240 public void compact() throws IOException { 241 MergedReftable mr = new MergedReftable(new ArrayList<>(tables)); 242 mr.setIncludeDeletes(includeDeletes); 243 244 writer.setMinUpdateIndex(Math.max(minUpdateIndex, 0)); 245 writer.setMaxUpdateIndex(maxUpdateIndex); 246 writer.begin(); 247 mergeRefs(mr); 248 mergeLogs(mr); 249 writer.finish(); 250 stats = writer.getStats(); 251 } 252 253 /** 254 * Get statistics of the last written reftable. 255 * 256 * @return statistics of the last written reftable. 257 */ 258 public Stats getStats() { 259 return stats; 260 } 261 262 private void mergeRefs(MergedReftable mr) throws IOException { 263 try (RefCursor rc = mr.allRefs()) { 264 while (rc.next()) { 265 writer.writeRef(rc.getRef(), rc.getRef().getUpdateIndex()); 266 } 267 } 268 } 269 270 private void mergeLogs(MergedReftable mr) throws IOException { 271 if (oldestReflogTimeMillis == Long.MAX_VALUE) { 272 return; 273 } 274 275 try (LogCursor lc = mr.allLogs()) { 276 while (lc.next()) { 277 long updateIndex = lc.getUpdateIndex(); 278 if (updateIndex < minUpdateIndex 279 || updateIndex > maxUpdateIndex) { 280 // Cannot merge log records outside the header's range. 281 continue; 282 } 283 284 String refName = lc.getRefName(); 285 ReflogEntry log = lc.getReflogEntry(); 286 if (log == null) { 287 if (includeDeletes) { 288 writer.deleteLog(refName, updateIndex); 289 } 290 continue; 291 } 292 293 PersonIdent who = log.getWho(); 294 if (who.getWhen().getTime() >= oldestReflogTimeMillis) { 295 writer.writeLog( 296 refName, 297 updateIndex, 298 who, 299 log.getOldId(), 300 log.getNewId(), 301 log.getComment()); 302 } 303 } 304 } 305 } 306 }