1 /* 2 * Copyright (C) 2008-2009, Google Inc. 3 * Copyright (C) 2007, Robin Rosenberg <robin.rosenberg@dewire.com> 4 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> 5 * and other copyright owners as documented in the project's IP log. 6 * 7 * This program and the accompanying materials are made available 8 * under the terms of the Eclipse Distribution License v1.0 which 9 * accompanies this distribution, is reproduced below, and is 10 * available at http://www.eclipse.org/org/documents/edl-v10.php 11 * 12 * All rights reserved. 13 * 14 * Redistribution and use in source and binary forms, with or 15 * without modification, are permitted provided that the following 16 * conditions are met: 17 * 18 * - Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials provided 24 * with the distribution. 25 * 26 * - Neither the name of the Eclipse Foundation, Inc. nor the 27 * names of its contributors may be used to endorse or promote 28 * products derived from this software without specific prior 29 * written permission. 30 * 31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 32 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 33 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 34 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 35 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 36 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 37 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 38 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 39 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 40 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 43 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 44 */ 45 46 package org.eclipse.jgit.treewalk; 47 48 import java.io.IOException; 49 import java.nio.ByteBuffer; 50 import java.nio.CharBuffer; 51 52 import org.eclipse.jgit.dircache.DirCacheCheckout; 53 import org.eclipse.jgit.errors.CorruptObjectException; 54 import org.eclipse.jgit.errors.IncorrectObjectTypeException; 55 import org.eclipse.jgit.lib.Constants; 56 import org.eclipse.jgit.lib.FileMode; 57 import org.eclipse.jgit.lib.MutableObjectId; 58 import org.eclipse.jgit.lib.ObjectId; 59 import org.eclipse.jgit.lib.ObjectReader; 60 import org.eclipse.jgit.treewalk.filter.TreeFilter; 61 62 /** 63 * Walks a Git tree (directory) in Git sort order. 64 * <p> 65 * A new iterator instance should be positioned on the first entry, or at eof. 66 * Data for the first entry (if not at eof) should be available immediately. 67 * <p> 68 * Implementors must walk a tree in the Git sort order, which has the following 69 * odd sorting: 70 * <ol> 71 * <li>A.c</li> 72 * <li>A/c</li> 73 * <li>A0c</li> 74 * </ol> 75 * <p> 76 * In the second item, <code>A</code> is the name of a subtree and 77 * <code>c</code> is a file within that subtree. The other two items are files 78 * in the root level tree. 79 * 80 * @see CanonicalTreeParser 81 */ 82 public abstract class AbstractTreeIterator { 83 /** Default size for the {@link #path} buffer. */ 84 protected static final int DEFAULT_PATH_SIZE = 128; 85 86 /** A dummy object id buffer that matches the zero ObjectId. */ 87 protected static final byte[] zeroid = new byte[Constants.OBJECT_ID_LENGTH]; 88 89 /** Iterator for the parent tree; null if we are the root iterator. */ 90 final AbstractTreeIterator parent; 91 92 /** The iterator this current entry is path equal to. */ 93 AbstractTreeIterator matches; 94 95 /** 96 * Number of entries we moved forward to force a D/F conflict match. 97 * 98 * @see NameConflictTreeWalk 99 */ 100 int matchShift; 101 102 /** 103 * Mode bits for the current entry. 104 * <p> 105 * A numerical value from FileMode is usually faster for an iterator to 106 * obtain from its data source so this is the preferred representation. 107 * 108 * @see org.eclipse.jgit.lib.FileMode 109 */ 110 protected int mode; 111 112 /** 113 * Path buffer for the current entry. 114 * <p> 115 * This buffer is pre-allocated at the start of walking and is shared from 116 * parent iterators down into their subtree iterators. The sharing allows 117 * the current entry to always be a full path from the root, while each 118 * subtree only needs to populate the part that is under their control. 119 */ 120 protected byte[] path; 121 122 /** 123 * Position within {@link #path} this iterator starts writing at. 124 * <p> 125 * This is the first offset in {@link #path} that this iterator must 126 * populate during {@link #next}. At the root level (when {@link #parent} 127 * is null) this is 0. For a subtree iterator the index before this position 128 * should have the value '/'. 129 */ 130 protected final int pathOffset; 131 132 /** 133 * Total length of the current entry's complete path from the root. 134 * <p> 135 * This is the number of bytes within {@link #path} that pertain to the 136 * current entry. Values at this index through the end of the array are 137 * garbage and may be randomly populated from prior entries. 138 */ 139 protected int pathLen; 140 141 /** Create a new iterator with no parent. */ 142 protected AbstractTreeIterator() { 143 parent = null; 144 path = new byte[DEFAULT_PATH_SIZE]; 145 pathOffset = 0; 146 } 147 148 /** 149 * Create a new iterator with no parent and a prefix. 150 * <p> 151 * The prefix path supplied is inserted in front of all paths generated by 152 * this iterator. It is intended to be used when an iterator is being 153 * created for a subsection of an overall repository and needs to be 154 * combined with other iterators that are created to run over the entire 155 * repository namespace. 156 * 157 * @param prefix 158 * position of this iterator in the repository tree. The value 159 * may be null or the empty string to indicate the prefix is the 160 * root of the repository. A trailing slash ('/') is 161 * automatically appended if the prefix does not end in '/'. 162 */ 163 protected AbstractTreeIterator(final String prefix) { 164 parent = null; 165 166 if (prefix != null && prefix.length() > 0) { 167 final ByteBuffer b; 168 169 b = Constants.CHARSET.encode(CharBuffer.wrap(prefix)); 170 pathLen = b.limit(); 171 path = new byte[Math.max(DEFAULT_PATH_SIZE, pathLen + 1)]; 172 b.get(path, 0, pathLen); 173 if (path[pathLen - 1] != '/') 174 path[pathLen++] = '/'; 175 pathOffset = pathLen; 176 } else { 177 path = new byte[DEFAULT_PATH_SIZE]; 178 pathOffset = 0; 179 } 180 } 181 182 /** 183 * Create a new iterator with no parent and a prefix. 184 * <p> 185 * The prefix path supplied is inserted in front of all paths generated by 186 * this iterator. It is intended to be used when an iterator is being 187 * created for a subsection of an overall repository and needs to be 188 * combined with other iterators that are created to run over the entire 189 * repository namespace. 190 * 191 * @param prefix 192 * position of this iterator in the repository tree. The value 193 * may be null or the empty array to indicate the prefix is the 194 * root of the repository. A trailing slash ('/') is 195 * automatically appended if the prefix does not end in '/'. 196 */ 197 protected AbstractTreeIterator(final byte[] prefix) { 198 parent = null; 199 200 if (prefix != null && prefix.length > 0) { 201 pathLen = prefix.length; 202 path = new byte[Math.max(DEFAULT_PATH_SIZE, pathLen + 1)]; 203 System.arraycopy(prefix, 0, path, 0, pathLen); 204 if (path[pathLen - 1] != '/') 205 path[pathLen++] = '/'; 206 pathOffset = pathLen; 207 } else { 208 path = new byte[DEFAULT_PATH_SIZE]; 209 pathOffset = 0; 210 } 211 } 212 213 /** 214 * Create an iterator for a subtree of an existing iterator. 215 * 216 * @param p 217 * parent tree iterator. 218 */ 219 protected AbstractTreeIterator(final AbstractTreeIterator p) { 220 parent = p; 221 path = p.path; 222 pathOffset = p.pathLen + 1; 223 224 try { 225 path[pathOffset - 1] = '/'; 226 } catch (ArrayIndexOutOfBoundsException e) { 227 growPath(p.pathLen); 228 path[pathOffset - 1] = '/'; 229 } 230 } 231 232 /** 233 * Create an iterator for a subtree of an existing iterator. 234 * <p> 235 * The caller is responsible for setting up the path of the child iterator. 236 * 237 * @param p 238 * parent tree iterator. 239 * @param childPath 240 * path array to be used by the child iterator. This path must 241 * contain the path from the top of the walk to the first child 242 * and must end with a '/'. 243 * @param childPathOffset 244 * position within <code>childPath</code> where the child can 245 * insert its data. The value at 246 * <code>childPath[childPathOffset-1]</code> must be '/'. 247 */ 248 protected AbstractTreeIterator(final AbstractTreeIterator p, 249 final byte[] childPath, final int childPathOffset) { 250 parent = p; 251 path = childPath; 252 pathOffset = childPathOffset; 253 } 254 255 /** 256 * Grow the path buffer larger. 257 * 258 * @param len 259 * number of live bytes in the path buffer. This many bytes will 260 * be moved into the larger buffer. 261 */ 262 protected void growPath(final int len) { 263 setPathCapacity(path.length << 1, len); 264 } 265 266 /** 267 * Ensure that path is capable to hold at least {@code capacity} bytes 268 * 269 * @param capacity 270 * the amount of bytes to hold 271 * @param len 272 * the amount of live bytes in path buffer 273 */ 274 protected void ensurePathCapacity(final int capacity, final int len) { 275 if (path.length >= capacity) 276 return; 277 final byte[] o = path; 278 int current = o.length; 279 int newCapacity = current; 280 while (newCapacity < capacity && newCapacity > 0) 281 newCapacity <<= 1; 282 setPathCapacity(newCapacity, len); 283 } 284 285 /** 286 * Set path buffer capacity to the specified size 287 * 288 * @param capacity 289 * the new size 290 * @param len 291 * the amount of bytes to copy 292 */ 293 private void setPathCapacity(int capacity, int len) { 294 final byte[] o = path; 295 final byte[] n = new byte[capacity]; 296 System.arraycopy(o, 0, n, 0, len); 297 for (AbstractTreeIterator p = this; p != null && p.path == o; p = p.parent) 298 p.path = n; 299 } 300 301 /** 302 * Compare the path of this current entry to another iterator's entry. 303 * 304 * @param p 305 * the other iterator to compare the path against. 306 * @return -1 if this entry sorts first; 0 if the entries are equal; 1 if 307 * p's entry sorts first. 308 */ 309 public int pathCompare(final AbstractTreeIterator p) { 310 return pathCompare(p, p.mode); 311 } 312 313 int pathCompare(final AbstractTreeIterator p, final int pMode) { 314 // Its common when we are a subtree for both parents to match; 315 // when this happens everything in path[0..cPos] is known to 316 // be equal and does not require evaluation again. 317 // 318 int cPos = alreadyMatch(this, p); 319 return pathCompare(p.path, cPos, p.pathLen, pMode, cPos); 320 } 321 322 /** 323 * Compare the path of this current entry to a raw buffer. 324 * 325 * @param buf 326 * the raw path buffer. 327 * @param pos 328 * position to start reading the raw buffer. 329 * @param end 330 * one past the end of the raw buffer (length is end - pos). 331 * @param pathMode 332 * the mode of the path. 333 * @return -1 if this entry sorts first; 0 if the entries are equal; 1 if 334 * p's entry sorts first. 335 */ 336 public int pathCompare(byte[] buf, int pos, int end, int pathMode) { 337 return pathCompare(buf, pos, end, pathMode, 0); 338 } 339 340 private int pathCompare(byte[] b, int bPos, int bEnd, int bMode, int aPos) { 341 final byte[] a = path; 342 final int aEnd = pathLen; 343 344 for (; aPos < aEnd && bPos < bEnd; aPos++, bPos++) { 345 final int cmp = (a[aPos] & 0xff) - (b[bPos] & 0xff); 346 if (cmp != 0) 347 return cmp; 348 } 349 350 if (aPos < aEnd) 351 return (a[aPos] & 0xff) - lastPathChar(bMode); 352 if (bPos < bEnd) 353 return lastPathChar(mode) - (b[bPos] & 0xff); 354 return lastPathChar(mode) - lastPathChar(bMode); 355 } 356 357 private static int alreadyMatch(AbstractTreeIterator a, 358 AbstractTreeIterator b) { 359 for (;;) { 360 final AbstractTreeIterator ap = a.parent; 361 final AbstractTreeIterator bp = b.parent; 362 if (ap == null || bp == null) 363 return 0; 364 if (ap.matches == bp.matches) 365 return a.pathOffset; 366 a = ap; 367 b = bp; 368 } 369 } 370 371 private static int lastPathChar(final int mode) { 372 return FileMode.TREE.equals(mode) ? '/' : '\0'; 373 } 374 375 /** 376 * Check if the current entry of both iterators has the same id. 377 * <p> 378 * This method is faster than {@link #getEntryObjectId()} as it does not 379 * require copying the bytes out of the buffers. A direct {@link #idBuffer} 380 * compare operation is performed. 381 * 382 * @param otherIterator 383 * the other iterator to test against. 384 * @return true if both iterators have the same object id; false otherwise. 385 */ 386 public boolean idEqual(final AbstractTreeIterator otherIterator) { 387 return ObjectId.equals(idBuffer(), idOffset(), 388 otherIterator.idBuffer(), otherIterator.idOffset()); 389 } 390 391 /** @return true if the entry has a valid ObjectId. */ 392 public abstract boolean hasId(); 393 394 /** 395 * Get the object id of the current entry. 396 * 397 * @return an object id for the current entry. 398 */ 399 public ObjectId getEntryObjectId() { 400 return ObjectId.fromRaw(idBuffer(), idOffset()); 401 } 402 403 /** 404 * Obtain the ObjectId for the current entry. 405 * 406 * @param out 407 * buffer to copy the object id into. 408 */ 409 public void getEntryObjectId(final MutableObjectId out) { 410 out.fromRaw(idBuffer(), idOffset()); 411 } 412 413 /** @return the file mode of the current entry. */ 414 public FileMode getEntryFileMode() { 415 return FileMode.fromBits(mode); 416 } 417 418 /** @return the file mode of the current entry as bits */ 419 public int getEntryRawMode() { 420 return mode; 421 } 422 423 /** @return path of the current entry, as a string. */ 424 public String getEntryPathString() { 425 return TreeWalk.pathOf(this); 426 } 427 428 /** 429 * Get the current entry path buffer. 430 * <p> 431 * Note that the returned byte[] has to be used together with 432 * {@link #getEntryPathLength()} (only use bytes up to this length). 433 * 434 * @return the internal buffer holding the current path. 435 */ 436 public byte[] getEntryPathBuffer() { 437 return path; 438 } 439 440 /** @return length of the path in {@link #getEntryPathBuffer()}. */ 441 public int getEntryPathLength() { 442 return pathLen; 443 } 444 445 /** 446 * Get the current entry's path hash code. 447 * <p> 448 * This method computes a hash code on the fly for this path, the hash is 449 * suitable to cluster objects that may have similar paths together. 450 * 451 * @return path hash code; any integer may be returned. 452 */ 453 public int getEntryPathHashCode() { 454 int hash = 0; 455 for (int i = Math.max(0, pathLen - 16); i < pathLen; i++) { 456 byte c = path[i]; 457 if (c != ' ') 458 hash = (hash >>> 2) + (c << 24); 459 } 460 return hash; 461 } 462 463 /** 464 * Get the byte array buffer object IDs must be copied out of. 465 * <p> 466 * The id buffer contains the bytes necessary to construct an ObjectId for 467 * the current entry of this iterator. The buffer can be the same buffer for 468 * all entries, or it can be a unique buffer per-entry. Implementations are 469 * encouraged to expose their private buffer whenever possible to reduce 470 * garbage generation and copying costs. 471 * 472 * @return byte array the implementation stores object IDs within. 473 * @see #getEntryObjectId() 474 */ 475 public abstract byte[] idBuffer(); 476 477 /** 478 * Get the position within {@link #idBuffer()} of this entry's ObjectId. 479 * 480 * @return offset into the array returned by {@link #idBuffer()} where the 481 * ObjectId must be copied out of. 482 */ 483 public abstract int idOffset(); 484 485 /** 486 * Create a new iterator for the current entry's subtree. 487 * <p> 488 * The parent reference of the iterator must be <code>this</code>, 489 * otherwise the caller would not be able to exit out of the subtree 490 * iterator correctly and return to continue walking <code>this</code>. 491 * 492 * @param reader 493 * reader to load the tree data from. 494 * @return a new parser that walks over the current subtree. 495 * @throws IncorrectObjectTypeException 496 * the current entry is not actually a tree and cannot be parsed 497 * as though it were a tree. 498 * @throws IOException 499 * a loose object or pack file could not be read. 500 */ 501 public abstract AbstractTreeIterator createSubtreeIterator( 502 ObjectReader reader) throws IncorrectObjectTypeException, 503 IOException; 504 505 /** 506 * Create a new iterator as though the current entry were a subtree. 507 * 508 * @return a new empty tree iterator. 509 */ 510 public EmptyTreeIterator createEmptyTreeIterator() { 511 return new EmptyTreeIterator(this); 512 } 513 514 /** 515 * Create a new iterator for the current entry's subtree. 516 * <p> 517 * The parent reference of the iterator must be <code>this</code>, otherwise 518 * the caller would not be able to exit out of the subtree iterator 519 * correctly and return to continue walking <code>this</code>. 520 * 521 * @param reader 522 * reader to load the tree data from. 523 * @param idBuffer 524 * temporary ObjectId buffer for use by this method. 525 * @return a new parser that walks over the current subtree. 526 * @throws IncorrectObjectTypeException 527 * the current entry is not actually a tree and cannot be parsed 528 * as though it were a tree. 529 * @throws IOException 530 * a loose object or pack file could not be read. 531 */ 532 public AbstractTreeIterator createSubtreeIterator( 533 final ObjectReader reader, final MutableObjectId idBuffer) 534 throws IncorrectObjectTypeException, IOException { 535 return createSubtreeIterator(reader); 536 } 537 538 /** 539 * Position this iterator on the first entry. 540 * 541 * The default implementation of this method uses {@code back(1)} until 542 * {@code first()} is true. This is most likely not the most efficient 543 * method of repositioning the iterator to its first entry, so subclasses 544 * are strongly encouraged to override the method. 545 * 546 * @throws CorruptObjectException 547 * the tree is invalid. 548 */ 549 public void reset() throws CorruptObjectException { 550 while (!first()) 551 back(1); 552 } 553 554 /** 555 * Is this tree iterator positioned on its first entry? 556 * <p> 557 * An iterator is positioned on the first entry if <code>back(1)</code> 558 * would be an invalid request as there is no entry before the current one. 559 * <p> 560 * An empty iterator (one with no entries) will be 561 * <code>first() && eof()</code>. 562 * 563 * @return true if the iterator is positioned on the first entry. 564 */ 565 public abstract boolean first(); 566 567 /** 568 * Is this tree iterator at its EOF point (no more entries)? 569 * <p> 570 * An iterator is at EOF if there is no current entry. 571 * 572 * @return true if we have walked all entries and have none left. 573 */ 574 public abstract boolean eof(); 575 576 /** 577 * Move to next entry, populating this iterator with the entry data. 578 * <p> 579 * The delta indicates how many moves forward should occur. The most common 580 * delta is 1 to move to the next entry. 581 * <p> 582 * Implementations must populate the following members: 583 * <ul> 584 * <li>{@link #mode}</li> 585 * <li>{@link #path} (from {@link #pathOffset} to {@link #pathLen})</li> 586 * <li>{@link #pathLen}</li> 587 * </ul> 588 * as well as any implementation dependent information necessary to 589 * accurately return data from {@link #idBuffer()} and {@link #idOffset()} 590 * when demanded. 591 * 592 * @param delta 593 * number of entries to move the iterator by. Must be a positive, 594 * non-zero integer. 595 * @throws CorruptObjectException 596 * the tree is invalid. 597 */ 598 public abstract void next(int delta) throws CorruptObjectException; 599 600 /** 601 * Move to prior entry, populating this iterator with the entry data. 602 * <p> 603 * The delta indicates how many moves backward should occur.The most common 604 * delta is 1 to move to the prior entry. 605 * <p> 606 * Implementations must populate the following members: 607 * <ul> 608 * <li>{@link #mode}</li> 609 * <li>{@link #path} (from {@link #pathOffset} to {@link #pathLen})</li> 610 * <li>{@link #pathLen}</li> 611 * </ul> 612 * as well as any implementation dependent information necessary to 613 * accurately return data from {@link #idBuffer()} and {@link #idOffset()} 614 * when demanded. 615 * 616 * @param delta 617 * number of entries to move the iterator by. Must be a positive, 618 * non-zero integer. 619 * @throws CorruptObjectException 620 * the tree is invalid. 621 */ 622 public abstract void back(int delta) throws CorruptObjectException; 623 624 /** 625 * Advance to the next tree entry, populating this iterator with its data. 626 * <p> 627 * This method behaves like <code>seek(1)</code> but is called by 628 * {@link TreeWalk} only if a {@link TreeFilter} was used and ruled out the 629 * current entry from the results. In such cases this tree iterator may 630 * perform special behavior. 631 * 632 * @throws CorruptObjectException 633 * the tree is invalid. 634 */ 635 public void skip() throws CorruptObjectException { 636 next(1); 637 } 638 639 /** 640 * Indicates to the iterator that no more entries will be read. 641 * <p> 642 * This is only invoked by TreeWalk when the iteration is aborted early due 643 * to a {@link org.eclipse.jgit.errors.StopWalkException} being thrown from 644 * within a TreeFilter. 645 */ 646 public void stopWalk() { 647 // Do nothing by default. Most iterators do not care. 648 } 649 650 /** 651 * @return the length of the name component of the path for the current entry 652 */ 653 public int getNameLength() { 654 return pathLen - pathOffset; 655 } 656 657 /** 658 * JGit internal API for use by {@link DirCacheCheckout} 659 * 660 * @return start of name component part within {@link #getEntryPathBuffer()} 661 * @since 2.0 662 */ 663 public int getNameOffset() { 664 return pathOffset; 665 } 666 667 /** 668 * Get the name component of the current entry path into the provided 669 * buffer. 670 * 671 * @param buffer 672 * the buffer to get the name into, it is assumed that buffer can 673 * hold the name 674 * @param offset 675 * the offset of the name in the buffer 676 * @see #getNameLength() 677 */ 678 public void getName(byte[] buffer, int offset) { 679 System.arraycopy(path, pathOffset, buffer, offset, pathLen - pathOffset); 680 } 681 682 @SuppressWarnings("nls") 683 @Override 684 public String toString() { 685 return getClass().getSimpleName() + "[" + getEntryPathString() + "]"; //$NON-NLS-1$ 686 } 687 }