001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.util; 019 020import static org.apache.hbase.thirdparty.com.google.common.base.Preconditions.checkArgument; 021import static org.apache.hbase.thirdparty.com.google.common.base.Preconditions.checkNotNull; 022import static org.apache.hbase.thirdparty.com.google.common.base.Preconditions.checkPositionIndex; 023 024import java.io.DataInput; 025import java.io.DataOutput; 026import java.io.IOException; 027import java.io.UnsupportedEncodingException; 028import java.math.BigDecimal; 029import java.math.BigInteger; 030import java.nio.ByteBuffer; 031import java.nio.charset.StandardCharsets; 032import java.security.SecureRandom; 033import java.util.ArrayList; 034import java.util.Arrays; 035import java.util.Collection; 036import java.util.Collections; 037import java.util.Comparator; 038import java.util.Iterator; 039import java.util.List; 040 041import org.apache.hadoop.hbase.Cell; 042import org.apache.hadoop.hbase.CellComparator; 043import org.apache.hadoop.hbase.KeyValue; 044import org.apache.hadoop.io.RawComparator; 045import org.apache.hadoop.io.WritableComparator; 046import org.apache.hadoop.io.WritableUtils; 047import org.apache.yetus.audience.InterfaceAudience; 048import org.slf4j.Logger; 049import org.slf4j.LoggerFactory; 050 051import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 052import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils; 053 054import com.google.protobuf.ByteString; 055 056import sun.misc.Unsafe; 057 058/** 059 * Utility class that handles byte arrays, conversions to/from other types, 060 * comparisons, hash code generation, manufacturing keys for HashMaps or 061 * HashSets, and can be used as key in maps or trees. 062 */ 063@SuppressWarnings("restriction") 064@InterfaceAudience.Public 065@edu.umd.cs.findbugs.annotations.SuppressWarnings( 066 value="EQ_CHECK_FOR_OPERAND_NOT_COMPATIBLE_WITH_THIS", 067 justification="It has been like this forever") 068public class Bytes implements Comparable<Bytes> { 069 070 // Using the charset canonical name for String/byte[] conversions is much 071 // more efficient due to use of cached encoders/decoders. 072 private static final String UTF8_CSN = StandardCharsets.UTF_8.name(); 073 074 //HConstants.EMPTY_BYTE_ARRAY should be updated if this changed 075 private static final byte [] EMPTY_BYTE_ARRAY = new byte [0]; 076 077 private static final Logger LOG = LoggerFactory.getLogger(Bytes.class); 078 079 /** 080 * Size of boolean in bytes 081 */ 082 public static final int SIZEOF_BOOLEAN = Byte.SIZE / Byte.SIZE; 083 084 /** 085 * Size of byte in bytes 086 */ 087 public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN; 088 089 /** 090 * Size of char in bytes 091 */ 092 public static final int SIZEOF_CHAR = Character.SIZE / Byte.SIZE; 093 094 /** 095 * Size of double in bytes 096 */ 097 public static final int SIZEOF_DOUBLE = Double.SIZE / Byte.SIZE; 098 099 /** 100 * Size of float in bytes 101 */ 102 public static final int SIZEOF_FLOAT = Float.SIZE / Byte.SIZE; 103 104 /** 105 * Size of int in bytes 106 */ 107 public static final int SIZEOF_INT = Integer.SIZE / Byte.SIZE; 108 109 /** 110 * Size of long in bytes 111 */ 112 public static final int SIZEOF_LONG = Long.SIZE / Byte.SIZE; 113 114 /** 115 * Size of short in bytes 116 */ 117 public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE; 118 119 /** 120 * Mask to apply to a long to reveal the lower int only. Use like this: 121 * int i = (int)(0xFFFFFFFF00000000L ^ some_long_value); 122 */ 123 public static final long MASK_FOR_LOWER_INT_IN_LONG = 0xFFFFFFFF00000000L; 124 125 /** 126 * Estimate of size cost to pay beyond payload in jvm for instance of byte []. 127 * Estimate based on study of jhat and jprofiler numbers. 128 */ 129 // JHat says BU is 56 bytes. 130 // SizeOf which uses java.lang.instrument says 24 bytes. (3 longs?) 131 public static final int ESTIMATED_HEAP_TAX = 16; 132 133 @VisibleForTesting 134 static final boolean UNSAFE_UNALIGNED = UnsafeAvailChecker.unaligned(); 135 136 /** 137 * Returns length of the byte array, returning 0 if the array is null. 138 * Useful for calculating sizes. 139 * @param b byte array, which can be null 140 * @return 0 if b is null, otherwise returns length 141 */ 142 final public static int len(byte[] b) { 143 return b == null ? 0 : b.length; 144 } 145 146 private byte[] bytes; 147 private int offset; 148 private int length; 149 150 /** 151 * Create a zero-size sequence. 152 */ 153 public Bytes() { 154 super(); 155 } 156 157 /** 158 * Create a Bytes using the byte array as the initial value. 159 * @param bytes This array becomes the backing storage for the object. 160 */ 161 public Bytes(byte[] bytes) { 162 this(bytes, 0, bytes.length); 163 } 164 165 /** 166 * Set the new Bytes to the contents of the passed 167 * <code>ibw</code>. 168 * @param ibw the value to set this Bytes to. 169 */ 170 public Bytes(final Bytes ibw) { 171 this(ibw.get(), ibw.getOffset(), ibw.getLength()); 172 } 173 174 /** 175 * Set the value to a given byte range 176 * @param bytes the new byte range to set to 177 * @param offset the offset in newData to start at 178 * @param length the number of bytes in the range 179 */ 180 public Bytes(final byte[] bytes, final int offset, 181 final int length) { 182 this.bytes = bytes; 183 this.offset = offset; 184 this.length = length; 185 } 186 187 /** 188 * Copy bytes from ByteString instance. 189 * @param byteString copy from 190 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 191 */ 192 @Deprecated 193 public Bytes(final ByteString byteString) { 194 this(byteString.toByteArray()); 195 } 196 197 /** 198 * Get the data from the Bytes. 199 * @return The data is only valid between offset and offset+length. 200 */ 201 public byte [] get() { 202 if (this.bytes == null) { 203 throw new IllegalStateException("Uninitialiized. Null constructor " + 204 "called w/o accompaying readFields invocation"); 205 } 206 return this.bytes; 207 } 208 209 /** 210 * @param b Use passed bytes as backing array for this instance. 211 */ 212 public void set(final byte [] b) { 213 set(b, 0, b.length); 214 } 215 216 /** 217 * @param b Use passed bytes as backing array for this instance. 218 * @param offset 219 * @param length 220 */ 221 public void set(final byte [] b, final int offset, final int length) { 222 this.bytes = b; 223 this.offset = offset; 224 this.length = length; 225 } 226 227 /** 228 * @return the number of valid bytes in the buffer 229 * @deprecated use {@link #getLength()} instead 230 */ 231 @Deprecated 232 public int getSize() { 233 if (this.bytes == null) { 234 throw new IllegalStateException("Uninitialiized. Null constructor " + 235 "called w/o accompaying readFields invocation"); 236 } 237 return this.length; 238 } 239 240 /** 241 * @return the number of valid bytes in the buffer 242 */ 243 public int getLength() { 244 if (this.bytes == null) { 245 throw new IllegalStateException("Uninitialiized. Null constructor " + 246 "called w/o accompaying readFields invocation"); 247 } 248 return this.length; 249 } 250 251 /** 252 * @return offset 253 */ 254 public int getOffset(){ 255 return this.offset; 256 } 257 258 /** 259 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 260 */ 261 @Deprecated 262 public ByteString toByteString() { 263 return ByteString.copyFrom(this.bytes, this.offset, this.length); 264 } 265 266 @Override 267 public int hashCode() { 268 return Bytes.hashCode(bytes, offset, length); 269 } 270 271 /** 272 * Define the sort order of the Bytes. 273 * @param that The other bytes writable 274 * @return Positive if left is bigger than right, 0 if they are equal, and 275 * negative if left is smaller than right. 276 */ 277 @Override 278 public int compareTo(Bytes that) { 279 return BYTES_RAWCOMPARATOR.compare( 280 this.bytes, this.offset, this.length, 281 that.bytes, that.offset, that.length); 282 } 283 284 /** 285 * Compares the bytes in this object to the specified byte array 286 * @param that 287 * @return Positive if left is bigger than right, 0 if they are equal, and 288 * negative if left is smaller than right. 289 */ 290 public int compareTo(final byte [] that) { 291 return BYTES_RAWCOMPARATOR.compare( 292 this.bytes, this.offset, this.length, 293 that, 0, that.length); 294 } 295 296 /** 297 * @see Object#equals(Object) 298 */ 299 @Override 300 public boolean equals(Object right_obj) { 301 if (right_obj instanceof byte []) { 302 return compareTo((byte [])right_obj) == 0; 303 } 304 if (right_obj instanceof Bytes) { 305 return compareTo((Bytes)right_obj) == 0; 306 } 307 return false; 308 } 309 310 /** 311 * @see Object#toString() 312 */ 313 @Override 314 public String toString() { 315 return Bytes.toString(bytes, offset, length); 316 } 317 318 /** 319 * @param array List of byte []. 320 * @return Array of byte []. 321 */ 322 public static byte [][] toArray(final List<byte []> array) { 323 // List#toArray doesn't work on lists of byte []. 324 byte[][] results = new byte[array.size()][]; 325 for (int i = 0; i < array.size(); i++) { 326 results[i] = array.get(i); 327 } 328 return results; 329 } 330 331 /** 332 * Returns a copy of the bytes referred to by this writable 333 */ 334 public byte[] copyBytes() { 335 return Arrays.copyOfRange(bytes, offset, offset+length); 336 } 337 /** 338 * Byte array comparator class. 339 */ 340 @InterfaceAudience.Public 341 public static class ByteArrayComparator implements RawComparator<byte []> { 342 /** 343 * Constructor 344 */ 345 public ByteArrayComparator() { 346 super(); 347 } 348 @Override 349 public int compare(byte [] left, byte [] right) { 350 return compareTo(left, right); 351 } 352 @Override 353 public int compare(byte [] b1, int s1, int l1, byte [] b2, int s2, int l2) { 354 return LexicographicalComparerHolder.BEST_COMPARER. 355 compareTo(b1, s1, l1, b2, s2, l2); 356 } 357 } 358 359 /** 360 * A {@link ByteArrayComparator} that treats the empty array as the largest value. 361 * This is useful for comparing row end keys for regions. 362 */ 363 // TODO: unfortunately, HBase uses byte[0] as both start and end keys for region 364 // boundaries. Thus semantically, we should treat empty byte array as the smallest value 365 // while comparing row keys, start keys etc; but as the largest value for comparing 366 // region boundaries for endKeys. 367 @InterfaceAudience.Public 368 public static class RowEndKeyComparator extends ByteArrayComparator { 369 @Override 370 public int compare(byte[] left, byte[] right) { 371 return compare(left, 0, left.length, right, 0, right.length); 372 } 373 @Override 374 public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { 375 if (b1 == b2 && s1 == s2 && l1 == l2) { 376 return 0; 377 } 378 if (l1 == 0) { 379 return l2; //0 or positive 380 } 381 if (l2 == 0) { 382 return -1; 383 } 384 return super.compare(b1, s1, l1, b2, s2, l2); 385 } 386 } 387 388 /** 389 * Pass this to TreeMaps where byte [] are keys. 390 */ 391 public final static Comparator<byte []> BYTES_COMPARATOR = new ByteArrayComparator(); 392 393 /** 394 * Use comparing byte arrays, byte-by-byte 395 */ 396 public final static RawComparator<byte []> BYTES_RAWCOMPARATOR = new ByteArrayComparator(); 397 398 /** 399 * Read byte-array written with a WritableableUtils.vint prefix. 400 * @param in Input to read from. 401 * @return byte array read off <code>in</code> 402 * @throws IOException e 403 */ 404 public static byte [] readByteArray(final DataInput in) 405 throws IOException { 406 int len = WritableUtils.readVInt(in); 407 if (len < 0) { 408 throw new NegativeArraySizeException(Integer.toString(len)); 409 } 410 byte [] result = new byte[len]; 411 in.readFully(result, 0, len); 412 return result; 413 } 414 415 /** 416 * Read byte-array written with a WritableableUtils.vint prefix. 417 * IOException is converted to a RuntimeException. 418 * @param in Input to read from. 419 * @return byte array read off <code>in</code> 420 */ 421 public static byte [] readByteArrayThrowsRuntime(final DataInput in) { 422 try { 423 return readByteArray(in); 424 } catch (Exception e) { 425 throw new RuntimeException(e); 426 } 427 } 428 429 /** 430 * Write byte-array with a WritableableUtils.vint prefix. 431 * @param out output stream to be written to 432 * @param b array to write 433 * @throws IOException e 434 */ 435 public static void writeByteArray(final DataOutput out, final byte [] b) 436 throws IOException { 437 if(b == null) { 438 WritableUtils.writeVInt(out, 0); 439 } else { 440 writeByteArray(out, b, 0, b.length); 441 } 442 } 443 444 /** 445 * Write byte-array to out with a vint length prefix. 446 * @param out output stream 447 * @param b array 448 * @param offset offset into array 449 * @param length length past offset 450 * @throws IOException e 451 */ 452 public static void writeByteArray(final DataOutput out, final byte [] b, 453 final int offset, final int length) 454 throws IOException { 455 WritableUtils.writeVInt(out, length); 456 out.write(b, offset, length); 457 } 458 459 /** 460 * Write byte-array from src to tgt with a vint length prefix. 461 * @param tgt target array 462 * @param tgtOffset offset into target array 463 * @param src source array 464 * @param srcOffset source offset 465 * @param srcLength source length 466 * @return New offset in src array. 467 */ 468 public static int writeByteArray(final byte [] tgt, final int tgtOffset, 469 final byte [] src, final int srcOffset, final int srcLength) { 470 byte [] vint = vintToBytes(srcLength); 471 System.arraycopy(vint, 0, tgt, tgtOffset, vint.length); 472 int offset = tgtOffset + vint.length; 473 System.arraycopy(src, srcOffset, tgt, offset, srcLength); 474 return offset + srcLength; 475 } 476 477 /** 478 * Put bytes at the specified byte array position. 479 * @param tgtBytes the byte array 480 * @param tgtOffset position in the array 481 * @param srcBytes array to write out 482 * @param srcOffset source offset 483 * @param srcLength source length 484 * @return incremented offset 485 */ 486 public static int putBytes(byte[] tgtBytes, int tgtOffset, byte[] srcBytes, 487 int srcOffset, int srcLength) { 488 System.arraycopy(srcBytes, srcOffset, tgtBytes, tgtOffset, srcLength); 489 return tgtOffset + srcLength; 490 } 491 492 /** 493 * Write a single byte out to the specified byte array position. 494 * @param bytes the byte array 495 * @param offset position in the array 496 * @param b byte to write out 497 * @return incremented offset 498 */ 499 public static int putByte(byte[] bytes, int offset, byte b) { 500 bytes[offset] = b; 501 return offset + 1; 502 } 503 504 /** 505 * Add the whole content of the ByteBuffer to the bytes arrays. The ByteBuffer is modified. 506 * @param bytes the byte array 507 * @param offset position in the array 508 * @param buf ByteBuffer to write out 509 * @return incremented offset 510 */ 511 public static int putByteBuffer(byte[] bytes, int offset, ByteBuffer buf) { 512 int len = buf.remaining(); 513 buf.get(bytes, offset, len); 514 return offset + len; 515 } 516 517 /** 518 * Returns a new byte array, copied from the given {@code buf}, 519 * from the index 0 (inclusive) to the limit (exclusive), 520 * regardless of the current position. 521 * The position and the other index parameters are not changed. 522 * 523 * @param buf a byte buffer 524 * @return the byte array 525 * @see #getBytes(ByteBuffer) 526 */ 527 public static byte[] toBytes(ByteBuffer buf) { 528 ByteBuffer dup = buf.duplicate(); 529 dup.position(0); 530 return readBytes(dup); 531 } 532 533 private static byte[] readBytes(ByteBuffer buf) { 534 byte [] result = new byte[buf.remaining()]; 535 buf.get(result); 536 return result; 537 } 538 539 /** 540 * @param b Presumed UTF-8 encoded byte array. 541 * @return String made from <code>b</code> 542 */ 543 public static String toString(final byte [] b) { 544 if (b == null) { 545 return null; 546 } 547 return toString(b, 0, b.length); 548 } 549 550 /** 551 * Joins two byte arrays together using a separator. 552 * @param b1 The first byte array. 553 * @param sep The separator to use. 554 * @param b2 The second byte array. 555 */ 556 public static String toString(final byte [] b1, 557 String sep, 558 final byte [] b2) { 559 return toString(b1, 0, b1.length) + sep + toString(b2, 0, b2.length); 560 } 561 562 /** 563 * This method will convert utf8 encoded bytes into a string. If 564 * the given byte array is null, this method will return null. 565 * 566 * @param b Presumed UTF-8 encoded byte array. 567 * @param off offset into array 568 * @return String made from <code>b</code> or null 569 */ 570 public static String toString(final byte[] b, int off) { 571 if (b == null) { 572 return null; 573 } 574 int len = b.length - off; 575 if (len <= 0) { 576 return ""; 577 } 578 try { 579 return new String(b, off, len, UTF8_CSN); 580 } catch (UnsupportedEncodingException e) { 581 // should never happen! 582 throw new IllegalArgumentException("UTF8 encoding is not supported", e); 583 } 584 } 585 586 /** 587 * This method will convert utf8 encoded bytes into a string. If 588 * the given byte array is null, this method will return null. 589 * 590 * @param b Presumed UTF-8 encoded byte array. 591 * @param off offset into array 592 * @param len length of utf-8 sequence 593 * @return String made from <code>b</code> or null 594 */ 595 public static String toString(final byte[] b, int off, int len) { 596 if (b == null) { 597 return null; 598 } 599 if (len == 0) { 600 return ""; 601 } 602 try { 603 return new String(b, off, len, UTF8_CSN); 604 } catch (UnsupportedEncodingException e) { 605 // should never happen! 606 throw new IllegalArgumentException("UTF8 encoding is not supported", e); 607 } 608 } 609 610 /** 611 * Write a printable representation of a byte array. 612 * 613 * @param b byte array 614 * @return string 615 * @see #toStringBinary(byte[], int, int) 616 */ 617 public static String toStringBinary(final byte [] b) { 618 if (b == null) 619 return "null"; 620 return toStringBinary(b, 0, b.length); 621 } 622 623 /** 624 * Converts the given byte buffer to a printable representation, 625 * from the index 0 (inclusive) to the limit (exclusive), 626 * regardless of the current position. 627 * The position and the other index parameters are not changed. 628 * 629 * @param buf a byte buffer 630 * @return a string representation of the buffer's binary contents 631 * @see #toBytes(ByteBuffer) 632 * @see #getBytes(ByteBuffer) 633 */ 634 public static String toStringBinary(ByteBuffer buf) { 635 if (buf == null) 636 return "null"; 637 if (buf.hasArray()) { 638 return toStringBinary(buf.array(), buf.arrayOffset(), buf.limit()); 639 } 640 return toStringBinary(toBytes(buf)); 641 } 642 643 private static final char[] HEX_CHARS_UPPER = { 644 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' 645 }; 646 647 /** 648 * Write a printable representation of a byte array. Non-printable 649 * characters are hex escaped in the format \\x%02X, eg: 650 * \x00 \x05 etc 651 * 652 * @param b array to write out 653 * @param off offset to start at 654 * @param len length to write 655 * @return string output 656 */ 657 public static String toStringBinary(final byte [] b, int off, int len) { 658 StringBuilder result = new StringBuilder(); 659 // Just in case we are passed a 'len' that is > buffer length... 660 if (off >= b.length) return result.toString(); 661 if (off + len > b.length) len = b.length - off; 662 for (int i = off; i < off + len ; ++i) { 663 int ch = b[i] & 0xFF; 664 if (ch >= ' ' && ch <= '~' && ch != '\\') { 665 result.append((char)ch); 666 } else { 667 result.append("\\x"); 668 result.append(HEX_CHARS_UPPER[ch / 0x10]); 669 result.append(HEX_CHARS_UPPER[ch % 0x10]); 670 } 671 } 672 return result.toString(); 673 } 674 675 private static boolean isHexDigit(char c) { 676 return 677 (c >= 'A' && c <= 'F') || 678 (c >= '0' && c <= '9'); 679 } 680 681 /** 682 * Takes a ASCII digit in the range A-F0-9 and returns 683 * the corresponding integer/ordinal value. 684 * @param ch The hex digit. 685 * @return The converted hex value as a byte. 686 */ 687 public static byte toBinaryFromHex(byte ch) { 688 if (ch >= 'A' && ch <= 'F') 689 return (byte) ((byte)10 + (byte) (ch - 'A')); 690 // else 691 return (byte) (ch - '0'); 692 } 693 694 public static byte [] toBytesBinary(String in) { 695 // this may be bigger than we need, but let's be safe. 696 byte [] b = new byte[in.length()]; 697 int size = 0; 698 for (int i = 0; i < in.length(); ++i) { 699 char ch = in.charAt(i); 700 if (ch == '\\' && in.length() > i+1 && in.charAt(i+1) == 'x') { 701 // ok, take next 2 hex digits. 702 char hd1 = in.charAt(i+2); 703 char hd2 = in.charAt(i+3); 704 705 // they need to be A-F0-9: 706 if (!isHexDigit(hd1) || 707 !isHexDigit(hd2)) { 708 // bogus escape code, ignore: 709 continue; 710 } 711 // turn hex ASCII digit -> number 712 byte d = (byte) ((toBinaryFromHex((byte)hd1) << 4) + toBinaryFromHex((byte)hd2)); 713 714 b[size++] = d; 715 i += 3; // skip 3 716 } else { 717 b[size++] = (byte) ch; 718 } 719 } 720 // resize: 721 byte [] b2 = new byte[size]; 722 System.arraycopy(b, 0, b2, 0, size); 723 return b2; 724 } 725 726 /** 727 * Converts a string to a UTF-8 byte array. 728 * @param s string 729 * @return the byte array 730 */ 731 public static byte[] toBytes(String s) { 732 try { 733 return s.getBytes(UTF8_CSN); 734 } catch (UnsupportedEncodingException e) { 735 // should never happen! 736 throw new IllegalArgumentException("UTF8 decoding is not supported", e); 737 } 738 } 739 740 /** 741 * Convert a boolean to a byte array. True becomes -1 742 * and false becomes 0. 743 * 744 * @param b value 745 * @return <code>b</code> encoded in a byte array. 746 */ 747 public static byte [] toBytes(final boolean b) { 748 return new byte[] { b ? (byte) -1 : (byte) 0 }; 749 } 750 751 /** 752 * Reverses {@link #toBytes(boolean)} 753 * @param b array 754 * @return True or false. 755 */ 756 public static boolean toBoolean(final byte [] b) { 757 if (b.length != 1) { 758 throw new IllegalArgumentException("Array has wrong size: " + b.length); 759 } 760 return b[0] != (byte) 0; 761 } 762 763 /** 764 * Convert a long value to a byte array using big-endian. 765 * 766 * @param val value to convert 767 * @return the byte array 768 */ 769 public static byte[] toBytes(long val) { 770 byte [] b = new byte[8]; 771 for (int i = 7; i > 0; i--) { 772 b[i] = (byte) val; 773 val >>>= 8; 774 } 775 b[0] = (byte) val; 776 return b; 777 } 778 779 /** 780 * Converts a byte array to a long value. Reverses 781 * {@link #toBytes(long)} 782 * @param bytes array 783 * @return the long value 784 */ 785 public static long toLong(byte[] bytes) { 786 return toLong(bytes, 0, SIZEOF_LONG); 787 } 788 789 /** 790 * Converts a byte array to a long value. Assumes there will be 791 * {@link #SIZEOF_LONG} bytes available. 792 * 793 * @param bytes bytes 794 * @param offset offset 795 * @return the long value 796 */ 797 public static long toLong(byte[] bytes, int offset) { 798 return toLong(bytes, offset, SIZEOF_LONG); 799 } 800 801 /** 802 * Converts a byte array to a long value. 803 * 804 * @param bytes array of bytes 805 * @param offset offset into array 806 * @param length length of data (must be {@link #SIZEOF_LONG}) 807 * @return the long value 808 * @throws IllegalArgumentException if length is not {@link #SIZEOF_LONG} or 809 * if there's not enough room in the array at the offset indicated. 810 */ 811 public static long toLong(byte[] bytes, int offset, final int length) { 812 if (length != SIZEOF_LONG || offset + length > bytes.length) { 813 throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_LONG); 814 } 815 return ConverterHolder.BEST_CONVERTER.toLong(bytes, offset, length); 816 } 817 818 private static IllegalArgumentException 819 explainWrongLengthOrOffset(final byte[] bytes, 820 final int offset, 821 final int length, 822 final int expectedLength) { 823 String reason; 824 if (length != expectedLength) { 825 reason = "Wrong length: " + length + ", expected " + expectedLength; 826 } else { 827 reason = "offset (" + offset + ") + length (" + length + ") exceed the" 828 + " capacity of the array: " + bytes.length; 829 } 830 return new IllegalArgumentException(reason); 831 } 832 833 /** 834 * Put a long value out to the specified byte array position. 835 * @param bytes the byte array 836 * @param offset position in the array 837 * @param val long to write out 838 * @return incremented offset 839 * @throws IllegalArgumentException if the byte array given doesn't have 840 * enough room at the offset specified. 841 */ 842 public static int putLong(byte[] bytes, int offset, long val) { 843 if (bytes.length - offset < SIZEOF_LONG) { 844 throw new IllegalArgumentException("Not enough room to put a long at" 845 + " offset " + offset + " in a " + bytes.length + " byte array"); 846 } 847 return ConverterHolder.BEST_CONVERTER.putLong(bytes, offset, val); 848 } 849 850 /** 851 * Put a long value out to the specified byte array position (Unsafe). 852 * @param bytes the byte array 853 * @param offset position in the array 854 * @param val long to write out 855 * @return incremented offset 856 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 857 */ 858 @Deprecated 859 public static int putLongUnsafe(byte[] bytes, int offset, long val) { 860 return UnsafeAccess.putLong(bytes, offset, val); 861 } 862 863 /** 864 * Presumes float encoded as IEEE 754 floating-point "single format" 865 * @param bytes byte array 866 * @return Float made from passed byte array. 867 */ 868 public static float toFloat(byte [] bytes) { 869 return toFloat(bytes, 0); 870 } 871 872 /** 873 * Presumes float encoded as IEEE 754 floating-point "single format" 874 * @param bytes array to convert 875 * @param offset offset into array 876 * @return Float made from passed byte array. 877 */ 878 public static float toFloat(byte [] bytes, int offset) { 879 return Float.intBitsToFloat(toInt(bytes, offset, SIZEOF_INT)); 880 } 881 882 /** 883 * @param bytes byte array 884 * @param offset offset to write to 885 * @param f float value 886 * @return New offset in <code>bytes</code> 887 */ 888 public static int putFloat(byte [] bytes, int offset, float f) { 889 return putInt(bytes, offset, Float.floatToRawIntBits(f)); 890 } 891 892 /** 893 * @param f float value 894 * @return the float represented as byte [] 895 */ 896 public static byte [] toBytes(final float f) { 897 // Encode it as int 898 return Bytes.toBytes(Float.floatToRawIntBits(f)); 899 } 900 901 /** 902 * @param bytes byte array 903 * @return Return double made from passed bytes. 904 */ 905 public static double toDouble(final byte [] bytes) { 906 return toDouble(bytes, 0); 907 } 908 909 /** 910 * @param bytes byte array 911 * @param offset offset where double is 912 * @return Return double made from passed bytes. 913 */ 914 public static double toDouble(final byte [] bytes, final int offset) { 915 return Double.longBitsToDouble(toLong(bytes, offset, SIZEOF_LONG)); 916 } 917 918 /** 919 * @param bytes byte array 920 * @param offset offset to write to 921 * @param d value 922 * @return New offset into array <code>bytes</code> 923 */ 924 public static int putDouble(byte [] bytes, int offset, double d) { 925 return putLong(bytes, offset, Double.doubleToLongBits(d)); 926 } 927 928 /** 929 * Serialize a double as the IEEE 754 double format output. The resultant 930 * array will be 8 bytes long. 931 * 932 * @param d value 933 * @return the double represented as byte [] 934 */ 935 public static byte [] toBytes(final double d) { 936 // Encode it as a long 937 return Bytes.toBytes(Double.doubleToRawLongBits(d)); 938 } 939 940 /** 941 * Convert an int value to a byte array. Big-endian. Same as what DataOutputStream.writeInt 942 * does. 943 * 944 * @param val value 945 * @return the byte array 946 */ 947 public static byte[] toBytes(int val) { 948 byte [] b = new byte[4]; 949 for(int i = 3; i > 0; i--) { 950 b[i] = (byte) val; 951 val >>>= 8; 952 } 953 b[0] = (byte) val; 954 return b; 955 } 956 957 /** 958 * Converts a byte array to an int value 959 * @param bytes byte array 960 * @return the int value 961 */ 962 public static int toInt(byte[] bytes) { 963 return toInt(bytes, 0, SIZEOF_INT); 964 } 965 966 /** 967 * Converts a byte array to an int value 968 * @param bytes byte array 969 * @param offset offset into array 970 * @return the int value 971 */ 972 public static int toInt(byte[] bytes, int offset) { 973 return toInt(bytes, offset, SIZEOF_INT); 974 } 975 976 /** 977 * Converts a byte array to an int value 978 * @param bytes byte array 979 * @param offset offset into array 980 * @param length length of int (has to be {@link #SIZEOF_INT}) 981 * @return the int value 982 * @throws IllegalArgumentException if length is not {@link #SIZEOF_INT} or 983 * if there's not enough room in the array at the offset indicated. 984 */ 985 public static int toInt(byte[] bytes, int offset, final int length) { 986 if (length != SIZEOF_INT || offset + length > bytes.length) { 987 throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_INT); 988 } 989 return ConverterHolder.BEST_CONVERTER.toInt(bytes, offset, length); 990 } 991 992 /** 993 * Converts a byte array to an int value (Unsafe version) 994 * @param bytes byte array 995 * @param offset offset into array 996 * @return the int value 997 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 998 */ 999 @Deprecated 1000 public static int toIntUnsafe(byte[] bytes, int offset) { 1001 return UnsafeAccess.toInt(bytes, offset); 1002 } 1003 1004 /** 1005 * Converts a byte array to an short value (Unsafe version) 1006 * @param bytes byte array 1007 * @param offset offset into array 1008 * @return the short value 1009 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 1010 */ 1011 @Deprecated 1012 public static short toShortUnsafe(byte[] bytes, int offset) { 1013 return UnsafeAccess.toShort(bytes, offset); 1014 } 1015 1016 /** 1017 * Converts a byte array to an long value (Unsafe version) 1018 * @param bytes byte array 1019 * @param offset offset into array 1020 * @return the long value 1021 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 1022 */ 1023 @Deprecated 1024 public static long toLongUnsafe(byte[] bytes, int offset) { 1025 return UnsafeAccess.toLong(bytes, offset); 1026 } 1027 1028 /** 1029 * Converts a byte array to an int value 1030 * @param bytes byte array 1031 * @param offset offset into array 1032 * @param length how many bytes should be considered for creating int 1033 * @return the int value 1034 * @throws IllegalArgumentException if there's not enough room in the array at the offset 1035 * indicated. 1036 */ 1037 public static int readAsInt(byte[] bytes, int offset, final int length) { 1038 if (offset + length > bytes.length) { 1039 throw new IllegalArgumentException("offset (" + offset + ") + length (" + length 1040 + ") exceed the" + " capacity of the array: " + bytes.length); 1041 } 1042 int n = 0; 1043 for(int i = offset; i < (offset + length); i++) { 1044 n <<= 8; 1045 n ^= bytes[i] & 0xFF; 1046 } 1047 return n; 1048 } 1049 1050 /** 1051 * Put an int value out to the specified byte array position. 1052 * @param bytes the byte array 1053 * @param offset position in the array 1054 * @param val int to write out 1055 * @return incremented offset 1056 * @throws IllegalArgumentException if the byte array given doesn't have 1057 * enough room at the offset specified. 1058 */ 1059 public static int putInt(byte[] bytes, int offset, int val) { 1060 if (bytes.length - offset < SIZEOF_INT) { 1061 throw new IllegalArgumentException("Not enough room to put an int at" 1062 + " offset " + offset + " in a " + bytes.length + " byte array"); 1063 } 1064 return ConverterHolder.BEST_CONVERTER.putInt(bytes, offset, val); 1065 } 1066 1067 /** 1068 * Put an int value out to the specified byte array position (Unsafe). 1069 * @param bytes the byte array 1070 * @param offset position in the array 1071 * @param val int to write out 1072 * @return incremented offset 1073 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 1074 */ 1075 @Deprecated 1076 public static int putIntUnsafe(byte[] bytes, int offset, int val) { 1077 return UnsafeAccess.putInt(bytes, offset, val); 1078 } 1079 1080 /** 1081 * Convert a short value to a byte array of {@link #SIZEOF_SHORT} bytes long. 1082 * @param val value 1083 * @return the byte array 1084 */ 1085 public static byte[] toBytes(short val) { 1086 byte[] b = new byte[SIZEOF_SHORT]; 1087 b[1] = (byte) val; 1088 val >>= 8; 1089 b[0] = (byte) val; 1090 return b; 1091 } 1092 1093 /** 1094 * Converts a byte array to a short value 1095 * @param bytes byte array 1096 * @return the short value 1097 */ 1098 public static short toShort(byte[] bytes) { 1099 return toShort(bytes, 0, SIZEOF_SHORT); 1100 } 1101 1102 /** 1103 * Converts a byte array to a short value 1104 * @param bytes byte array 1105 * @param offset offset into array 1106 * @return the short value 1107 */ 1108 public static short toShort(byte[] bytes, int offset) { 1109 return toShort(bytes, offset, SIZEOF_SHORT); 1110 } 1111 1112 /** 1113 * Converts a byte array to a short value 1114 * @param bytes byte array 1115 * @param offset offset into array 1116 * @param length length, has to be {@link #SIZEOF_SHORT} 1117 * @return the short value 1118 * @throws IllegalArgumentException if length is not {@link #SIZEOF_SHORT} 1119 * or if there's not enough room in the array at the offset indicated. 1120 */ 1121 public static short toShort(byte[] bytes, int offset, final int length) { 1122 if (length != SIZEOF_SHORT || offset + length > bytes.length) { 1123 throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_SHORT); 1124 } 1125 return ConverterHolder.BEST_CONVERTER.toShort(bytes, offset, length); 1126 } 1127 1128 /** 1129 * Returns a new byte array, copied from the given {@code buf}, 1130 * from the position (inclusive) to the limit (exclusive). 1131 * The position and the other index parameters are not changed. 1132 * 1133 * @param buf a byte buffer 1134 * @return the byte array 1135 * @see #toBytes(ByteBuffer) 1136 */ 1137 public static byte[] getBytes(ByteBuffer buf) { 1138 return readBytes(buf.duplicate()); 1139 } 1140 1141 /** 1142 * Put a short value out to the specified byte array position. 1143 * @param bytes the byte array 1144 * @param offset position in the array 1145 * @param val short to write out 1146 * @return incremented offset 1147 * @throws IllegalArgumentException if the byte array given doesn't have 1148 * enough room at the offset specified. 1149 */ 1150 public static int putShort(byte[] bytes, int offset, short val) { 1151 if (bytes.length - offset < SIZEOF_SHORT) { 1152 throw new IllegalArgumentException("Not enough room to put a short at" 1153 + " offset " + offset + " in a " + bytes.length + " byte array"); 1154 } 1155 return ConverterHolder.BEST_CONVERTER.putShort(bytes, offset, val); 1156 } 1157 1158 /** 1159 * Put a short value out to the specified byte array position (Unsafe). 1160 * @param bytes the byte array 1161 * @param offset position in the array 1162 * @param val short to write out 1163 * @return incremented offset 1164 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 1165 */ 1166 @Deprecated 1167 public static int putShortUnsafe(byte[] bytes, int offset, short val) { 1168 return UnsafeAccess.putShort(bytes, offset, val); 1169 } 1170 1171 /** 1172 * Put an int value as short out to the specified byte array position. Only the lower 2 bytes of 1173 * the short will be put into the array. The caller of the API need to make sure they will not 1174 * loose the value by doing so. This is useful to store an unsigned short which is represented as 1175 * int in other parts. 1176 * @param bytes the byte array 1177 * @param offset position in the array 1178 * @param val value to write out 1179 * @return incremented offset 1180 * @throws IllegalArgumentException if the byte array given doesn't have 1181 * enough room at the offset specified. 1182 */ 1183 public static int putAsShort(byte[] bytes, int offset, int val) { 1184 if (bytes.length - offset < SIZEOF_SHORT) { 1185 throw new IllegalArgumentException("Not enough room to put a short at" 1186 + " offset " + offset + " in a " + bytes.length + " byte array"); 1187 } 1188 bytes[offset+1] = (byte) val; 1189 val >>= 8; 1190 bytes[offset] = (byte) val; 1191 return offset + SIZEOF_SHORT; 1192 } 1193 1194 /** 1195 * Convert a BigDecimal value to a byte array 1196 * 1197 * @param val 1198 * @return the byte array 1199 */ 1200 public static byte[] toBytes(BigDecimal val) { 1201 byte[] valueBytes = val.unscaledValue().toByteArray(); 1202 byte[] result = new byte[valueBytes.length + SIZEOF_INT]; 1203 int offset = putInt(result, 0, val.scale()); 1204 putBytes(result, offset, valueBytes, 0, valueBytes.length); 1205 return result; 1206 } 1207 1208 1209 /** 1210 * Converts a byte array to a BigDecimal 1211 * 1212 * @param bytes 1213 * @return the char value 1214 */ 1215 public static BigDecimal toBigDecimal(byte[] bytes) { 1216 return toBigDecimal(bytes, 0, bytes.length); 1217 } 1218 1219 /** 1220 * Converts a byte array to a BigDecimal value 1221 * 1222 * @param bytes 1223 * @param offset 1224 * @param length 1225 * @return the char value 1226 */ 1227 public static BigDecimal toBigDecimal(byte[] bytes, int offset, final int length) { 1228 if (bytes == null || length < SIZEOF_INT + 1 || 1229 (offset + length > bytes.length)) { 1230 return null; 1231 } 1232 1233 int scale = toInt(bytes, offset); 1234 byte[] tcBytes = new byte[length - SIZEOF_INT]; 1235 System.arraycopy(bytes, offset + SIZEOF_INT, tcBytes, 0, length - SIZEOF_INT); 1236 return new BigDecimal(new BigInteger(tcBytes), scale); 1237 } 1238 1239 /** 1240 * Put a BigDecimal value out to the specified byte array position. 1241 * 1242 * @param bytes the byte array 1243 * @param offset position in the array 1244 * @param val BigDecimal to write out 1245 * @return incremented offset 1246 */ 1247 public static int putBigDecimal(byte[] bytes, int offset, BigDecimal val) { 1248 if (bytes == null) { 1249 return offset; 1250 } 1251 1252 byte[] valueBytes = val.unscaledValue().toByteArray(); 1253 byte[] result = new byte[valueBytes.length + SIZEOF_INT]; 1254 offset = putInt(result, offset, val.scale()); 1255 return putBytes(result, offset, valueBytes, 0, valueBytes.length); 1256 } 1257 1258 /** 1259 * @param vint Integer to make a vint of. 1260 * @return Vint as bytes array. 1261 */ 1262 public static byte [] vintToBytes(final long vint) { 1263 long i = vint; 1264 int size = WritableUtils.getVIntSize(i); 1265 byte [] result = new byte[size]; 1266 int offset = 0; 1267 if (i >= -112 && i <= 127) { 1268 result[offset] = (byte) i; 1269 return result; 1270 } 1271 1272 int len = -112; 1273 if (i < 0) { 1274 i ^= -1L; // take one's complement' 1275 len = -120; 1276 } 1277 1278 long tmp = i; 1279 while (tmp != 0) { 1280 tmp = tmp >> 8; 1281 len--; 1282 } 1283 1284 result[offset++] = (byte) len; 1285 1286 len = (len < -120) ? -(len + 120) : -(len + 112); 1287 1288 for (int idx = len; idx != 0; idx--) { 1289 int shiftbits = (idx - 1) * 8; 1290 long mask = 0xFFL << shiftbits; 1291 result[offset++] = (byte)((i & mask) >> shiftbits); 1292 } 1293 return result; 1294 } 1295 1296 /** 1297 * @param buffer buffer to convert 1298 * @return vint bytes as an integer. 1299 */ 1300 public static long bytesToVint(final byte [] buffer) { 1301 int offset = 0; 1302 byte firstByte = buffer[offset++]; 1303 int len = WritableUtils.decodeVIntSize(firstByte); 1304 if (len == 1) { 1305 return firstByte; 1306 } 1307 long i = 0; 1308 for (int idx = 0; idx < len-1; idx++) { 1309 byte b = buffer[offset++]; 1310 i = i << 8; 1311 i = i | (b & 0xFF); 1312 } 1313 return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i); 1314 } 1315 1316 /** 1317 * Reads a zero-compressed encoded long from input buffer and returns it. 1318 * @param buffer Binary array 1319 * @param offset Offset into array at which vint begins. 1320 * @throws java.io.IOException e 1321 * @return deserialized long from buffer. 1322 * @deprecated Use {@link #readAsVLong(byte[],int)} instead. 1323 */ 1324 @Deprecated 1325 public static long readVLong(final byte [] buffer, final int offset) 1326 throws IOException { 1327 return readAsVLong(buffer, offset); 1328 } 1329 1330 /** 1331 * Reads a zero-compressed encoded long from input buffer and returns it. 1332 * @param buffer Binary array 1333 * @param offset Offset into array at which vint begins. 1334 * @return deserialized long from buffer. 1335 */ 1336 public static long readAsVLong(final byte [] buffer, final int offset) { 1337 byte firstByte = buffer[offset]; 1338 int len = WritableUtils.decodeVIntSize(firstByte); 1339 if (len == 1) { 1340 return firstByte; 1341 } 1342 long i = 0; 1343 for (int idx = 0; idx < len-1; idx++) { 1344 byte b = buffer[offset + 1 + idx]; 1345 i = i << 8; 1346 i = i | (b & 0xFF); 1347 } 1348 return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i); 1349 } 1350 1351 /** 1352 * @param left left operand 1353 * @param right right operand 1354 * @return 0 if equal, < 0 if left is less than right, etc. 1355 */ 1356 public static int compareTo(final byte [] left, final byte [] right) { 1357 return LexicographicalComparerHolder.BEST_COMPARER. 1358 compareTo(left, 0, left.length, right, 0, right.length); 1359 } 1360 1361 /** 1362 * Lexicographically compare two arrays. 1363 * 1364 * @param buffer1 left operand 1365 * @param buffer2 right operand 1366 * @param offset1 Where to start comparing in the left buffer 1367 * @param offset2 Where to start comparing in the right buffer 1368 * @param length1 How much to compare from the left buffer 1369 * @param length2 How much to compare from the right buffer 1370 * @return 0 if equal, < 0 if left is less than right, etc. 1371 */ 1372 public static int compareTo(byte[] buffer1, int offset1, int length1, 1373 byte[] buffer2, int offset2, int length2) { 1374 return LexicographicalComparerHolder.BEST_COMPARER. 1375 compareTo(buffer1, offset1, length1, buffer2, offset2, length2); 1376 } 1377 1378 interface Comparer<T> { 1379 int compareTo( 1380 T buffer1, int offset1, int length1, T buffer2, int offset2, int length2 1381 ); 1382 } 1383 1384 static abstract class Converter { 1385 abstract long toLong(byte[] bytes, int offset, int length); 1386 abstract int putLong(byte[] bytes, int offset, long val); 1387 1388 abstract int toInt(byte[] bytes, int offset, final int length); 1389 abstract int putInt(byte[] bytes, int offset, int val); 1390 1391 abstract short toShort(byte[] bytes, int offset, final int length); 1392 abstract int putShort(byte[] bytes, int offset, short val); 1393 1394 } 1395 1396 @VisibleForTesting 1397 static Comparer<byte[]> lexicographicalComparerJavaImpl() { 1398 return LexicographicalComparerHolder.PureJavaComparer.INSTANCE; 1399 } 1400 1401 static class ConverterHolder { 1402 static final String UNSAFE_CONVERTER_NAME = 1403 ConverterHolder.class.getName() + "$UnsafeConverter"; 1404 1405 static final Converter BEST_CONVERTER = getBestConverter(); 1406 /** 1407 * Returns the Unsafe-using Converter, or falls back to the pure-Java 1408 * implementation if unable to do so. 1409 */ 1410 static Converter getBestConverter() { 1411 try { 1412 Class<?> theClass = Class.forName(UNSAFE_CONVERTER_NAME); 1413 1414 // yes, UnsafeComparer does implement Comparer<byte[]> 1415 @SuppressWarnings("unchecked") 1416 Converter converter = (Converter) theClass.getConstructor().newInstance(); 1417 return converter; 1418 } catch (Throwable t) { // ensure we really catch *everything* 1419 return PureJavaConverter.INSTANCE; 1420 } 1421 } 1422 1423 protected static final class PureJavaConverter extends Converter { 1424 static final PureJavaConverter INSTANCE = new PureJavaConverter(); 1425 1426 private PureJavaConverter() {} 1427 1428 @Override 1429 long toLong(byte[] bytes, int offset, int length) { 1430 long l = 0; 1431 for(int i = offset; i < offset + length; i++) { 1432 l <<= 8; 1433 l ^= bytes[i] & 0xFF; 1434 } 1435 return l; 1436 } 1437 1438 @Override 1439 int putLong(byte[] bytes, int offset, long val) { 1440 for(int i = offset + 7; i > offset; i--) { 1441 bytes[i] = (byte) val; 1442 val >>>= 8; 1443 } 1444 bytes[offset] = (byte) val; 1445 return offset + SIZEOF_LONG; 1446 } 1447 1448 @Override 1449 int toInt(byte[] bytes, int offset, int length) { 1450 int n = 0; 1451 for(int i = offset; i < (offset + length); i++) { 1452 n <<= 8; 1453 n ^= bytes[i] & 0xFF; 1454 } 1455 return n; 1456 } 1457 1458 @Override 1459 int putInt(byte[] bytes, int offset, int val) { 1460 for(int i= offset + 3; i > offset; i--) { 1461 bytes[i] = (byte) val; 1462 val >>>= 8; 1463 } 1464 bytes[offset] = (byte) val; 1465 return offset + SIZEOF_INT; 1466 } 1467 1468 @Override 1469 short toShort(byte[] bytes, int offset, int length) { 1470 short n = 0; 1471 n = (short) ((n ^ bytes[offset]) & 0xFF); 1472 n = (short) (n << 8); 1473 n ^= (short) (bytes[offset+1] & 0xFF); 1474 return n; 1475 } 1476 1477 @Override 1478 int putShort(byte[] bytes, int offset, short val) { 1479 bytes[offset+1] = (byte) val; 1480 val >>= 8; 1481 bytes[offset] = (byte) val; 1482 return offset + SIZEOF_SHORT; 1483 } 1484 } 1485 1486 protected static final class UnsafeConverter extends Converter { 1487 1488 static final Unsafe theUnsafe; 1489 1490 public UnsafeConverter() {} 1491 1492 static { 1493 if (UNSAFE_UNALIGNED) { 1494 theUnsafe = UnsafeAccess.theUnsafe; 1495 } else { 1496 // It doesn't matter what we throw; 1497 // it's swallowed in getBestComparer(). 1498 throw new Error(); 1499 } 1500 1501 // sanity check - this should never fail 1502 if (theUnsafe.arrayIndexScale(byte[].class) != 1) { 1503 throw new AssertionError(); 1504 } 1505 } 1506 1507 @Override 1508 long toLong(byte[] bytes, int offset, int length) { 1509 return UnsafeAccess.toLong(bytes, offset); 1510 } 1511 1512 @Override 1513 int putLong(byte[] bytes, int offset, long val) { 1514 return UnsafeAccess.putLong(bytes, offset, val); 1515 } 1516 1517 @Override 1518 int toInt(byte[] bytes, int offset, int length) { 1519 return UnsafeAccess.toInt(bytes, offset); 1520 } 1521 1522 @Override 1523 int putInt(byte[] bytes, int offset, int val) { 1524 return UnsafeAccess.putInt(bytes, offset, val); 1525 } 1526 1527 @Override 1528 short toShort(byte[] bytes, int offset, int length) { 1529 return UnsafeAccess.toShort(bytes, offset); 1530 } 1531 1532 @Override 1533 int putShort(byte[] bytes, int offset, short val) { 1534 return UnsafeAccess.putShort(bytes, offset, val); 1535 } 1536 } 1537 } 1538 1539 /** 1540 * Provides a lexicographical comparer implementation; either a Java 1541 * implementation or a faster implementation based on {@link Unsafe}. 1542 * 1543 * <p>Uses reflection to gracefully fall back to the Java implementation if 1544 * {@code Unsafe} isn't available. 1545 */ 1546 @VisibleForTesting 1547 static class LexicographicalComparerHolder { 1548 static final String UNSAFE_COMPARER_NAME = 1549 LexicographicalComparerHolder.class.getName() + "$UnsafeComparer"; 1550 1551 static final Comparer<byte[]> BEST_COMPARER = getBestComparer(); 1552 /** 1553 * Returns the Unsafe-using Comparer, or falls back to the pure-Java 1554 * implementation if unable to do so. 1555 */ 1556 static Comparer<byte[]> getBestComparer() { 1557 try { 1558 Class<?> theClass = Class.forName(UNSAFE_COMPARER_NAME); 1559 1560 // yes, UnsafeComparer does implement Comparer<byte[]> 1561 @SuppressWarnings("unchecked") 1562 Comparer<byte[]> comparer = 1563 (Comparer<byte[]>) theClass.getEnumConstants()[0]; 1564 return comparer; 1565 } catch (Throwable t) { // ensure we really catch *everything* 1566 return lexicographicalComparerJavaImpl(); 1567 } 1568 } 1569 1570 enum PureJavaComparer implements Comparer<byte[]> { 1571 INSTANCE; 1572 1573 @Override 1574 public int compareTo(byte[] buffer1, int offset1, int length1, 1575 byte[] buffer2, int offset2, int length2) { 1576 // Short circuit equal case 1577 if (buffer1 == buffer2 && 1578 offset1 == offset2 && 1579 length1 == length2) { 1580 return 0; 1581 } 1582 // Bring WritableComparator code local 1583 int end1 = offset1 + length1; 1584 int end2 = offset2 + length2; 1585 for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) { 1586 int a = (buffer1[i] & 0xff); 1587 int b = (buffer2[j] & 0xff); 1588 if (a != b) { 1589 return a - b; 1590 } 1591 } 1592 return length1 - length2; 1593 } 1594 } 1595 1596 @VisibleForTesting 1597 enum UnsafeComparer implements Comparer<byte[]> { 1598 INSTANCE; 1599 1600 static final Unsafe theUnsafe; 1601 static { 1602 if (UNSAFE_UNALIGNED) { 1603 theUnsafe = UnsafeAccess.theUnsafe; 1604 } else { 1605 // It doesn't matter what we throw; 1606 // it's swallowed in getBestComparer(). 1607 throw new Error(); 1608 } 1609 1610 // sanity check - this should never fail 1611 if (theUnsafe.arrayIndexScale(byte[].class) != 1) { 1612 throw new AssertionError(); 1613 } 1614 } 1615 1616 /** 1617 * Lexicographically compare two arrays. 1618 * 1619 * @param buffer1 left operand 1620 * @param buffer2 right operand 1621 * @param offset1 Where to start comparing in the left buffer 1622 * @param offset2 Where to start comparing in the right buffer 1623 * @param length1 How much to compare from the left buffer 1624 * @param length2 How much to compare from the right buffer 1625 * @return 0 if equal, < 0 if left is less than right, etc. 1626 */ 1627 @Override 1628 public int compareTo(byte[] buffer1, int offset1, int length1, 1629 byte[] buffer2, int offset2, int length2) { 1630 1631 // Short circuit equal case 1632 if (buffer1 == buffer2 && 1633 offset1 == offset2 && 1634 length1 == length2) { 1635 return 0; 1636 } 1637 final int stride = 8; 1638 final int minLength = Math.min(length1, length2); 1639 int strideLimit = minLength & ~(stride - 1); 1640 final long offset1Adj = offset1 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET; 1641 final long offset2Adj = offset2 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET; 1642 int i; 1643 1644 /* 1645 * Compare 8 bytes at a time. Benchmarking on x86 shows a stride of 8 bytes is no slower 1646 * than 4 bytes even on 32-bit. On the other hand, it is substantially faster on 64-bit. 1647 */ 1648 for (i = 0; i < strideLimit; i += stride) { 1649 long lw = theUnsafe.getLong(buffer1, offset1Adj + i); 1650 long rw = theUnsafe.getLong(buffer2, offset2Adj + i); 1651 if (lw != rw) { 1652 if(!UnsafeAccess.LITTLE_ENDIAN) { 1653 return ((lw + Long.MIN_VALUE) < (rw + Long.MIN_VALUE)) ? -1 : 1; 1654 } 1655 1656 /* 1657 * We want to compare only the first index where left[index] != right[index]. This 1658 * corresponds to the least significant nonzero byte in lw ^ rw, since lw and rw are 1659 * little-endian. Long.numberOfTrailingZeros(diff) tells us the least significant 1660 * nonzero bit, and zeroing out the first three bits of L.nTZ gives us the shift to get 1661 * that least significant nonzero byte. This comparison logic is based on UnsignedBytes 1662 * comparator from guava v21 1663 */ 1664 int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7; 1665 return ((int) ((lw >>> n) & 0xFF)) - ((int) ((rw >>> n) & 0xFF)); 1666 } 1667 } 1668 1669 // The epilogue to cover the last (minLength % stride) elements. 1670 for (; i < minLength; i++) { 1671 int a = (buffer1[offset1 + i] & 0xFF); 1672 int b = (buffer2[offset2 + i] & 0xFF); 1673 if (a != b) { 1674 return a - b; 1675 } 1676 } 1677 return length1 - length2; 1678 } 1679 } 1680 } 1681 1682 /** 1683 * @param left left operand 1684 * @param right right operand 1685 * @return True if equal 1686 */ 1687 public static boolean equals(final byte [] left, final byte [] right) { 1688 // Could use Arrays.equals? 1689 //noinspection SimplifiableConditionalExpression 1690 if (left == right) return true; 1691 if (left == null || right == null) return false; 1692 if (left.length != right.length) return false; 1693 if (left.length == 0) return true; 1694 1695 // Since we're often comparing adjacent sorted data, 1696 // it's usual to have equal arrays except for the very last byte 1697 // so check that first 1698 if (left[left.length - 1] != right[right.length - 1]) return false; 1699 1700 return compareTo(left, right) == 0; 1701 } 1702 1703 public static boolean equals(final byte[] left, int leftOffset, int leftLen, 1704 final byte[] right, int rightOffset, int rightLen) { 1705 // short circuit case 1706 if (left == right && 1707 leftOffset == rightOffset && 1708 leftLen == rightLen) { 1709 return true; 1710 } 1711 // different lengths fast check 1712 if (leftLen != rightLen) { 1713 return false; 1714 } 1715 if (leftLen == 0) { 1716 return true; 1717 } 1718 1719 // Since we're often comparing adjacent sorted data, 1720 // it's usual to have equal arrays except for the very last byte 1721 // so check that first 1722 if (left[leftOffset + leftLen - 1] != right[rightOffset + rightLen - 1]) return false; 1723 1724 return LexicographicalComparerHolder.BEST_COMPARER. 1725 compareTo(left, leftOffset, leftLen, right, rightOffset, rightLen) == 0; 1726 } 1727 1728 1729 /** 1730 * @param a left operand 1731 * @param buf right operand 1732 * @return True if equal 1733 */ 1734 public static boolean equals(byte[] a, ByteBuffer buf) { 1735 if (a == null) return buf == null; 1736 if (buf == null) return false; 1737 if (a.length != buf.remaining()) return false; 1738 1739 // Thou shalt not modify the original byte buffer in what should be read only operations. 1740 ByteBuffer b = buf.duplicate(); 1741 for (byte anA : a) { 1742 if (anA != b.get()) { 1743 return false; 1744 } 1745 } 1746 return true; 1747 } 1748 1749 1750 /** 1751 * Return true if the byte array on the right is a prefix of the byte 1752 * array on the left. 1753 */ 1754 public static boolean startsWith(byte[] bytes, byte[] prefix) { 1755 return bytes != null && prefix != null && 1756 bytes.length >= prefix.length && 1757 LexicographicalComparerHolder.BEST_COMPARER. 1758 compareTo(bytes, 0, prefix.length, prefix, 0, prefix.length) == 0; 1759 } 1760 1761 /** 1762 * @param b bytes to hash 1763 * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the 1764 * passed in array. This method is what {@link org.apache.hadoop.io.Text} 1765 * use calculating hash code. 1766 */ 1767 public static int hashCode(final byte [] b) { 1768 return hashCode(b, b.length); 1769 } 1770 1771 /** 1772 * @param b value 1773 * @param length length of the value 1774 * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the 1775 * passed in array. This method is what {@link org.apache.hadoop.io.Text} 1776 * use calculating hash code. 1777 */ 1778 public static int hashCode(final byte [] b, final int length) { 1779 return WritableComparator.hashBytes(b, length); 1780 } 1781 1782 /** 1783 * @param b bytes to hash 1784 * @return A hash of <code>b</code> as an Integer that can be used as key in 1785 * Maps. 1786 */ 1787 public static Integer mapKey(final byte [] b) { 1788 return hashCode(b); 1789 } 1790 1791 /** 1792 * @param b bytes to hash 1793 * @param length length to hash 1794 * @return A hash of <code>b</code> as an Integer that can be used as key in 1795 * Maps. 1796 */ 1797 public static Integer mapKey(final byte [] b, final int length) { 1798 return hashCode(b, length); 1799 } 1800 1801 /** 1802 * @param a lower half 1803 * @param b upper half 1804 * @return New array that has a in lower half and b in upper half. 1805 */ 1806 public static byte [] add(final byte [] a, final byte [] b) { 1807 return add(a, b, EMPTY_BYTE_ARRAY); 1808 } 1809 1810 /** 1811 * @param a first third 1812 * @param b second third 1813 * @param c third third 1814 * @return New array made from a, b and c 1815 */ 1816 public static byte [] add(final byte [] a, final byte [] b, final byte [] c) { 1817 byte [] result = new byte[a.length + b.length + c.length]; 1818 System.arraycopy(a, 0, result, 0, a.length); 1819 System.arraycopy(b, 0, result, a.length, b.length); 1820 System.arraycopy(c, 0, result, a.length + b.length, c.length); 1821 return result; 1822 } 1823 1824 /** 1825 * @param arrays all the arrays to concatenate together. 1826 * @return New array made from the concatenation of the given arrays. 1827 */ 1828 public static byte [] add(final byte [][] arrays) { 1829 int length = 0; 1830 for (int i = 0; i < arrays.length; i++) { 1831 length += arrays[i].length; 1832 } 1833 byte [] result = new byte[length]; 1834 int index = 0; 1835 for (int i = 0; i < arrays.length; i++) { 1836 System.arraycopy(arrays[i], 0, result, index, arrays[i].length); 1837 index += arrays[i].length; 1838 } 1839 return result; 1840 } 1841 1842 /** 1843 * @param a array 1844 * @param length amount of bytes to grab 1845 * @return First <code>length</code> bytes from <code>a</code> 1846 */ 1847 public static byte [] head(final byte [] a, final int length) { 1848 if (a.length < length) { 1849 return null; 1850 } 1851 byte [] result = new byte[length]; 1852 System.arraycopy(a, 0, result, 0, length); 1853 return result; 1854 } 1855 1856 /** 1857 * @param a array 1858 * @param length amount of bytes to snarf 1859 * @return Last <code>length</code> bytes from <code>a</code> 1860 */ 1861 public static byte [] tail(final byte [] a, final int length) { 1862 if (a.length < length) { 1863 return null; 1864 } 1865 byte [] result = new byte[length]; 1866 System.arraycopy(a, a.length - length, result, 0, length); 1867 return result; 1868 } 1869 1870 /** 1871 * @param a array 1872 * @param length new array size 1873 * @return Value in <code>a</code> plus <code>length</code> prepended 0 bytes 1874 */ 1875 public static byte [] padHead(final byte [] a, final int length) { 1876 byte [] padding = new byte[length]; 1877 for (int i = 0; i < length; i++) { 1878 padding[i] = 0; 1879 } 1880 return add(padding,a); 1881 } 1882 1883 /** 1884 * @param a array 1885 * @param length new array size 1886 * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes 1887 */ 1888 public static byte [] padTail(final byte [] a, final int length) { 1889 byte [] padding = new byte[length]; 1890 for (int i = 0; i < length; i++) { 1891 padding[i] = 0; 1892 } 1893 return add(a,padding); 1894 } 1895 1896 /** 1897 * Split passed range. Expensive operation relatively. Uses BigInteger math. 1898 * Useful splitting ranges for MapReduce jobs. 1899 * @param a Beginning of range 1900 * @param b End of range 1901 * @param num Number of times to split range. Pass 1 if you want to split 1902 * the range in two; i.e. one split. 1903 * @return Array of dividing values 1904 */ 1905 public static byte [][] split(final byte [] a, final byte [] b, final int num) { 1906 return split(a, b, false, num); 1907 } 1908 1909 /** 1910 * Split passed range. Expensive operation relatively. Uses BigInteger math. 1911 * Useful splitting ranges for MapReduce jobs. 1912 * @param a Beginning of range 1913 * @param b End of range 1914 * @param inclusive Whether the end of range is prefix-inclusive or is 1915 * considered an exclusive boundary. Automatic splits are generally exclusive 1916 * and manual splits with an explicit range utilize an inclusive end of range. 1917 * @param num Number of times to split range. Pass 1 if you want to split 1918 * the range in two; i.e. one split. 1919 * @return Array of dividing values 1920 */ 1921 public static byte[][] split(final byte[] a, final byte[] b, 1922 boolean inclusive, final int num) { 1923 byte[][] ret = new byte[num + 2][]; 1924 int i = 0; 1925 Iterable<byte[]> iter = iterateOnSplits(a, b, inclusive, num); 1926 if (iter == null) 1927 return null; 1928 for (byte[] elem : iter) { 1929 ret[i++] = elem; 1930 } 1931 return ret; 1932 } 1933 1934 /** 1935 * Iterate over keys within the passed range, splitting at an [a,b) boundary. 1936 */ 1937 public static Iterable<byte[]> iterateOnSplits(final byte[] a, 1938 final byte[] b, final int num) 1939 { 1940 return iterateOnSplits(a, b, false, num); 1941 } 1942 1943 /** 1944 * Iterate over keys within the passed range. 1945 */ 1946 public static Iterable<byte[]> iterateOnSplits( 1947 final byte[] a, final byte[]b, boolean inclusive, final int num) 1948 { 1949 byte [] aPadded; 1950 byte [] bPadded; 1951 if (a.length < b.length) { 1952 aPadded = padTail(a, b.length - a.length); 1953 bPadded = b; 1954 } else if (b.length < a.length) { 1955 aPadded = a; 1956 bPadded = padTail(b, a.length - b.length); 1957 } else { 1958 aPadded = a; 1959 bPadded = b; 1960 } 1961 if (compareTo(aPadded,bPadded) >= 0) { 1962 throw new IllegalArgumentException("b <= a"); 1963 } 1964 if (num <= 0) { 1965 throw new IllegalArgumentException("num cannot be <= 0"); 1966 } 1967 byte [] prependHeader = {1, 0}; 1968 final BigInteger startBI = new BigInteger(add(prependHeader, aPadded)); 1969 final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded)); 1970 BigInteger diffBI = stopBI.subtract(startBI); 1971 if (inclusive) { 1972 diffBI = diffBI.add(BigInteger.ONE); 1973 } 1974 final BigInteger splitsBI = BigInteger.valueOf(num + 1); 1975 //when diffBI < splitBI, use an additional byte to increase diffBI 1976 if(diffBI.compareTo(splitsBI) < 0) { 1977 byte[] aPaddedAdditional = new byte[aPadded.length+1]; 1978 byte[] bPaddedAdditional = new byte[bPadded.length+1]; 1979 for (int i = 0; i < aPadded.length; i++){ 1980 aPaddedAdditional[i] = aPadded[i]; 1981 } 1982 for (int j = 0; j < bPadded.length; j++){ 1983 bPaddedAdditional[j] = bPadded[j]; 1984 } 1985 aPaddedAdditional[aPadded.length] = 0; 1986 bPaddedAdditional[bPadded.length] = 0; 1987 return iterateOnSplits(aPaddedAdditional, bPaddedAdditional, inclusive, num); 1988 } 1989 final BigInteger intervalBI; 1990 try { 1991 intervalBI = diffBI.divide(splitsBI); 1992 } catch(Exception e) { 1993 LOG.error("Exception caught during division", e); 1994 return null; 1995 } 1996 1997 final Iterator<byte[]> iterator = new Iterator<byte[]>() { 1998 private int i = -1; 1999 2000 @Override 2001 public boolean hasNext() { 2002 return i < num+1; 2003 } 2004 2005 @Override 2006 public byte[] next() { 2007 i++; 2008 if (i == 0) return a; 2009 if (i == num + 1) return b; 2010 2011 BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger.valueOf(i))); 2012 byte [] padded = curBI.toByteArray(); 2013 if (padded[1] == 0) 2014 padded = tail(padded, padded.length - 2); 2015 else 2016 padded = tail(padded, padded.length - 1); 2017 return padded; 2018 } 2019 2020 @Override 2021 public void remove() { 2022 throw new UnsupportedOperationException(); 2023 } 2024 2025 }; 2026 2027 return new Iterable<byte[]>() { 2028 @Override 2029 public Iterator<byte[]> iterator() { 2030 return iterator; 2031 } 2032 }; 2033 } 2034 2035 /** 2036 * @param bytes array to hash 2037 * @param offset offset to start from 2038 * @param length length to hash 2039 * */ 2040 public static int hashCode(byte[] bytes, int offset, int length) { 2041 int hash = 1; 2042 for (int i = offset; i < offset + length; i++) 2043 hash = (31 * hash) + bytes[i]; 2044 return hash; 2045 } 2046 2047 /** 2048 * @param t operands 2049 * @return Array of byte arrays made from passed array of Text 2050 */ 2051 public static byte [][] toByteArrays(final String [] t) { 2052 byte [][] result = new byte[t.length][]; 2053 for (int i = 0; i < t.length; i++) { 2054 result[i] = Bytes.toBytes(t[i]); 2055 } 2056 return result; 2057 } 2058 2059 /** 2060 * @param t operands 2061 * @return Array of binary byte arrays made from passed array of binary strings 2062 */ 2063 public static byte[][] toBinaryByteArrays(final String[] t) { 2064 byte[][] result = new byte[t.length][]; 2065 for (int i = 0; i < t.length; i++) { 2066 result[i] = Bytes.toBytesBinary(t[i]); 2067 } 2068 return result; 2069 } 2070 2071 /** 2072 * @param column operand 2073 * @return A byte array of a byte array where first and only entry is 2074 * <code>column</code> 2075 */ 2076 public static byte [][] toByteArrays(final String column) { 2077 return toByteArrays(toBytes(column)); 2078 } 2079 2080 /** 2081 * @param column operand 2082 * @return A byte array of a byte array where first and only entry is 2083 * <code>column</code> 2084 */ 2085 public static byte [][] toByteArrays(final byte [] column) { 2086 byte [][] result = new byte[1][]; 2087 result[0] = column; 2088 return result; 2089 } 2090 2091 /** 2092 * Binary search for keys in indexes. 2093 * 2094 * @param arr array of byte arrays to search for 2095 * @param key the key you want to find 2096 * @param offset the offset in the key you want to find 2097 * @param length the length of the key 2098 * @param comparator a comparator to compare. 2099 * @return zero-based index of the key, if the key is present in the array. 2100 * Otherwise, a value -(i + 1) such that the key is between arr[i - 2101 * 1] and arr[i] non-inclusively, where i is in [0, i], if we define 2102 * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above 2103 * means that this function can return 2N + 1 different values 2104 * ranging from -(N + 1) to N - 1. 2105 * @deprecated {@link Bytes#binarySearch(byte[][], byte[], int, int)} 2106 */ 2107 @Deprecated 2108 public static int binarySearch(byte [][]arr, byte []key, int offset, 2109 int length, RawComparator<?> comparator) { 2110 return binarySearch(arr, key, offset, length); 2111 } 2112 2113 /** 2114 * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR. 2115 * 2116 * @param arr array of byte arrays to search for 2117 * @param key the key you want to find 2118 * @param offset the offset in the key you want to find 2119 * @param length the length of the key 2120 * @return zero-based index of the key, if the key is present in the array. 2121 * Otherwise, a value -(i + 1) such that the key is between arr[i - 2122 * 1] and arr[i] non-inclusively, where i is in [0, i], if we define 2123 * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above 2124 * means that this function can return 2N + 1 different values 2125 * ranging from -(N + 1) to N - 1. 2126 */ 2127 public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) { 2128 int low = 0; 2129 int high = arr.length - 1; 2130 2131 while (low <= high) { 2132 int mid = (low + high) >>> 1; 2133 // we have to compare in this order, because the comparator order 2134 // has special logic when the 'left side' is a special key. 2135 int cmp = Bytes.BYTES_RAWCOMPARATOR 2136 .compare(key, offset, length, arr[mid], 0, arr[mid].length); 2137 // key lives above the midpoint 2138 if (cmp > 0) 2139 low = mid + 1; 2140 // key lives below the midpoint 2141 else if (cmp < 0) 2142 high = mid - 1; 2143 // BAM. how often does this really happen? 2144 else 2145 return mid; 2146 } 2147 return -(low + 1); 2148 } 2149 2150 /** 2151 * Binary search for keys in indexes. 2152 * 2153 * @param arr array of byte arrays to search for 2154 * @param key the key you want to find 2155 * @param comparator a comparator to compare. 2156 * @return zero-based index of the key, if the key is present in the array. 2157 * Otherwise, a value -(i + 1) such that the key is between arr[i - 2158 * 1] and arr[i] non-inclusively, where i is in [0, i], if we define 2159 * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above 2160 * means that this function can return 2N + 1 different values 2161 * ranging from -(N + 1) to N - 1. 2162 * @return the index of the block 2163 * @deprecated Use {@link Bytes#binarySearch(Cell[], Cell, CellComparator)} 2164 */ 2165 @Deprecated 2166 public static int binarySearch(byte[][] arr, Cell key, RawComparator<Cell> comparator) { 2167 int low = 0; 2168 int high = arr.length - 1; 2169 KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue(); 2170 while (low <= high) { 2171 int mid = (low+high) >>> 1; 2172 // we have to compare in this order, because the comparator order 2173 // has special logic when the 'left side' is a special key. 2174 r.setKey(arr[mid], 0, arr[mid].length); 2175 int cmp = comparator.compare(key, r); 2176 // key lives above the midpoint 2177 if (cmp > 0) 2178 low = mid + 1; 2179 // key lives below the midpoint 2180 else if (cmp < 0) 2181 high = mid - 1; 2182 // BAM. how often does this really happen? 2183 else 2184 return mid; 2185 } 2186 return - (low+1); 2187 } 2188 2189 /** 2190 * Binary search for keys in indexes. 2191 * 2192 * @param arr array of byte arrays to search for 2193 * @param key the key you want to find 2194 * @param comparator a comparator to compare. 2195 * @return zero-based index of the key, if the key is present in the array. 2196 * Otherwise, a value -(i + 1) such that the key is between arr[i - 2197 * 1] and arr[i] non-inclusively, where i is in [0, i], if we define 2198 * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above 2199 * means that this function can return 2N + 1 different values 2200 * ranging from -(N + 1) to N - 1. 2201 * @return the index of the block 2202 */ 2203 public static int binarySearch(Cell[] arr, Cell key, CellComparator comparator) { 2204 int low = 0; 2205 int high = arr.length - 1; 2206 while (low <= high) { 2207 int mid = (low+high) >>> 1; 2208 // we have to compare in this order, because the comparator order 2209 // has special logic when the 'left side' is a special key. 2210 int cmp = comparator.compare(key, arr[mid]); 2211 // key lives above the midpoint 2212 if (cmp > 0) 2213 low = mid + 1; 2214 // key lives below the midpoint 2215 else if (cmp < 0) 2216 high = mid - 1; 2217 // BAM. how often does this really happen? 2218 else 2219 return mid; 2220 } 2221 return - (low+1); 2222 } 2223 2224 /** 2225 * Bytewise binary increment/deincrement of long contained in byte array 2226 * on given amount. 2227 * 2228 * @param value - array of bytes containing long (length <= SIZEOF_LONG) 2229 * @param amount value will be incremented on (deincremented if negative) 2230 * @return array of bytes containing incremented long (length == SIZEOF_LONG) 2231 */ 2232 public static byte [] incrementBytes(byte[] value, long amount) 2233 { 2234 byte[] val = value; 2235 if (val.length < SIZEOF_LONG) { 2236 // Hopefully this doesn't happen too often. 2237 byte [] newvalue; 2238 if (val[0] < 0) { 2239 newvalue = new byte[]{-1, -1, -1, -1, -1, -1, -1, -1}; 2240 } else { 2241 newvalue = new byte[SIZEOF_LONG]; 2242 } 2243 System.arraycopy(val, 0, newvalue, newvalue.length - val.length, 2244 val.length); 2245 val = newvalue; 2246 } else if (val.length > SIZEOF_LONG) { 2247 throw new IllegalArgumentException("Increment Bytes - value too big: " + 2248 val.length); 2249 } 2250 if(amount == 0) return val; 2251 if(val[0] < 0){ 2252 return binaryIncrementNeg(val, amount); 2253 } 2254 return binaryIncrementPos(val, amount); 2255 } 2256 2257 /* increment/deincrement for positive value */ 2258 private static byte [] binaryIncrementPos(byte [] value, long amount) { 2259 long amo = amount; 2260 int sign = 1; 2261 if (amount < 0) { 2262 amo = -amount; 2263 sign = -1; 2264 } 2265 for(int i=0;i<value.length;i++) { 2266 int cur = ((int)amo % 256) * sign; 2267 amo = (amo >> 8); 2268 int val = value[value.length-i-1] & 0x0ff; 2269 int total = val + cur; 2270 if(total > 255) { 2271 amo += sign; 2272 total %= 256; 2273 } else if (total < 0) { 2274 amo -= sign; 2275 } 2276 value[value.length-i-1] = (byte)total; 2277 if (amo == 0) return value; 2278 } 2279 return value; 2280 } 2281 2282 /* increment/deincrement for negative value */ 2283 private static byte [] binaryIncrementNeg(byte [] value, long amount) { 2284 long amo = amount; 2285 int sign = 1; 2286 if (amount < 0) { 2287 amo = -amount; 2288 sign = -1; 2289 } 2290 for(int i=0;i<value.length;i++) { 2291 int cur = ((int)amo % 256) * sign; 2292 amo = (amo >> 8); 2293 int val = ((~value[value.length-i-1]) & 0x0ff) + 1; 2294 int total = cur - val; 2295 if(total >= 0) { 2296 amo += sign; 2297 } else if (total < -256) { 2298 amo -= sign; 2299 total %= 256; 2300 } 2301 value[value.length-i-1] = (byte)total; 2302 if (amo == 0) return value; 2303 } 2304 return value; 2305 } 2306 2307 /** 2308 * Writes a string as a fixed-size field, padded with zeros. 2309 */ 2310 public static void writeStringFixedSize(final DataOutput out, String s, 2311 int size) throws IOException { 2312 byte[] b = toBytes(s); 2313 if (b.length > size) { 2314 throw new IOException("Trying to write " + b.length + " bytes (" + 2315 toStringBinary(b) + ") into a field of length " + size); 2316 } 2317 2318 out.writeBytes(s); 2319 for (int i = 0; i < size - s.length(); ++i) 2320 out.writeByte(0); 2321 } 2322 2323 /** 2324 * Reads a fixed-size field and interprets it as a string padded with zeros. 2325 */ 2326 public static String readStringFixedSize(final DataInput in, int size) 2327 throws IOException { 2328 byte[] b = new byte[size]; 2329 in.readFully(b); 2330 int n = b.length; 2331 while (n > 0 && b[n - 1] == 0) 2332 --n; 2333 2334 return toString(b, 0, n); 2335 } 2336 2337 /** 2338 * Copy the byte array given in parameter and return an instance 2339 * of a new byte array with the same length and the same content. 2340 * @param bytes the byte array to duplicate 2341 * @return a copy of the given byte array 2342 */ 2343 public static byte [] copy(byte [] bytes) { 2344 if (bytes == null) return null; 2345 byte [] result = new byte[bytes.length]; 2346 System.arraycopy(bytes, 0, result, 0, bytes.length); 2347 return result; 2348 } 2349 2350 /** 2351 * Copy the byte array given in parameter and return an instance 2352 * of a new byte array with the same length and the same content. 2353 * @param bytes the byte array to copy from 2354 * @return a copy of the given designated byte array 2355 * @param offset 2356 * @param length 2357 */ 2358 public static byte [] copy(byte [] bytes, final int offset, final int length) { 2359 if (bytes == null) return null; 2360 byte [] result = new byte[length]; 2361 System.arraycopy(bytes, offset, result, 0, length); 2362 return result; 2363 } 2364 2365 /** 2366 * Search sorted array "a" for byte "key". I can't remember if I wrote this or copied it from 2367 * somewhere. (mcorgan) 2368 * @param a Array to search. Entries must be sorted and unique. 2369 * @param fromIndex First index inclusive of "a" to include in the search. 2370 * @param toIndex Last index exclusive of "a" to include in the search. 2371 * @param key The byte to search for. 2372 * @return The index of key if found. If not found, return -(index + 1), where negative indicates 2373 * "not found" and the "index + 1" handles the "-0" case. 2374 */ 2375 public static int unsignedBinarySearch(byte[] a, int fromIndex, int toIndex, byte key) { 2376 int unsignedKey = key & 0xff; 2377 int low = fromIndex; 2378 int high = toIndex - 1; 2379 2380 while (low <= high) { 2381 int mid = (low + high) >>> 1; 2382 int midVal = a[mid] & 0xff; 2383 2384 if (midVal < unsignedKey) { 2385 low = mid + 1; 2386 } else if (midVal > unsignedKey) { 2387 high = mid - 1; 2388 } else { 2389 return mid; // key found 2390 } 2391 } 2392 return -(low + 1); // key not found. 2393 } 2394 2395 /** 2396 * Treat the byte[] as an unsigned series of bytes, most significant bits first. Start by adding 2397 * 1 to the rightmost bit/byte and carry over all overflows to the more significant bits/bytes. 2398 * 2399 * @param input The byte[] to increment. 2400 * @return The incremented copy of "in". May be same length or 1 byte longer. 2401 */ 2402 public static byte[] unsignedCopyAndIncrement(final byte[] input) { 2403 byte[] copy = copy(input); 2404 if (copy == null) { 2405 throw new IllegalArgumentException("cannot increment null array"); 2406 } 2407 for (int i = copy.length - 1; i >= 0; --i) { 2408 if (copy[i] == -1) {// -1 is all 1-bits, which is the unsigned maximum 2409 copy[i] = 0; 2410 } else { 2411 ++copy[i]; 2412 return copy; 2413 } 2414 } 2415 // we maxed out the array 2416 byte[] out = new byte[copy.length + 1]; 2417 out[0] = 1; 2418 System.arraycopy(copy, 0, out, 1, copy.length); 2419 return out; 2420 } 2421 2422 public static boolean equals(List<byte[]> a, List<byte[]> b) { 2423 if (a == null) { 2424 if (b == null) { 2425 return true; 2426 } 2427 return false; 2428 } 2429 if (b == null) { 2430 return false; 2431 } 2432 if (a.size() != b.size()) { 2433 return false; 2434 } 2435 for (int i = 0; i < a.size(); ++i) { 2436 if (!Bytes.equals(a.get(i), b.get(i))) { 2437 return false; 2438 } 2439 } 2440 return true; 2441 } 2442 2443 public static boolean isSorted(Collection<byte[]> arrays) { 2444 if (!CollectionUtils.isEmpty(arrays)) { 2445 byte[] previous = new byte[0]; 2446 for (byte[] array : arrays) { 2447 if (Bytes.compareTo(previous, array) > 0) { 2448 return false; 2449 } 2450 previous = array; 2451 } 2452 } 2453 return true; 2454 } 2455 2456 public static List<byte[]> getUtf8ByteArrays(List<String> strings) { 2457 if (CollectionUtils.isEmpty(strings)) { 2458 return Collections.emptyList(); 2459 } 2460 List<byte[]> byteArrays = new ArrayList<>(strings.size()); 2461 strings.forEach(s -> byteArrays.add(Bytes.toBytes(s))); 2462 return byteArrays; 2463 } 2464 2465 /** 2466 * Returns the index of the first appearance of the value {@code target} in 2467 * {@code array}. 2468 * 2469 * @param array an array of {@code byte} values, possibly empty 2470 * @param target a primitive {@code byte} value 2471 * @return the least index {@code i} for which {@code array[i] == target}, or 2472 * {@code -1} if no such index exists. 2473 */ 2474 public static int indexOf(byte[] array, byte target) { 2475 for (int i = 0; i < array.length; i++) { 2476 if (array[i] == target) { 2477 return i; 2478 } 2479 } 2480 return -1; 2481 } 2482 2483 /** 2484 * Returns the start position of the first occurrence of the specified {@code 2485 * target} within {@code array}, or {@code -1} if there is no such occurrence. 2486 * 2487 * <p>More formally, returns the lowest index {@code i} such that {@code 2488 * java.util.Arrays.copyOfRange(array, i, i + target.length)} contains exactly 2489 * the same elements as {@code target}. 2490 * 2491 * @param array the array to search for the sequence {@code target} 2492 * @param target the array to search for as a sub-sequence of {@code array} 2493 */ 2494 public static int indexOf(byte[] array, byte[] target) { 2495 checkNotNull(array, "array"); 2496 checkNotNull(target, "target"); 2497 if (target.length == 0) { 2498 return 0; 2499 } 2500 2501 outer: 2502 for (int i = 0; i < array.length - target.length + 1; i++) { 2503 for (int j = 0; j < target.length; j++) { 2504 if (array[i + j] != target[j]) { 2505 continue outer; 2506 } 2507 } 2508 return i; 2509 } 2510 return -1; 2511 } 2512 2513 /** 2514 * @param array an array of {@code byte} values, possibly empty 2515 * @param target a primitive {@code byte} value 2516 * @return {@code true} if {@code target} is present as an element anywhere in {@code array}. 2517 */ 2518 public static boolean contains(byte[] array, byte target) { 2519 return indexOf(array, target) > -1; 2520 } 2521 2522 /** 2523 * @param array an array of {@code byte} values, possibly empty 2524 * @param target an array of {@code byte} 2525 * @return {@code true} if {@code target} is present anywhere in {@code array} 2526 */ 2527 public static boolean contains(byte[] array, byte[] target) { 2528 return indexOf(array, target) > -1; 2529 } 2530 2531 /** 2532 * Fill given array with zeros. 2533 * @param b array which needs to be filled with zeros 2534 */ 2535 public static void zero(byte[] b) { 2536 zero(b, 0, b.length); 2537 } 2538 2539 /** 2540 * Fill given array with zeros at the specified position. 2541 * @param b 2542 * @param offset 2543 * @param length 2544 */ 2545 public static void zero(byte[] b, int offset, int length) { 2546 checkPositionIndex(offset, b.length, "offset"); 2547 checkArgument(length > 0, "length must be greater than 0"); 2548 checkPositionIndex(offset + length, b.length, "offset + length"); 2549 Arrays.fill(b, offset, offset + length, (byte) 0); 2550 } 2551 2552 private static final SecureRandom RNG = new SecureRandom(); 2553 2554 /** 2555 * Fill given array with random bytes. 2556 * @param b array which needs to be filled with random bytes 2557 */ 2558 public static void random(byte[] b) { 2559 RNG.nextBytes(b); 2560 } 2561 2562 /** 2563 * Fill given array with random bytes at the specified position. 2564 * @param b 2565 * @param offset 2566 * @param length 2567 */ 2568 public static void random(byte[] b, int offset, int length) { 2569 checkPositionIndex(offset, b.length, "offset"); 2570 checkArgument(length > 0, "length must be greater than 0"); 2571 checkPositionIndex(offset + length, b.length, "offset + length"); 2572 byte[] buf = new byte[length]; 2573 RNG.nextBytes(buf); 2574 System.arraycopy(buf, 0, b, offset, length); 2575 } 2576 2577 /** 2578 * Create a max byte array with the specified max byte count 2579 * @param maxByteCount the length of returned byte array 2580 * @return the created max byte array 2581 */ 2582 public static byte[] createMaxByteArray(int maxByteCount) { 2583 byte[] maxByteArray = new byte[maxByteCount]; 2584 for (int i = 0; i < maxByteArray.length; i++) { 2585 maxByteArray[i] = (byte) 0xff; 2586 } 2587 return maxByteArray; 2588 } 2589 2590 /** 2591 * Create a byte array which is multiple given bytes 2592 * @param srcBytes 2593 * @param multiNum 2594 * @return byte array 2595 */ 2596 public static byte[] multiple(byte[] srcBytes, int multiNum) { 2597 if (multiNum <= 0) { 2598 return new byte[0]; 2599 } 2600 byte[] result = new byte[srcBytes.length * multiNum]; 2601 for (int i = 0; i < multiNum; i++) { 2602 System.arraycopy(srcBytes, 0, result, i * srcBytes.length, 2603 srcBytes.length); 2604 } 2605 return result; 2606 } 2607 2608 private static final char[] HEX_CHARS = { 2609 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' 2610 }; 2611 2612 /** 2613 * Convert a byte range into a hex string 2614 */ 2615 public static String toHex(byte[] b, int offset, int length) { 2616 checkArgument(length <= Integer.MAX_VALUE / 2); 2617 int numChars = length * 2; 2618 char[] ch = new char[numChars]; 2619 for (int i = 0; i < numChars; i += 2) 2620 { 2621 byte d = b[offset + i/2]; 2622 ch[i] = HEX_CHARS[(d >> 4) & 0x0F]; 2623 ch[i+1] = HEX_CHARS[d & 0x0F]; 2624 } 2625 return new String(ch); 2626 } 2627 2628 /** 2629 * Convert a byte array into a hex string 2630 */ 2631 public static String toHex(byte[] b) { 2632 return toHex(b, 0, b.length); 2633 } 2634 2635 private static int hexCharToNibble(char ch) { 2636 if (ch <= '9' && ch >= '0') { 2637 return ch - '0'; 2638 } else if (ch >= 'a' && ch <= 'f') { 2639 return ch - 'a' + 10; 2640 } else if (ch >= 'A' && ch <= 'F') { 2641 return ch - 'A' + 10; 2642 } 2643 throw new IllegalArgumentException("Invalid hex char: " + ch); 2644 } 2645 2646 private static byte hexCharsToByte(char c1, char c2) { 2647 return (byte) ((hexCharToNibble(c1) << 4) | hexCharToNibble(c2)); 2648 } 2649 2650 /** 2651 * Create a byte array from a string of hash digits. The length of the 2652 * string must be a multiple of 2 2653 * @param hex 2654 */ 2655 public static byte[] fromHex(String hex) { 2656 checkArgument(hex.length() % 2 == 0, "length must be a multiple of 2"); 2657 int len = hex.length(); 2658 byte[] b = new byte[len / 2]; 2659 for (int i = 0; i < len; i += 2) { 2660 b[i / 2] = hexCharsToByte(hex.charAt(i),hex.charAt(i+1)); 2661 } 2662 return b; 2663 } 2664 2665 /** 2666 * @param b 2667 * @param delimiter 2668 * @return Index of delimiter having started from start of <code>b</code> moving rightward. 2669 */ 2670 public static int searchDelimiterIndex(final byte[] b, int offset, final int length, 2671 final int delimiter) { 2672 if (b == null) { 2673 throw new IllegalArgumentException("Passed buffer is null"); 2674 } 2675 int result = -1; 2676 for (int i = offset; i < length + offset; i++) { 2677 if (b[i] == delimiter) { 2678 result = i; 2679 break; 2680 } 2681 } 2682 return result; 2683 } 2684 2685 /** 2686 * Find index of passed delimiter walking from end of buffer backwards. 2687 * 2688 * @param b 2689 * @param delimiter 2690 * @return Index of delimiter 2691 */ 2692 public static int searchDelimiterIndexInReverse(final byte[] b, final int offset, 2693 final int length, final int delimiter) { 2694 if (b == null) { 2695 throw new IllegalArgumentException("Passed buffer is null"); 2696 } 2697 int result = -1; 2698 for (int i = (offset + length) - 1; i >= offset; i--) { 2699 if (b[i] == delimiter) { 2700 result = i; 2701 break; 2702 } 2703 } 2704 return result; 2705 } 2706 2707 public static int findCommonPrefix(byte[] left, byte[] right, int leftLength, int rightLength, 2708 int leftOffset, int rightOffset) { 2709 int length = Math.min(leftLength, rightLength); 2710 int result = 0; 2711 2712 while (result < length && left[leftOffset + result] == right[rightOffset + result]) { 2713 result++; 2714 } 2715 return result; 2716 } 2717}