001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.util; 019 020import static org.apache.hadoop.hbase.util.Order.ASCENDING; 021import static org.apache.hadoop.hbase.util.Order.DESCENDING; 022 023import java.math.BigDecimal; 024import java.math.BigInteger; 025import java.math.MathContext; 026import java.math.RoundingMode; 027import java.nio.charset.Charset; 028 029import org.apache.yetus.audience.InterfaceAudience; 030 031import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 032 033/** 034 * Utility class that handles ordered byte arrays. That is, unlike 035 * {@link Bytes}, these methods produce byte arrays which maintain the sort 036 * order of the original values. 037 * <h3>Encoding Format summary</h3> 038 * <p> 039 * Each value is encoded as one or more bytes. 
The first byte of the encoding, 040 * its meaning, and a terse description of the bytes that follow is given by 041 * the following table: 042 * </p> 043 * <table summary="Encodings"> 044 * <tr><th>Content Type</th><th>Encoding</th></tr> 045 * <tr><td>NULL</td><td>0x05</td></tr> 046 * <tr><td>negative infinity</td><td>0x07</td></tr> 047 * <tr><td>negative large</td><td>0x08, ~E, ~M</td></tr> 048 * <tr><td>negative medium</td><td>0x13-E, ~M</td></tr> 049 * <tr><td>negative small</td><td>0x14, -E, ~M</td></tr> 050 * <tr><td>zero</td><td>0x15</td></tr> 051 * <tr><td>positive small</td><td>0x16, ~-E, M</td></tr> 052 * <tr><td>positive medium</td><td>0x17+E, M</td></tr> 053 * <tr><td>positive large</td><td>0x22, E, M</td></tr> 054 * <tr><td>positive infinity</td><td>0x23</td></tr> 055 * <tr><td>NaN</td><td>0x26</td></tr> 056 * <tr><td>fixed-length 32-bit integer</td><td>0x2b, I</td></tr> 057 * <tr><td>fixed-length 64-bit integer</td><td>0x2c, I</td></tr> 058 * <tr><td>fixed-length 8-bit integer</td><td>0x29</td></tr> 059 * <tr><td>fixed-length 16-bit integer</td><td>0x2a</td></tr> 060 * <tr><td>fixed-length 32-bit float</td><td>0x30, F</td></tr> 061 * <tr><td>fixed-length 64-bit float</td><td>0x31, F</td></tr> 062 * <tr><td>TEXT</td><td>0x34, T</td></tr> 063 * <tr><td>variable length BLOB</td><td>0x37, B</td></tr> 064 * <tr><td>byte-for-byte BLOB</td><td>0x38, X</td></tr> 065 * </table> 066 * 067 * <h3>Null Encoding</h3> 068 * <p> 069 * Each value that is a NULL encodes as a single byte of 0x05. Since every 070 * other value encoding begins with a byte greater than 0x05, this forces NULL 071 * values to sort first. 072 * </p> 073 * <h3>Text Encoding</h3> 074 * <p> 075 * Each text value begins with a single byte of 0x34 and ends with a single 076 * byte of 0x00. There are zero or more intervening bytes that encode the text 077 * value. The intervening bytes are chosen so that the encoding will sort in 078 * the desired collating order.
The intervening bytes may not contain a 0x00 079 * character; the only 0x00 byte allowed in a text encoding is the final byte. 080 * </p> 081 * <p> 082 * The text encoding ends in 0x00 in order to ensure that when there are two 083 * strings where one is a prefix of the other that the shorter string will 084 * sort first. 085 * </p> 086 * <h3>Binary Encoding</h3> 087 * <p> 088 * There are two encoding strategies for binary fields, referred to as 089 * "BlobVar" and "BlobCopy". BlobVar is less efficient in both space and 090 * encoding time. It has no limitations on the range of encoded values. 091 * BlobCopy is a byte-for-byte copy of the input data followed by a 092 * termination byte. It is extremely fast to encode and decode. It carries the 093 * restriction of not allowing a 0x00 value in the input byte[] as this value 094 * is used as the termination byte. 095 * </p> 096 * <h4>BlobVar</h4> 097 * <p> 098 * "BlobVar" encodes the input byte[] in a manner similar to a variable length 099 * integer encoding. As with the other {@code OrderedBytes} encodings, 100 * the first encoded byte is used to indicate what kind of value follows. This 101 * header byte is 0x37 for BlobVar encoded values. As with the traditional 102 * varint encoding, the most significant bit of each subsequent encoded 103 * {@code byte} is used as a continuation marker. The 7 remaining bits 104 * contain the 7 most significant bits of the first unencoded byte. The next 105 * encoded byte starts with a continuation marker in the MSB. The least 106 * significant bit from the first unencoded byte follows, and the remaining 6 107 * bits contain the 6 MSBs of the second unencoded byte. The encoding 108 * continues, encoding 7 bytes on to 8 encoded bytes. The MSB of the final 109 * encoded byte contains a termination marker rather than a continuation 110 * marker, and any remaining bits from the final input byte. Any trailing bits 111 * in the final encoded byte are zeros. 
112 * </p> 113 * <h4>BlobCopy</h4> 114 * <p> 115 * "BlobCopy" is a simple byte-for-byte copy of the input data. It uses 0x38 116 * as the header byte, and is terminated by 0x00 in the DESCENDING case. This 117 * alternative encoding is faster and more space-efficient, but it cannot 118 * accept values containing a 0x00 byte in DESCENDING order. 119 * </p> 120 * <h3>Variable-length Numeric Encoding</h3> 121 * <p> 122 * Numeric values must be coded so as to sort in numeric order. We assume that 123 * numeric values can be both integer and floating point values. Clients must 124 * be careful to use inspection methods for encoded values (such as 125 * {@link #isNumericInfinite(PositionedByteRange)} and 126 * {@link #isNumericNaN(PositionedByteRange)}) to protect against decoding 127 * values into objects which do not support these numeric concepts (such as 128 * {@link Long} and {@link BigDecimal}). 129 * </p> 130 * <p> 131 * Simplest cases first: If the numeric value is a NaN, then the encoding is a 132 * single byte of 0x26. This causes NaN values to sort after every other 133 * numeric value. 134 * </p> 135 * <p> 136 * If the numeric value is a negative infinity then the encoding is a single 137 * byte of 0x07. Since every other numeric value except NaN has a larger 138 * initial byte, this encoding ensures that negative infinity will sort prior 139 * to every other numeric value other than NaN. 140 * </p> 141 * <p> 142 * If the numeric value is a positive infinity then the encoding is a single 143 * byte of 0x23. Every other numeric value encoding begins with a smaller 144 * byte, ensuring that positive infinity always sorts last among numeric 145 * values. 0x23 is also smaller than 0x34, the initial byte of a text value, 146 * ensuring that every numeric value sorts before every text value. 147 * </p> 148 * <p> 149 * If the numeric value is exactly zero then it is encoded as a single byte of 150 * 0x15.
Finite negative values will have initial bytes of 0x08 through 0x14 151 * and finite positive values will have initial bytes of 0x16 through 0x22. 152 * </p> 153 * <p> 154 * For all numeric values, we compute a mantissa M and an exponent E. The 155 * mantissa is a base-100 representation of the value. The exponent E 156 * determines where to put the decimal point. 157 * </p> 158 * <p> 159 * Each centimal digit of the mantissa is stored in a byte. If the value of 160 * the centimal digit is X (hence X≥0 and X≤99) then the byte value will 161 * be 2*X+1 for every byte of the mantissa, except for the last byte which 162 * will be 2*X+0. The mantissa must be the minimum number of bytes necessary 163 * to represent the value; trailing X==0 digits are omitted. This means that 164 * the mantissa will never contain a byte with the value 0x00. 165 * </p> 166 * <p> 167 * If we assume all digits of the mantissa occur to the right of the decimal 168 * point, then the exponent E is the power of one hundred by which one must 169 * multiply the mantissa to recover the original value. 170 * </p> 171 * <p> 172 * Values are classified as large, medium, or small according to the value of 173 * E. If E is 11 or more, the value is large. For E between 0 and 10, the 174 * value is medium. For E less than zero, the value is small. 175 * </p> 176 * <p> 177 * Large positive values are encoded as a single byte 0x22 followed by E as a 178 * varint and then M. Medium positive values are a single byte of 0x17+E 179 * followed by M. Small positive values are encoded as a single byte 0x16 180 * followed by the ones-complement of the varint for -E followed by M. 181 * </p> 182 * <p> 183 * Small negative values are encoded as a single byte 0x14 followed by -E as a 184 * varint and then the ones-complement of M. Medium negative values are 185 * encoded as a byte 0x13-E followed by the ones-complement of M. 
Large 186 * negative values consist of the single byte 0x08 followed by the 187 * ones-complement of the varint encoding of E followed by the ones-complement 188 * of M. 189 * </p> 190 * <h3>Fixed-length Integer Encoding</h3> 191 * <p> 192 * All 4-byte integers are serialized to a 5-byte, fixed-width, sortable byte 193 * format. All 8-byte integers are serialized to the equivalent 9-byte format. 194 * Serialization is performed by writing a header byte, inverting the integer 195 * sign bit and writing the resulting bytes to the byte array in big endian 196 * order. 197 * </p> 198 * <h3>Fixed-length Floating Point Encoding</h3> 199 * <p> 200 * 32-bit and 64-bit floating point numbers are encoded to a 5-byte and 9-byte 201 * encoding format, respectively. The format is identical, save for the 202 * precision respected in each step of the operation. 203 * <p> 204 * This format ensures the following total ordering of floating point values: 205 * Float.NEGATIVE_INFINITY < -Float.MAX_VALUE < ... < 206 * -Float.MIN_VALUE < -0.0 < +0.0 < Float.MIN_VALUE < ... < 207 * Float.MAX_VALUE < Float.POSITIVE_INFINITY < Float.NaN 208 * </p> 209 * <p> 210 * Floating point numbers are encoded as specified in IEEE 754. A 32-bit 211 * single precision float consists of a sign bit, 8-bit unsigned exponent 212 * encoded in offset-127 notation, and a 23-bit significand. The format is 213 * described further in the <a 214 * href="http://en.wikipedia.org/wiki/Single_precision"> Single Precision 215 * Floating Point Wikipedia page</a> 216 * </p> 217 * <p> 218 * The value of a normal float is -1 <sup>sign bit</sup> × 219 * 2<sup>exponent - 127</sup> × 1.significand 220 * </p> 221 * <p> 222 * The IEEE 754 floating point format already preserves sort ordering for 223 * positive floating point numbers when the raw bytes are compared in most 224 * significant byte order.
This is discussed further at <a href= 225 * "http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm"> 226 * http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm</a> 227 * </p> 228 * <p> 229 * Thus, we need only ensure that negative numbers sort in the exact 230 * opposite order as positive numbers (so that say, negative infinity is less 231 * than negative 1), and that all negative numbers compare less than any 232 * positive number. To accomplish this, we invert the sign bit of all floating 233 * point numbers, and we also invert the exponent and significand bits if the 234 * floating point number was negative. 235 * </p> 236 * <p> 237 * More specifically, we first store the floating point bits into a 32-bit int 238 * {@code j} using {@link Float#floatToIntBits}. This method collapses 239 * all NaNs into a single, canonical NaN value but otherwise leaves the bits 240 * unchanged. We then compute 241 * </p> 242 * 243 * <pre> 244 * j ^= (j >> (Integer.SIZE - 1)) | Integer.MIN_VALUE 245 * </pre> 246 * <p> 247 * which inverts the sign bit and XOR's all other bits with the sign bit 248 * itself. Comparing the raw bytes of {@code j} in most significant byte 249 * order is equivalent to performing a single precision floating point 250 * comparison on the underlying bits (ignoring NaN comparisons, as NaNs don't 251 * compare equal to anything when performing floating point comparisons). 252 * </p> 253 * <p> 254 * The resulting integer is then converted into a byte array by serializing 255 * the integer one byte at a time in most significant byte order. The 256 * serialized integer is prefixed by a single header byte. All serialized 257 * values are 5 bytes in length. 258 * </p> 259 * <p> 260 * {@code OrderedBytes} encodings are heavily influenced by the 261 * <a href="http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki">SQLite4 Key 262 * Encoding</a>.
Slight deviations are make in the interest of order 263 * correctness and user extensibility. Fixed-width {@code Long} and 264 * {@link Double} encodings are based on implementations from the now defunct 265 * Orderly library. 266 * </p> 267 */ 268@InterfaceAudience.Public 269public class OrderedBytes { 270 271 /* 272 * These constants define header bytes used to identify encoded values. Note 273 * that the values here are not exhaustive as the Numeric format encodes 274 * portions of its value within the header byte. The values listed here are 275 * directly applied to persisted data -- DO NOT modify the values specified 276 * here. Instead, gaps are placed intentionally between values so that new 277 * implementations can be inserted into the total ordering enforced here. 278 */ 279 private static final byte NULL = 0x05; 280 // room for 1 expansion type 281 private static final byte NEG_INF = 0x07; 282 private static final byte NEG_LARGE = 0x08; 283 private static final byte NEG_MED_MIN = 0x09; 284 private static final byte NEG_MED_MAX = 0x13; 285 private static final byte NEG_SMALL = 0x14; 286 private static final byte ZERO = 0x15; 287 private static final byte POS_SMALL = 0x16; 288 private static final byte POS_MED_MIN = 0x17; 289 private static final byte POS_MED_MAX = 0x21; 290 private static final byte POS_LARGE = 0x22; 291 private static final byte POS_INF = 0x23; 292 // room for 2 expansion type 293 private static final byte NAN = 0x26; 294 // room for 2 expansion types 295 private static final byte FIXED_INT8 = 0x29; 296 private static final byte FIXED_INT16 = 0x2a; 297 private static final byte FIXED_INT32 = 0x2b; 298 private static final byte FIXED_INT64 = 0x2c; 299 // room for 3 expansion types 300 private static final byte FIXED_FLOAT32 = 0x30; 301 private static final byte FIXED_FLOAT64 = 0x31; 302 // room for 2 expansion type 303 private static final byte TEXT = 0x34; 304 // room for 2 expansion type 305 private static final byte BLOB_VAR = 0x37; 
306 private static final byte BLOB_COPY = 0x38; 307 308 /* 309 * The following constant values are used by encoding implementations 310 */ 311 312 public static final Charset UTF8 = Charset.forName("UTF-8"); 313 private static final byte TERM = 0x00; 314 private static final BigDecimal E8 = BigDecimal.valueOf(1e8); 315 private static final BigDecimal E32 = BigDecimal.valueOf(1e32); 316 private static final BigDecimal EN2 = BigDecimal.valueOf(1e-2); 317 private static final BigDecimal EN10 = BigDecimal.valueOf(1e-10); 318 319 /** 320 * Max precision guaranteed to fit into a {@code long}. 321 */ 322 public static final int MAX_PRECISION = 31; 323 324 /** 325 * The context used to normalize {@link BigDecimal} values. 326 */ 327 public static final MathContext DEFAULT_MATH_CONTEXT = 328 new MathContext(MAX_PRECISION, RoundingMode.HALF_UP); 329 330 /** 331 * Creates the standard exception when the encoded header byte is unexpected for the decoding 332 * context. 333 * @param header value used in error message. 334 */ 335 private static IllegalArgumentException unexpectedHeader(byte header) { 336 throw new IllegalArgumentException("unexpected value in first byte: 0x" 337 + Long.toHexString(header)); 338 } 339 340 /** 341 * Perform unsigned comparison between two long values. Conforms to the same interface as 342 * {@link org.apache.hadoop.hbase.CellComparator}. 343 */ 344 private static int unsignedCmp(long x1, long x2) { 345 int cmp; 346 if ((cmp = (x1 < x2 ? -1 : (x1 == x2 ? 0 : 1))) == 0) return 0; 347 // invert the result when either value is negative 348 if ((x1 < 0) != (x2 < 0)) return -cmp; 349 return cmp; 350 } 351 352 /** 353 * Write a 32-bit unsigned integer to {@code dst} as 4 big-endian bytes. 354 * @return number of bytes written. 
355 */ 356 private static int putUint32(PositionedByteRange dst, int val) { 357 dst.put((byte) (val >>> 24)) 358 .put((byte) (val >>> 16)) 359 .put((byte) (val >>> 8)) 360 .put((byte) val); 361 return 4; 362 } 363 364 /** 365 * Encode an unsigned 64-bit unsigned integer {@code val} into {@code dst}. 366 * @param dst The destination to which encoded bytes are written. 367 * @param val The value to write. 368 * @param comp Compliment the encoded value when {@code comp} is true. 369 * @return number of bytes written. 370 */ 371 @VisibleForTesting 372 static int putVaruint64(PositionedByteRange dst, long val, boolean comp) { 373 int w, y, len = 0; 374 final int offset = dst.getOffset(), start = dst.getPosition(); 375 byte[] a = dst.getBytes(); 376 Order ord = comp ? DESCENDING : ASCENDING; 377 if (-1 == unsignedCmp(val, 241L)) { 378 dst.put((byte) val); 379 len = dst.getPosition() - start; 380 ord.apply(a, offset + start, len); 381 return len; 382 } 383 if (-1 == unsignedCmp(val, 2288L)) { 384 y = (int) (val - 240); 385 dst.put((byte) (y / 256 + 241)) 386 .put((byte) (y % 256)); 387 len = dst.getPosition() - start; 388 ord.apply(a, offset + start, len); 389 return len; 390 } 391 if (-1 == unsignedCmp(val, 67824L)) { 392 y = (int) (val - 2288); 393 dst.put((byte) 249) 394 .put((byte) (y / 256)) 395 .put((byte) (y % 256)); 396 len = dst.getPosition() - start; 397 ord.apply(a, offset + start, len); 398 return len; 399 } 400 y = (int) val; 401 w = (int) (val >>> 32); 402 if (w == 0) { 403 if (-1 == unsignedCmp(y, 16777216L)) { 404 dst.put((byte) 250) 405 .put((byte) (y >>> 16)) 406 .put((byte) (y >>> 8)) 407 .put((byte) y); 408 len = dst.getPosition() - start; 409 ord.apply(a, offset + start, len); 410 return len; 411 } 412 dst.put((byte) 251); 413 putUint32(dst, y); 414 len = dst.getPosition() - start; 415 ord.apply(a, offset + start, len); 416 return len; 417 } 418 if (-1 == unsignedCmp(w, 256L)) { 419 dst.put((byte) 252) 420 .put((byte) w); 421 putUint32(dst, y); 422 
len = dst.getPosition() - start; 423 ord.apply(a, offset + start, len); 424 return len; 425 } 426 if (-1 == unsignedCmp(w, 65536L)) { 427 dst.put((byte) 253) 428 .put((byte) (w >>> 8)) 429 .put((byte) w); 430 putUint32(dst, y); 431 len = dst.getPosition() - start; 432 ord.apply(a, offset + start, len); 433 return len; 434 } 435 if (-1 == unsignedCmp(w, 16777216L)) { 436 dst.put((byte) 254) 437 .put((byte) (w >>> 16)) 438 .put((byte) (w >>> 8)) 439 .put((byte) w); 440 putUint32(dst, y); 441 len = dst.getPosition() - start; 442 ord.apply(a, offset + start, len); 443 return len; 444 } 445 dst.put((byte) 255); 446 putUint32(dst, w); 447 putUint32(dst, y); 448 len = dst.getPosition() - start; 449 ord.apply(a, offset + start, len); 450 return len; 451 } 452 453 /** 454 * Inspect {@code src} for an encoded varuint64 for its length in bytes. 455 * Preserves the state of {@code src}. 456 * @param src source buffer 457 * @param comp if true, parse the compliment of the value. 458 * @return the number of bytes consumed by this value. 459 */ 460 @VisibleForTesting 461 static int lengthVaruint64(PositionedByteRange src, boolean comp) { 462 int a0 = (comp ? DESCENDING : ASCENDING).apply(src.peek()) & 0xff; 463 if (a0 <= 240) return 1; 464 if (a0 <= 248) return 2; 465 if (a0 == 249) return 3; 466 if (a0 == 250) return 4; 467 if (a0 == 251) return 5; 468 if (a0 == 252) return 6; 469 if (a0 == 253) return 7; 470 if (a0 == 254) return 8; 471 if (a0 == 255) return 9; 472 throw unexpectedHeader(src.peek()); 473 } 474 475 /** 476 * Skip {@code src} over the encoded varuint64. 477 * @param src source buffer 478 * @param cmp if true, parse the compliment of the value. 479 * @return the number of bytes skipped. 
480 */ 481 @VisibleForTesting 482 static int skipVaruint64(PositionedByteRange src, boolean cmp) { 483 final int len = lengthVaruint64(src, cmp); 484 src.setPosition(src.getPosition() + len); 485 return len; 486 } 487 488 /** 489 * Decode a sequence of bytes in {@code src} as a varuint64. Compliment the 490 * encoded value when {@code comp} is true. 491 * @return the decoded value. 492 */ 493 @VisibleForTesting 494 static long getVaruint64(PositionedByteRange src, boolean comp) { 495 assert src.getRemaining() >= lengthVaruint64(src, comp); 496 final long ret; 497 Order ord = comp ? DESCENDING : ASCENDING; 498 byte x = src.get(); 499 final int a0 = ord.apply(x) & 0xff, a1, a2, a3, a4, a5, a6, a7, a8; 500 if (-1 == unsignedCmp(a0, 241)) { 501 return a0; 502 } 503 x = src.get(); 504 a1 = ord.apply(x) & 0xff; 505 if (-1 == unsignedCmp(a0, 249)) { 506 return (a0 - 241L) * 256 + a1 + 240; 507 } 508 x = src.get(); 509 a2 = ord.apply(x) & 0xff; 510 if (a0 == 249) { 511 return 2288L + 256 * a1 + a2; 512 } 513 x = src.get(); 514 a3 = ord.apply(x) & 0xff; 515 if (a0 == 250) { 516 return ((long) a1 << 16L) | (a2 << 8) | a3; 517 } 518 x = src.get(); 519 a4 = ord.apply(x) & 0xff; 520 ret = (((long) a1) << 24) | (a2 << 16) | (a3 << 8) | a4; 521 if (a0 == 251) { 522 return ret; 523 } 524 x = src.get(); 525 a5 = ord.apply(x) & 0xff; 526 if (a0 == 252) { 527 return (ret << 8) | a5; 528 } 529 x = src.get(); 530 a6 = ord.apply(x) & 0xff; 531 if (a0 == 253) { 532 return (ret << 16) | (a5 << 8) | a6; 533 } 534 x = src.get(); 535 a7 = ord.apply(x) & 0xff; 536 if (a0 == 254) { 537 return (ret << 24) | (a5 << 16) | (a6 << 8) | a7; 538 } 539 x = src.get(); 540 a8 = ord.apply(x) & 0xff; 541 return (ret << 32) | (((long) a5) << 24) | (a6 << 16) | (a7 << 8) | a8; 542 } 543 544 /** 545 * Strip all trailing zeros to ensure that no digit will be zero and round 546 * using our default context to ensure precision doesn't exceed max allowed. 547 * From Phoenix's {@code NumberUtil}. 
548 * @return new {@link BigDecimal} instance 549 */ 550 @VisibleForTesting 551 static BigDecimal normalize(BigDecimal val) { 552 return null == val ? null : val.stripTrailingZeros().round(DEFAULT_MATH_CONTEXT); 553 } 554 555 /** 556 * Read significand digits from {@code src} according to the magnitude 557 * of {@code e}. 558 * @param src The source from which to read encoded digits. 559 * @param e The magnitude of the first digit read. 560 * @param comp Treat encoded bytes as compliments when {@code comp} is true. 561 * @return The decoded value. 562 * @throws IllegalArgumentException when read exceeds the remaining length 563 * of {@code src}. 564 */ 565 private static BigDecimal decodeSignificand(PositionedByteRange src, int e, boolean comp) { 566 // TODO: can this be made faster? 567 byte[] a = src.getBytes(); 568 final int start = src.getPosition(), offset = src.getOffset(), remaining = src.getRemaining(); 569 Order ord = comp ? DESCENDING : ASCENDING; 570 BigDecimal m = BigDecimal.ZERO; 571 e--; 572 for (int i = 0;; i++) { 573 if (i > remaining) { 574 // we've exceeded this range's window 575 src.setPosition(start); 576 throw new IllegalArgumentException( 577 "Read exceeds range before termination byte found. offset: " + offset + " position: " 578 + (start + i)); 579 } 580 // base-100 digits are encoded as val * 2 + 1 except for the termination digit. 581 m = m.add( // m += 582 new BigDecimal(BigInteger.ONE, e * -2).multiply( // 100 ^ p * [decoded digit] 583 BigDecimal.valueOf((ord.apply(a[offset + start + i]) & 0xff) / 2))); 584 e--; 585 // detect termination digit 586 if ((ord.apply(a[offset + start + i]) & 1) == 0) { 587 src.setPosition(start + i + 1); 588 break; 589 } 590 } 591 return normalize(m); 592 } 593 594 /** 595 * Skip {@code src} over the significand bytes. 596 * @param src The source from which to read encoded digits. 597 * @param comp Treat encoded bytes as compliments when {@code comp} is true. 598 * @return the number of bytes skipped. 
599 */ 600 private static int skipSignificand(PositionedByteRange src, boolean comp) { 601 byte[] a = src.getBytes(); 602 final int offset = src.getOffset(), start = src.getPosition(); 603 int i = src.getPosition(); 604 while (((comp ? DESCENDING : ASCENDING).apply(a[offset + i++]) & 1) != 0) 605 ; 606 src.setPosition(i); 607 return i - start; 608 } 609 610 /** 611 * <p> 612 * Encode the small magnitude floating point number {@code val} using the 613 * key encoding. The caller guarantees that 1.0 > abs(val) > 0.0. 614 * </p> 615 * <p> 616 * A floating point value is encoded as an integer exponent {@code E} and a 617 * mantissa {@code M}. The original value is equal to {@code (M * 100^E)}. 618 * {@code E} is set to the smallest value possible without making {@code M} 619 * greater than or equal to 1.0. 620 * </p> 621 * <p> 622 * For this routine, {@code E} will always be zero or negative, since the 623 * original value is less than one. The encoding written by this routine is 624 * the ones-complement of the varint of the negative of {@code E} followed 625 * by the mantissa: 626 * <pre> 627 * Encoding: ~-E M 628 * </pre> 629 * </p> 630 * @param dst The destination to which encoded digits are written. 631 * @param val The value to encode. 632 * @return the number of bytes written. 633 */ 634 private static int encodeNumericSmall(PositionedByteRange dst, BigDecimal val) { 635 // TODO: this can be done faster? 
636 // assert 1.0 > abs(val) > 0.0 637 BigDecimal abs = val.abs(); 638 assert BigDecimal.ZERO.compareTo(abs) < 0 && BigDecimal.ONE.compareTo(abs) > 0; 639 byte[] a = dst.getBytes(); 640 boolean isNeg = val.signum() == -1; 641 final int offset = dst.getOffset(), start = dst.getPosition(); 642 int e = 0, d, startM; 643 644 if (isNeg) { /* Small negative number: 0x14, -E, ~M */ 645 dst.put(NEG_SMALL); 646 } else { /* Small positive number: 0x16, ~-E, M */ 647 dst.put(POS_SMALL); 648 } 649 650 // normalize abs(val) to determine E 651 while (abs.compareTo(EN10) < 0) { abs = abs.movePointRight(8); e += 4; } 652 while (abs.compareTo(EN2) < 0) { abs = abs.movePointRight(2); e++; } 653 654 putVaruint64(dst, e, !isNeg); // encode appropriate E value. 655 656 // encode M by peeling off centimal digits, encoding x as 2x+1 657 startM = dst.getPosition(); 658 // TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of 659 // numeric scale. 660 for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) { 661 abs = abs.movePointRight(2); 662 d = abs.intValue(); 663 dst.put((byte) ((2 * d + 1) & 0xff)); 664 abs = abs.subtract(BigDecimal.valueOf(d)); 665 } 666 // terminal digit should be 2x 667 a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe); 668 if (isNeg) { 669 // negative values encoded as ~M 670 DESCENDING.apply(a, offset + startM, dst.getPosition() - startM); 671 } 672 return dst.getPosition() - start; 673 } 674 675 /** 676 * Encode the large magnitude floating point number {@code val} using 677 * the key encoding. The caller guarantees that {@code val} will be 678 * finite and abs(val) >= 1.0. 679 * <p> 680 * A floating point value is encoded as an integer exponent {@code E} 681 * and a mantissa {@code M}. The original value is equal to 682 * {@code (M * 100^E)}. {@code E} is set to the smallest value 683 * possible without making {@code M} greater than or equal to 1.0. 
684 * </p> 685 * <p> 686 * Each centimal digit of the mantissa is stored in a byte. If the value of 687 * the centimal digit is {@code X} (hence {@code X>=0} and 688 * {@code X<=99}) then the byte value will be {@code 2*X+1} for 689 * every byte of the mantissa, except for the last byte which will be 690 * {@code 2*X+0}. The mantissa must be the minimum number of bytes 691 * necessary to represent the value; trailing {@code X==0} digits are 692 * omitted. This means that the mantissa will never contain a byte with the 693 * value {@code 0x00}. 694 * </p> 695 * <p> 696 * If {@code E > 10}, then this routine writes of {@code E} as a 697 * varint followed by the mantissa as described above. Otherwise, if 698 * {@code E <= 10}, this routine only writes the mantissa and leaves 699 * the {@code E} value to be encoded as part of the opening byte of the 700 * field by the calling function. 701 * 702 * <pre> 703 * Encoding: M (if E<=10) 704 * E M (if E>10) 705 * </pre> 706 * </p> 707 * @param dst The destination to which encoded digits are written. 708 * @param val The value to encode. 709 * @return the number of bytes written. 
710 */ 711 private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) { 712 // TODO: this can be done faster 713 BigDecimal abs = val.abs(); 714 byte[] a = dst.getBytes(); 715 boolean isNeg = val.signum() == -1; 716 final int start = dst.getPosition(), offset = dst.getOffset(); 717 int e = 0, d, startM; 718 719 if (isNeg) { /* Large negative number: 0x08, ~E, ~M */ 720 dst.put(NEG_LARGE); 721 } else { /* Large positive number: 0x22, E, M */ 722 dst.put(POS_LARGE); 723 } 724 725 // normalize abs(val) to determine E 726 while (abs.compareTo(E32) >= 0 && e <= 350) { abs = abs.movePointLeft(32); e +=16; } 727 while (abs.compareTo(E8) >= 0 && e <= 350) { abs = abs.movePointLeft(8); e+= 4; } 728 while (abs.compareTo(BigDecimal.ONE) >= 0 && e <= 350) { abs = abs.movePointLeft(2); e++; } 729 730 // encode appropriate header byte and/or E value. 731 if (e > 10) { /* large number, write out {~,}E */ 732 putVaruint64(dst, e, isNeg); 733 } else { 734 if (isNeg) { /* Medium negative number: 0x13-E, ~M */ 735 dst.put(start, (byte) (NEG_MED_MAX - e)); 736 } else { /* Medium positive number: 0x17+E, M */ 737 dst.put(start, (byte) (POS_MED_MIN + e)); 738 } 739 } 740 741 // encode M by peeling off centimal digits, encoding x as 2x+1 742 startM = dst.getPosition(); 743 // TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of 744 // numeric scale. 745 for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) { 746 abs = abs.movePointRight(2); 747 d = abs.intValue(); 748 dst.put((byte) (2 * d + 1)); 749 abs = abs.subtract(BigDecimal.valueOf(d)); 750 } 751 // terminal digit should be 2x 752 a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe); 753 if (isNeg) { 754 // negative values encoded as ~M 755 DESCENDING.apply(a, offset + startM, dst.getPosition() - startM); 756 } 757 return dst.getPosition() - start; 758 } 759 760 /** 761 * Encode a numerical value using the variable-length encoding. 
762 * @param dst The destination to which encoded digits are written. 763 * @param val The value to encode. 764 * @param ord The {@link Order} to respect while encoding {@code val}. 765 * @return the number of bytes written. 766 */ 767 public static int encodeNumeric(PositionedByteRange dst, long val, Order ord) { 768 return encodeNumeric(dst, BigDecimal.valueOf(val), ord); 769 } 770 771 /** 772 * Encode a numerical value using the variable-length encoding. 773 * @param dst The destination to which encoded digits are written. 774 * @param val The value to encode. 775 * @param ord The {@link Order} to respect while encoding {@code val}. 776 * @return the number of bytes written. 777 */ 778 public static int encodeNumeric(PositionedByteRange dst, double val, Order ord) { 779 if (val == 0.0) { 780 dst.put(ord.apply(ZERO)); 781 return 1; 782 } 783 if (Double.isNaN(val)) { 784 dst.put(ord.apply(NAN)); 785 return 1; 786 } 787 if (val == Double.NEGATIVE_INFINITY) { 788 dst.put(ord.apply(NEG_INF)); 789 return 1; 790 } 791 if (val == Double.POSITIVE_INFINITY) { 792 dst.put(ord.apply(POS_INF)); 793 return 1; 794 } 795 return encodeNumeric(dst, BigDecimal.valueOf(val), ord); 796 } 797 798 /** 799 * Encode a numerical value using the variable-length encoding. 800 * @param dst The destination to which encoded digits are written. 801 * @param val The value to encode. 802 * @param ord The {@link Order} to respect while encoding {@code val}. 803 * @return the number of bytes written. 
804 */ 805 public static int encodeNumeric(PositionedByteRange dst, BigDecimal val, Order ord) { 806 final int len, offset = dst.getOffset(), start = dst.getPosition(); 807 if (null == val) { 808 return encodeNull(dst, ord); 809 } else if (BigDecimal.ZERO.compareTo(val) == 0) { 810 dst.put(ord.apply(ZERO)); 811 return 1; 812 } 813 BigDecimal abs = val.abs(); 814 if (BigDecimal.ONE.compareTo(abs) <= 0) { // abs(v) >= 1.0 815 len = encodeNumericLarge(dst, normalize(val)); 816 } else { // 1.0 > abs(v) >= 0.0 817 len = encodeNumericSmall(dst, normalize(val)); 818 } 819 ord.apply(dst.getBytes(), offset + start, len); 820 return len; 821 } 822 823 /** 824 * Decode a {@link BigDecimal} from {@code src}. Assumes {@code src} encodes 825 * a value in Numeric encoding and is within the valid range of 826 * {@link BigDecimal} values. {@link BigDecimal} does not support {@code NaN} 827 * or {@code Infinte} values. 828 * @see #decodeNumericAsDouble(PositionedByteRange) 829 */ 830 private static BigDecimal decodeNumericValue(PositionedByteRange src) { 831 final int e; 832 byte header = src.get(); 833 boolean dsc = -1 == Integer.signum(header); 834 header = dsc ? 
DESCENDING.apply(header) : header; 835 836 if (header == NULL) return null; 837 if (header == NEG_LARGE) { /* Large negative number: 0x08, ~E, ~M */ 838 e = (int) getVaruint64(src, !dsc); 839 return decodeSignificand(src, e, !dsc).negate(); 840 } 841 if (header >= NEG_MED_MIN && header <= NEG_MED_MAX) { 842 /* Medium negative number: 0x13-E, ~M */ 843 e = NEG_MED_MAX - header; 844 return decodeSignificand(src, e, !dsc).negate(); 845 } 846 if (header == NEG_SMALL) { /* Small negative number: 0x14, -E, ~M */ 847 e = (int) -getVaruint64(src, dsc); 848 return decodeSignificand(src, e, !dsc).negate(); 849 } 850 if (header == ZERO) { 851 return BigDecimal.ZERO; 852 } 853 if (header == POS_SMALL) { /* Small positive number: 0x16, ~-E, M */ 854 e = (int) -getVaruint64(src, !dsc); 855 return decodeSignificand(src, e, dsc); 856 } 857 if (header >= POS_MED_MIN && header <= POS_MED_MAX) { 858 /* Medium positive number: 0x17+E, M */ 859 e = header - POS_MED_MIN; 860 return decodeSignificand(src, e, dsc); 861 } 862 if (header == POS_LARGE) { /* Large positive number: 0x22, E, M */ 863 e = (int) getVaruint64(src, dsc); 864 return decodeSignificand(src, e, dsc); 865 } 866 throw unexpectedHeader(header); 867 } 868 869 /** 870 * Decode a primitive {@code double} value from the Numeric encoding. Numeric 871 * encoding is based on {@link BigDecimal}; in the event the encoded value is 872 * larger than can be represented in a {@code double}, this method performs 873 * an implicit narrowing conversion as described in 874 * {@link BigDecimal#doubleValue()}. 875 * @throws NullPointerException when the encoded value is {@code NULL}. 876 * @throws IllegalArgumentException when the encoded value is not a Numeric. 877 * @see #encodeNumeric(PositionedByteRange, double, Order) 878 * @see BigDecimal#doubleValue() 879 */ 880 public static double decodeNumericAsDouble(PositionedByteRange src) { 881 // TODO: should an encoded NULL value throw unexpectedHeader() instead? 
882 if (isNull(src)) { 883 throw new NullPointerException("A null value cannot be decoded to a double."); 884 } 885 if (isNumericNaN(src)) { 886 src.get(); 887 return Double.NaN; 888 } 889 if (isNumericZero(src)) { 890 src.get(); 891 return Double.valueOf(0.0); 892 } 893 894 byte header = -1 == Integer.signum(src.peek()) ? DESCENDING.apply(src.peek()) : src.peek(); 895 896 if (header == NEG_INF) { 897 src.get(); 898 return Double.NEGATIVE_INFINITY; 899 } else if (header == POS_INF) { 900 src.get(); 901 return Double.POSITIVE_INFINITY; 902 } else { 903 return decodeNumericValue(src).doubleValue(); 904 } 905 } 906 907 /** 908 * Decode a primitive {@code long} value from the Numeric encoding. Numeric 909 * encoding is based on {@link BigDecimal}; in the event the encoded value is 910 * larger than can be represented in a {@code long}, this method performs an 911 * implicit narrowing conversion as described in 912 * {@link BigDecimal#doubleValue()}. 913 * @throws NullPointerException when the encoded value is {@code NULL}. 914 * @throws IllegalArgumentException when the encoded value is not a Numeric. 915 * @see #encodeNumeric(PositionedByteRange, long, Order) 916 * @see BigDecimal#longValue() 917 */ 918 public static long decodeNumericAsLong(PositionedByteRange src) { 919 // TODO: should an encoded NULL value throw unexpectedHeader() instead? 920 if (isNull(src)) throw new NullPointerException(); 921 if (!isNumeric(src)) throw unexpectedHeader(src.peek()); 922 if (isNumericNaN(src)) throw unexpectedHeader(src.peek()); 923 if (isNumericInfinite(src)) throw unexpectedHeader(src.peek()); 924 925 if (isNumericZero(src)) { 926 src.get(); 927 return Long.valueOf(0); 928 } 929 return decodeNumericValue(src).longValue(); 930 } 931 932 /** 933 * Decode a {@link BigDecimal} value from the variable-length encoding. 934 * @throws IllegalArgumentException when the encoded value is not a Numeric. 
935 * @see #encodeNumeric(PositionedByteRange, BigDecimal, Order) 936 */ 937 public static BigDecimal decodeNumericAsBigDecimal(PositionedByteRange src) { 938 if (isNull(src)) { 939 src.get(); 940 return null; 941 } 942 if (!isNumeric(src)) throw unexpectedHeader(src.peek()); 943 if (isNumericNaN(src)) throw unexpectedHeader(src.peek()); 944 if (isNumericInfinite(src)) throw unexpectedHeader(src.peek()); 945 return decodeNumericValue(src); 946 } 947 948 /** 949 * Encode a String value. String encoding is 0x00-terminated and so it does 950 * not support {@code \u0000} codepoints in the value. 951 * @param dst The destination to which the encoded value is written. 952 * @param val The value to encode. 953 * @param ord The {@link Order} to respect while encoding {@code val}. 954 * @return the number of bytes written. 955 * @throws IllegalArgumentException when {@code val} contains a {@code \u0000}. 956 */ 957 public static int encodeString(PositionedByteRange dst, String val, Order ord) { 958 if (null == val) { 959 return encodeNull(dst, ord); 960 } 961 if (val.contains("\u0000")) 962 throw new IllegalArgumentException("Cannot encode String values containing '\\u0000'"); 963 final int offset = dst.getOffset(), start = dst.getPosition(); 964 dst.put(TEXT); 965 // TODO: is there no way to decode into dst directly? 966 dst.put(val.getBytes(UTF8)); 967 dst.put(TERM); 968 ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start); 969 return dst.getPosition() - start; 970 } 971 972 /** 973 * Decode a String value. 974 */ 975 public static String decodeString(PositionedByteRange src) { 976 final byte header = src.get(); 977 if (header == NULL || header == DESCENDING.apply(NULL)) 978 return null; 979 assert header == TEXT || header == DESCENDING.apply(TEXT); 980 Order ord = header == TEXT ? 
ASCENDING : DESCENDING; 981 byte[] a = src.getBytes(); 982 final int offset = src.getOffset(), start = src.getPosition(); 983 final byte terminator = ord.apply(TERM); 984 int rawStartPos = offset + start, rawTermPos = rawStartPos; 985 for (; a[rawTermPos] != terminator; rawTermPos++) 986 ; 987 src.setPosition(rawTermPos - offset + 1); // advance position to TERM + 1 988 if (DESCENDING == ord) { 989 // make a copy so that we don't disturb encoded value with ord. 990 byte[] copy = new byte[rawTermPos - rawStartPos]; 991 System.arraycopy(a, rawStartPos, copy, 0, copy.length); 992 ord.apply(copy); 993 return new String(copy, UTF8); 994 } else { 995 return new String(a, rawStartPos, rawTermPos - rawStartPos, UTF8); 996 } 997 } 998 999 /** 1000 * Calculate the expected BlobVar encoded length based on unencoded length. 1001 */ 1002 public static int blobVarEncodedLength(int len) { 1003 if (0 == len) 1004 return 2; // 1-byte header + 1-byte terminator 1005 else 1006 return (int) 1007 Math.ceil( 1008 (len * 8) // 8-bits per input byte 1009 / 7.0) // 7-bits of input data per encoded byte, rounded up 1010 + 1; // + 1-byte header 1011 } 1012 1013 /** 1014 * Calculate the expected BlobVar decoded length based on encoded length. 1015 */ 1016 @VisibleForTesting 1017 static int blobVarDecodedLength(int len) { 1018 return 1019 ((len 1020 - 1) // 1-byte header 1021 * 7) // 7-bits of payload per encoded byte 1022 / 8; // 8-bits per byte 1023 } 1024 1025 /** 1026 * Encode a Blob value using a modified varint encoding scheme. 1027 * <p> 1028 * This format encodes a byte[] value such that no limitations on the input 1029 * value are imposed. The first byte encodes the encoding scheme that 1030 * follows, {@link #BLOB_VAR}. Each encoded byte thereafter consists of a 1031 * header bit followed by 7 bits of payload. A header bit of '1' indicates 1032 * continuation of the encoding. A header bit of '0' indicates this byte 1033 * contains the last of the payload. 
An empty input value is encoded as the 1034 * header byte immediately followed by a termination byte {@code 0x00}. This 1035 * is not ambiguous with the encoded value of {@code []}, which results in 1036 * {@code [0x80, 0x00]}. 1037 * </p> 1038 * @return the number of bytes written. 1039 */ 1040 public static int encodeBlobVar(PositionedByteRange dst, byte[] val, int voff, int vlen, 1041 Order ord) { 1042 if (null == val) { 1043 return encodeNull(dst, ord); 1044 } 1045 // Empty value is null-terminated. All other values are encoded as 7-bits per byte. 1046 assert dst.getRemaining() >= blobVarEncodedLength(vlen) : "buffer overflow expected."; 1047 final int offset = dst.getOffset(), start = dst.getPosition(); 1048 dst.put(BLOB_VAR); 1049 if (0 == vlen) { 1050 dst.put(TERM); 1051 } else { 1052 byte s = 1, t = 0; 1053 for (int i = voff; i < vlen; i++) { 1054 dst.put((byte) (0x80 | t | ((val[i] & 0xff) >>> s))); 1055 if (s < 7) { 1056 t = (byte) (val[i] << (7 - s)); 1057 s++; 1058 } else { 1059 dst.put((byte) (0x80 | val[i])); 1060 s = 1; 1061 t = 0; 1062 } 1063 } 1064 if (s > 1) { 1065 dst.put((byte) (0x7f & t)); 1066 } else { 1067 dst.getBytes()[offset + dst.getPosition() - 1] = 1068 (byte) (dst.getBytes()[offset + dst.getPosition() - 1] & 0x7f); 1069 } 1070 } 1071 ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start); 1072 return dst.getPosition() - start; 1073 } 1074 1075 /** 1076 * Encode a blob value using a modified varint encoding scheme. 1077 * @return the number of bytes written. 1078 * @see #encodeBlobVar(PositionedByteRange, byte[], int, int, Order) 1079 */ 1080 public static int encodeBlobVar(PositionedByteRange dst, byte[] val, Order ord) { 1081 return encodeBlobVar(dst, val, 0, null != val ? val.length : 0, ord); 1082 } 1083 1084 /** 1085 * Decode a blob value that was encoded using BlobVar encoding. 
1086 */ 1087 public static byte[] decodeBlobVar(PositionedByteRange src) { 1088 final byte header = src.get(); 1089 if (header == NULL || header == DESCENDING.apply(NULL)) { 1090 return null; 1091 } 1092 assert header == BLOB_VAR || header == DESCENDING.apply(BLOB_VAR); 1093 Order ord = BLOB_VAR == header ? ASCENDING : DESCENDING; 1094 if (src.peek() == ord.apply(TERM)) { 1095 // skip empty input buffer. 1096 src.get(); 1097 return new byte[0]; 1098 } 1099 final int offset = src.getOffset(), start = src.getPosition(); 1100 int end; 1101 byte[] a = src.getBytes(); 1102 for (end = start; (byte) (ord.apply(a[offset + end]) & 0x80) != TERM; end++) 1103 ; 1104 end++; // increment end to 1-past last byte 1105 // create ret buffer using length of encoded data + 1 (header byte) 1106 PositionedByteRange ret = new SimplePositionedMutableByteRange(blobVarDecodedLength(end - start 1107 + 1)); 1108 int s = 6; 1109 byte t = (byte) ((ord.apply(a[offset + start]) << 1) & 0xff); 1110 for (int i = start + 1; i < end; i++) { 1111 if (s == 7) { 1112 ret.put((byte) (t | (ord.apply(a[offset + i]) & 0x7f))); 1113 i++; 1114 // explicitly reset t -- clean up overflow buffer after decoding 1115 // a full cycle and retain assertion condition below. This happens 1116 t = 0; // when the LSB in the last encoded byte is 1. (HBASE-9893) 1117 } else { 1118 ret.put((byte) (t | ((ord.apply(a[offset + i]) & 0x7f) >>> s))); 1119 } 1120 if (i == end) break; 1121 t = (byte) ((ord.apply(a[offset + i]) << (8 - s)) & 0xff); 1122 s = s == 1 ? 7 : s - 1; 1123 } 1124 src.setPosition(end); 1125 assert t == 0 : "Unexpected bits remaining after decoding blob."; 1126 assert ret.getPosition() == ret.getLength() : "Allocated unnecessarily large return buffer."; 1127 return ret.getBytes(); 1128 } 1129 1130 /** 1131 * Encode a Blob value as a byte-for-byte copy. 
BlobCopy encoding in 1132 * DESCENDING order is NULL terminated so as to preserve proper sorting of 1133 * {@code []} and so it does not support {@code 0x00} in the value. 1134 * @return the number of bytes written. 1135 * @throws IllegalArgumentException when {@code ord} is DESCENDING and 1136 * {@code val} contains a {@code 0x00} byte. 1137 */ 1138 public static int encodeBlobCopy(PositionedByteRange dst, byte[] val, int voff, int vlen, 1139 Order ord) { 1140 if (null == val) { 1141 encodeNull(dst, ord); 1142 if (ASCENDING == ord) return 1; 1143 else { 1144 // DESCENDING ordered BlobCopy requires a termination bit to preserve 1145 // sort-order semantics of null values. 1146 dst.put(ord.apply(TERM)); 1147 return 2; 1148 } 1149 } 1150 // Blobs as final entry in a compound key are written unencoded. 1151 assert dst.getRemaining() >= vlen + (ASCENDING == ord ? 1 : 2); 1152 if (DESCENDING == ord) { 1153 for (int i = 0; i < vlen; i++) { 1154 if (TERM == val[voff + i]) { 1155 throw new IllegalArgumentException("0x00 bytes not permitted in value."); 1156 } 1157 } 1158 } 1159 final int offset = dst.getOffset(), start = dst.getPosition(); 1160 dst.put(BLOB_COPY); 1161 dst.put(val, voff, vlen); 1162 // DESCENDING ordered BlobCopy requires a termination bit to preserve 1163 // sort-order semantics of null values. 1164 if (DESCENDING == ord) dst.put(TERM); 1165 ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start); 1166 return dst.getPosition() - start; 1167 } 1168 1169 /** 1170 * Encode a Blob value as a byte-for-byte copy. BlobCopy encoding in 1171 * DESCENDING order is NULL terminated so as to preserve proper sorting of 1172 * {@code []} and so it does not support {@code 0x00} in the value. 1173 * @return the number of bytes written. 1174 * @throws IllegalArgumentException when {@code ord} is DESCENDING and 1175 * {@code val} contains a {@code 0x00} byte. 
1176 * @see #encodeBlobCopy(PositionedByteRange, byte[], int, int, Order) 1177 */ 1178 public static int encodeBlobCopy(PositionedByteRange dst, byte[] val, Order ord) { 1179 return encodeBlobCopy(dst, val, 0, null != val ? val.length : 0, ord); 1180 } 1181 1182 /** 1183 * Decode a Blob value, byte-for-byte copy. 1184 * @see #encodeBlobCopy(PositionedByteRange, byte[], int, int, Order) 1185 */ 1186 public static byte[] decodeBlobCopy(PositionedByteRange src) { 1187 byte header = src.get(); 1188 if (header == NULL || header == DESCENDING.apply(NULL)) { 1189 return null; 1190 } 1191 assert header == BLOB_COPY || header == DESCENDING.apply(BLOB_COPY); 1192 Order ord = header == BLOB_COPY ? ASCENDING : DESCENDING; 1193 final int length = src.getRemaining() - (ASCENDING == ord ? 0 : 1); 1194 byte[] ret = new byte[length]; 1195 src.get(ret); 1196 ord.apply(ret, 0, ret.length); 1197 // DESCENDING ordered BlobCopy requires a termination bit to preserve 1198 // sort-order semantics of null values. 1199 if (DESCENDING == ord) src.get(); 1200 return ret; 1201 } 1202 1203 /** 1204 * Encode a null value. 1205 * @param dst The destination to which encoded digits are written. 1206 * @param ord The {@link Order} to respect while encoding {@code val}. 1207 * @return the number of bytes written. 1208 */ 1209 public static int encodeNull(PositionedByteRange dst, Order ord) { 1210 dst.put(ord.apply(NULL)); 1211 return 1; 1212 } 1213 1214 /** 1215 * Encode an {@code int8} value using the fixed-length encoding. 1216 * @return the number of bytes written. 
1217 * @see #encodeInt64(PositionedByteRange, long, Order) 1218 * @see #decodeInt8(PositionedByteRange) 1219 */ 1220 public static int encodeInt8(PositionedByteRange dst, byte val, Order ord) { 1221 final int offset = dst.getOffset(), start = dst.getPosition(); 1222 dst.put(FIXED_INT8) 1223 .put((byte) (val ^ 0x80)); 1224 ord.apply(dst.getBytes(), offset + start, 2); 1225 return 2; 1226 } 1227 1228 /** 1229 * Decode an {@code int8} value. 1230 * @see #encodeInt8(PositionedByteRange, byte, Order) 1231 */ 1232 public static byte decodeInt8(PositionedByteRange src) { 1233 final byte header = src.get(); 1234 assert header == FIXED_INT8 || header == DESCENDING.apply(FIXED_INT8); 1235 Order ord = header == FIXED_INT8 ? ASCENDING : DESCENDING; 1236 return (byte)((ord.apply(src.get()) ^ 0x80) & 0xff); 1237 } 1238 1239 /** 1240 * Encode an {@code int16} value using the fixed-length encoding. 1241 * @return the number of bytes written. 1242 * @see #encodeInt64(PositionedByteRange, long, Order) 1243 * @see #decodeInt16(PositionedByteRange) 1244 */ 1245 public static int encodeInt16(PositionedByteRange dst, short val, Order ord) { 1246 final int offset = dst.getOffset(), start = dst.getPosition(); 1247 dst.put(FIXED_INT16) 1248 .put((byte) ((val >> 8) ^ 0x80)) 1249 .put((byte) val); 1250 ord.apply(dst.getBytes(), offset + start, 3); 1251 return 3; 1252 } 1253 1254 /** 1255 * Decode an {@code int16} value. 1256 * @see #encodeInt16(PositionedByteRange, short, Order) 1257 */ 1258 public static short decodeInt16(PositionedByteRange src) { 1259 final byte header = src.get(); 1260 assert header == FIXED_INT16 || header == DESCENDING.apply(FIXED_INT16); 1261 Order ord = header == FIXED_INT16 ? ASCENDING : DESCENDING; 1262 short val = (short) ((ord.apply(src.get()) ^ 0x80) & 0xff); 1263 val = (short) ((val << 8) + (ord.apply(src.get()) & 0xff)); 1264 return val; 1265 } 1266 1267 /** 1268 * Encode an {@code int32} value using the fixed-length encoding. 
1269 * @return the number of bytes written. 1270 * @see #encodeInt64(PositionedByteRange, long, Order) 1271 * @see #decodeInt32(PositionedByteRange) 1272 */ 1273 public static int encodeInt32(PositionedByteRange dst, int val, Order ord) { 1274 final int offset = dst.getOffset(), start = dst.getPosition(); 1275 dst.put(FIXED_INT32) 1276 .put((byte) ((val >> 24) ^ 0x80)) 1277 .put((byte) (val >> 16)) 1278 .put((byte) (val >> 8)) 1279 .put((byte) val); 1280 ord.apply(dst.getBytes(), offset + start, 5); 1281 return 5; 1282 } 1283 1284 /** 1285 * Decode an {@code int32} value. 1286 * @see #encodeInt32(PositionedByteRange, int, Order) 1287 */ 1288 public static int decodeInt32(PositionedByteRange src) { 1289 final byte header = src.get(); 1290 assert header == FIXED_INT32 || header == DESCENDING.apply(FIXED_INT32); 1291 Order ord = header == FIXED_INT32 ? ASCENDING : DESCENDING; 1292 int val = (ord.apply(src.get()) ^ 0x80) & 0xff; 1293 for (int i = 1; i < 4; i++) { 1294 val = (val << 8) + (ord.apply(src.get()) & 0xff); 1295 } 1296 return val; 1297 } 1298 1299 /** 1300 * Encode an {@code int64} value using the fixed-length encoding. 1301 * <p> 1302 * This format ensures that all longs sort in their natural order, as they 1303 * would sort when using signed long comparison. 1304 * </p> 1305 * <p> 1306 * All Longs are serialized to an 8-byte, fixed-width sortable byte format. 1307 * Serialization is performed by inverting the integer sign bit and writing 1308 * the resulting bytes to the byte array in big endian order. The encoded 1309 * value is prefixed by the {@link #FIXED_INT64} header byte. This encoding 1310 * is designed to handle java language primitives and so Null values are NOT 1311 * supported by this implementation. 
1312 * </p> 1313 * <p> 1314 * For example: 1315 * </p> 1316 * <pre> 1317 * Input: 0x0000000000000005 (5) 1318 * Result: 0x288000000000000005 1319 * 1320 * Input: 0xfffffffffffffffb (-4) 1321 * Result: 0x280000000000000004 1322 * 1323 * Input: 0x7fffffffffffffff (Long.MAX_VALUE) 1324 * Result: 0x28ffffffffffffffff 1325 * 1326 * Input: 0x8000000000000000 (Long.MIN_VALUE) 1327 * Result: 0x287fffffffffffffff 1328 * </pre> 1329 * <p> 1330 * This encoding format, and much of this documentation string, is based on 1331 * Orderly's {@code FixedIntWritableRowKey}. 1332 * </p> 1333 * @return the number of bytes written. 1334 * @see #decodeInt64(PositionedByteRange) 1335 */ 1336 public static int encodeInt64(PositionedByteRange dst, long val, Order ord) { 1337 final int offset = dst.getOffset(), start = dst.getPosition(); 1338 dst.put(FIXED_INT64) 1339 .put((byte) ((val >> 56) ^ 0x80)) 1340 .put((byte) (val >> 48)) 1341 .put((byte) (val >> 40)) 1342 .put((byte) (val >> 32)) 1343 .put((byte) (val >> 24)) 1344 .put((byte) (val >> 16)) 1345 .put((byte) (val >> 8)) 1346 .put((byte) val); 1347 ord.apply(dst.getBytes(), offset + start, 9); 1348 return 9; 1349 } 1350 1351 /** 1352 * Decode an {@code int64} value. 1353 * @see #encodeInt64(PositionedByteRange, long, Order) 1354 */ 1355 public static long decodeInt64(PositionedByteRange src) { 1356 final byte header = src.get(); 1357 assert header == FIXED_INT64 || header == DESCENDING.apply(FIXED_INT64); 1358 Order ord = header == FIXED_INT64 ? ASCENDING : DESCENDING; 1359 long val = (ord.apply(src.get()) ^ 0x80) & 0xff; 1360 for (int i = 1; i < 8; i++) { 1361 val = (val << 8) + (ord.apply(src.get()) & 0xff); 1362 } 1363 return val; 1364 } 1365 1366 /** 1367 * Encode a 32-bit floating point value using the fixed-length encoding. 1368 * Encoding format is described at length in 1369 * {@link #encodeFloat64(PositionedByteRange, double, Order)}. 1370 * @return the number of bytes written. 
1371 * @see #decodeFloat32(PositionedByteRange) 1372 * @see #encodeFloat64(PositionedByteRange, double, Order) 1373 */ 1374 public static int encodeFloat32(PositionedByteRange dst, float val, Order ord) { 1375 final int offset = dst.getOffset(), start = dst.getPosition(); 1376 int i = Float.floatToIntBits(val); 1377 i ^= ((i >> (Integer.SIZE - 1)) | Integer.MIN_VALUE); 1378 dst.put(FIXED_FLOAT32) 1379 .put((byte) (i >> 24)) 1380 .put((byte) (i >> 16)) 1381 .put((byte) (i >> 8)) 1382 .put((byte) i); 1383 ord.apply(dst.getBytes(), offset + start, 5); 1384 return 5; 1385 } 1386 1387 /** 1388 * Decode a 32-bit floating point value using the fixed-length encoding. 1389 * @see #encodeFloat32(PositionedByteRange, float, Order) 1390 */ 1391 public static float decodeFloat32(PositionedByteRange src) { 1392 final byte header = src.get(); 1393 assert header == FIXED_FLOAT32 || header == DESCENDING.apply(FIXED_FLOAT32); 1394 Order ord = header == FIXED_FLOAT32 ? ASCENDING : DESCENDING; 1395 int val = ord.apply(src.get()) & 0xff; 1396 for (int i = 1; i < 4; i++) { 1397 val = (val << 8) + (ord.apply(src.get()) & 0xff); 1398 } 1399 val ^= (~val >> (Integer.SIZE - 1)) | Integer.MIN_VALUE; 1400 return Float.intBitsToFloat(val); 1401 } 1402 1403 /** 1404 * Encode a 64-bit floating point value using the fixed-length encoding. 1405 * <p> 1406 * This format ensures the following total ordering of floating point 1407 * values: Double.NEGATIVE_INFINITY < -Double.MAX_VALUE < ... < 1408 * -Double.MIN_VALUE < -0.0 < +0.0; < Double.MIN_VALUE < ... 1409 * < Double.MAX_VALUE < Double.POSITIVE_INFINITY < Double.NaN 1410 * </p> 1411 * <p> 1412 * Floating point numbers are encoded as specified in IEEE 754. A 64-bit 1413 * double precision float consists of a sign bit, 11-bit unsigned exponent 1414 * encoded in offset-1023 notation, and a 52-bit significand. 
* The format is
   * described further in the <a
   * href="http://en.wikipedia.org/wiki/Double_precision"> Double Precision
   * Floating Point Wikipedia page</a> </p>
   * <p>
   * The value of a normal float is -1 <sup>sign bit</sup> ×
   * 2<sup>exponent - 1023</sup> × 1.significand
   * </p>
   * <p>
   * The IEEE 754 floating point format already preserves sort ordering for
   * positive floating point numbers when the raw bytes are compared in most
   * significant byte order. This is discussed further at <a href=
   * "http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm"
   * > http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.
   * htm</a>
   * </p>
   * <p>
   * Thus, we need only ensure that negative numbers sort in the exact
   * opposite order as positive numbers (so that say, negative infinity is
   * less than negative 1), and that all negative numbers compare less than
   * any positive number. To accomplish this, we invert the sign bit of all
   * floating point numbers, and we also invert the exponent and significand
   * bits if the floating point number was negative.
   * </p>
   * <p>
   * More specifically, we first store the floating point bits into a 64-bit
   * long {@code l} using {@link Double#doubleToLongBits}. This method
   * collapses all NaNs into a single, canonical NaN value but otherwise
   * leaves the bits unchanged. We then compute
   * </p>
   * <pre>
   * l ^= (l &gt;&gt; (Long.SIZE - 1)) | Long.MIN_VALUE
   * </pre>
   * <p>
   * which inverts the sign bit and XOR's all other bits with the sign bit
   * itself. Comparing the raw bytes of {@code l} in most significant
   * byte order is equivalent to performing a double precision floating point
   * comparison on the underlying bits (ignoring NaN comparisons, as NaNs
   * don't compare equal to anything when performing floating point
   * comparisons).
1454 * </p> 1455 * <p> 1456 * The resulting long integer is then converted into a byte array by 1457 * serializing the long one byte at a time in most significant byte order. 1458 * The serialized integer is prefixed by a single header byte. All 1459 * serialized values are 9 bytes in length. 1460 * </p> 1461 * <p> 1462 * This encoding format, and much of this highly detailed documentation 1463 * string, is based on Orderly's {@code DoubleWritableRowKey}. 1464 * </p> 1465 * @return the number of bytes written. 1466 * @see #decodeFloat64(PositionedByteRange) 1467 */ 1468 public static int encodeFloat64(PositionedByteRange dst, double val, Order ord) { 1469 final int offset = dst.getOffset(), start = dst.getPosition(); 1470 long lng = Double.doubleToLongBits(val); 1471 lng ^= ((lng >> (Long.SIZE - 1)) | Long.MIN_VALUE); 1472 dst.put(FIXED_FLOAT64) 1473 .put((byte) (lng >> 56)) 1474 .put((byte) (lng >> 48)) 1475 .put((byte) (lng >> 40)) 1476 .put((byte) (lng >> 32)) 1477 .put((byte) (lng >> 24)) 1478 .put((byte) (lng >> 16)) 1479 .put((byte) (lng >> 8)) 1480 .put((byte) lng); 1481 ord.apply(dst.getBytes(), offset + start, 9); 1482 return 9; 1483 } 1484 1485 /** 1486 * Decode a 64-bit floating point value using the fixed-length encoding. 1487 * @see #encodeFloat64(PositionedByteRange, double, Order) 1488 */ 1489 public static double decodeFloat64(PositionedByteRange src) { 1490 final byte header = src.get(); 1491 assert header == FIXED_FLOAT64 || header == DESCENDING.apply(FIXED_FLOAT64); 1492 Order ord = header == FIXED_FLOAT64 ? ASCENDING : DESCENDING; 1493 long val = ord.apply(src.get()) & 0xff; 1494 for (int i = 1; i < 8; i++) { 1495 val = (val << 8) + (ord.apply(src.get()) & 0xff); 1496 } 1497 val ^= (~val >> (Long.SIZE - 1)) | Long.MIN_VALUE; 1498 return Double.longBitsToDouble(val); 1499 } 1500 1501 /** 1502 * Returns true when {@code src} appears to be positioned an encoded value, 1503 * false otherwise. 
1504 */ 1505 public static boolean isEncodedValue(PositionedByteRange src) { 1506 return isNull(src) || isNumeric(src) || isFixedInt8(src) || isFixedInt16(src) 1507 || isFixedInt32(src) || isFixedInt64(src) 1508 || isFixedFloat32(src) || isFixedFloat64(src) || isText(src) || isBlobCopy(src) 1509 || isBlobVar(src); 1510 } 1511 1512 /** 1513 * Return true when the next encoded value in {@code src} is null, false 1514 * otherwise. 1515 */ 1516 public static boolean isNull(PositionedByteRange src) { 1517 return NULL == 1518 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1519 } 1520 1521 /** 1522 * Return true when the next encoded value in {@code src} uses Numeric 1523 * encoding, false otherwise. {@code NaN}, {@code +/-Inf} are valid Numeric 1524 * values. 1525 */ 1526 public static boolean isNumeric(PositionedByteRange src) { 1527 byte x = (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1528 return x >= NEG_INF && x <= NAN; 1529 } 1530 1531 /** 1532 * Return true when the next encoded value in {@code src} uses Numeric 1533 * encoding and is {@code Infinite}, false otherwise. 1534 */ 1535 public static boolean isNumericInfinite(PositionedByteRange src) { 1536 byte x = (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1537 return NEG_INF == x || POS_INF == x; 1538 } 1539 1540 /** 1541 * Return true when the next encoded value in {@code src} uses Numeric 1542 * encoding and is {@code NaN}, false otherwise. 1543 */ 1544 public static boolean isNumericNaN(PositionedByteRange src) { 1545 return NAN == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1546 } 1547 1548 /** 1549 * Return true when the next encoded value in {@code src} uses Numeric 1550 * encoding and is {@code 0}, false otherwise. 1551 */ 1552 public static boolean isNumericZero(PositionedByteRange src) { 1553 return ZERO == 1554 (-1 == Integer.signum(src.peek()) ? 
DESCENDING : ASCENDING).apply(src.peek()); 1555 } 1556 1557 /** 1558 * Return true when the next encoded value in {@code src} uses fixed-width 1559 * Int8 encoding, false otherwise. 1560 */ 1561 public static boolean isFixedInt8(PositionedByteRange src) { 1562 return FIXED_INT8 == 1563 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1564 } 1565 1566 /** 1567 * Return true when the next encoded value in {@code src} uses fixed-width 1568 * Int16 encoding, false otherwise. 1569 */ 1570 public static boolean isFixedInt16(PositionedByteRange src) { 1571 return FIXED_INT16 == 1572 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1573 } 1574 1575 /** 1576 * Return true when the next encoded value in {@code src} uses fixed-width 1577 * Int32 encoding, false otherwise. 1578 */ 1579 public static boolean isFixedInt32(PositionedByteRange src) { 1580 return FIXED_INT32 == 1581 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1582 } 1583 1584 /** 1585 * Return true when the next encoded value in {@code src} uses fixed-width 1586 * Int64 encoding, false otherwise. 1587 */ 1588 public static boolean isFixedInt64(PositionedByteRange src) { 1589 return FIXED_INT64 == 1590 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1591 } 1592 1593 /** 1594 * Return true when the next encoded value in {@code src} uses fixed-width 1595 * Float32 encoding, false otherwise. 1596 */ 1597 public static boolean isFixedFloat32(PositionedByteRange src) { 1598 return FIXED_FLOAT32 == 1599 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1600 } 1601 1602 /** 1603 * Return true when the next encoded value in {@code src} uses fixed-width 1604 * Float64 encoding, false otherwise. 1605 */ 1606 public static boolean isFixedFloat64(PositionedByteRange src) { 1607 return FIXED_FLOAT64 == 1608 (-1 == Integer.signum(src.peek()) ? 
DESCENDING : ASCENDING).apply(src.peek()); 1609 } 1610 1611 /** 1612 * Return true when the next encoded value in {@code src} uses Text encoding, 1613 * false otherwise. 1614 */ 1615 public static boolean isText(PositionedByteRange src) { 1616 return TEXT == 1617 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1618 } 1619 1620 /** 1621 * Return true when the next encoded value in {@code src} uses BlobVar 1622 * encoding, false otherwise. 1623 */ 1624 public static boolean isBlobVar(PositionedByteRange src) { 1625 return BLOB_VAR == 1626 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1627 } 1628 1629 /** 1630 * Return true when the next encoded value in {@code src} uses BlobCopy 1631 * encoding, false otherwise. 1632 */ 1633 public static boolean isBlobCopy(PositionedByteRange src) { 1634 return BLOB_COPY == 1635 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1636 } 1637 1638 /** 1639 * Skip {@code buff}'s position forward over one encoded value. 1640 * @return number of bytes skipped. 1641 */ 1642 public static int skip(PositionedByteRange src) { 1643 final int start = src.getPosition(); 1644 byte header = src.get(); 1645 Order ord = (-1 == Integer.signum(header)) ? 
DESCENDING : ASCENDING; 1646 header = ord.apply(header); 1647 1648 switch (header) { 1649 case NULL: 1650 case NEG_INF: 1651 return 1; 1652 case NEG_LARGE: /* Large negative number: 0x08, ~E, ~M */ 1653 skipVaruint64(src, DESCENDING != ord); 1654 skipSignificand(src, DESCENDING != ord); 1655 return src.getPosition() - start; 1656 case NEG_MED_MIN: /* Medium negative number: 0x13-E, ~M */ 1657 case NEG_MED_MIN + 0x01: 1658 case NEG_MED_MIN + 0x02: 1659 case NEG_MED_MIN + 0x03: 1660 case NEG_MED_MIN + 0x04: 1661 case NEG_MED_MIN + 0x05: 1662 case NEG_MED_MIN + 0x06: 1663 case NEG_MED_MIN + 0x07: 1664 case NEG_MED_MIN + 0x08: 1665 case NEG_MED_MIN + 0x09: 1666 case NEG_MED_MAX: 1667 skipSignificand(src, DESCENDING != ord); 1668 return src.getPosition() - start; 1669 case NEG_SMALL: /* Small negative number: 0x14, -E, ~M */ 1670 skipVaruint64(src, DESCENDING == ord); 1671 skipSignificand(src, DESCENDING != ord); 1672 return src.getPosition() - start; 1673 case ZERO: 1674 return 1; 1675 case POS_SMALL: /* Small positive number: 0x16, ~-E, M */ 1676 skipVaruint64(src, DESCENDING != ord); 1677 skipSignificand(src, DESCENDING == ord); 1678 return src.getPosition() - start; 1679 case POS_MED_MIN: /* Medium positive number: 0x17+E, M */ 1680 case POS_MED_MIN + 0x01: 1681 case POS_MED_MIN + 0x02: 1682 case POS_MED_MIN + 0x03: 1683 case POS_MED_MIN + 0x04: 1684 case POS_MED_MIN + 0x05: 1685 case POS_MED_MIN + 0x06: 1686 case POS_MED_MIN + 0x07: 1687 case POS_MED_MIN + 0x08: 1688 case POS_MED_MIN + 0x09: 1689 case POS_MED_MAX: 1690 skipSignificand(src, DESCENDING == ord); 1691 return src.getPosition() - start; 1692 case POS_LARGE: /* Large positive number: 0x22, E, M */ 1693 skipVaruint64(src, DESCENDING == ord); 1694 skipSignificand(src, DESCENDING == ord); 1695 return src.getPosition() - start; 1696 case POS_INF: 1697 return 1; 1698 case NAN: 1699 return 1; 1700 case FIXED_INT8: 1701 src.setPosition(src.getPosition() + 1); 1702 return src.getPosition() - start; 1703 case 
FIXED_INT16: 1704 src.setPosition(src.getPosition() + 2); 1705 return src.getPosition() - start; 1706 case FIXED_INT32: 1707 src.setPosition(src.getPosition() + 4); 1708 return src.getPosition() - start; 1709 case FIXED_INT64: 1710 src.setPosition(src.getPosition() + 8); 1711 return src.getPosition() - start; 1712 case FIXED_FLOAT32: 1713 src.setPosition(src.getPosition() + 4); 1714 return src.getPosition() - start; 1715 case FIXED_FLOAT64: 1716 src.setPosition(src.getPosition() + 8); 1717 return src.getPosition() - start; 1718 case TEXT: 1719 // for null-terminated values, skip to the end. 1720 do { 1721 header = ord.apply(src.get()); 1722 } while (header != TERM); 1723 return src.getPosition() - start; 1724 case BLOB_VAR: 1725 // read until we find a 0 in the MSB 1726 do { 1727 header = ord.apply(src.get()); 1728 } while ((byte) (header & 0x80) != TERM); 1729 return src.getPosition() - start; 1730 case BLOB_COPY: 1731 if (Order.DESCENDING == ord) { 1732 // if descending, read to termination byte. 1733 do { 1734 header = ord.apply(src.get()); 1735 } while (header != TERM); 1736 return src.getPosition() - start; 1737 } else { 1738 // otherwise, just skip to the end. 1739 src.setPosition(src.getLength()); 1740 return src.getPosition() - start; 1741 } 1742 default: 1743 throw unexpectedHeader(header); 1744 } 1745 } 1746 1747 /** 1748 * Return the number of encoded entries remaining in {@code buff}. The 1749 * state of {@code buff} is not modified through use of this method. 1750 */ 1751 public static int length(PositionedByteRange buff) { 1752 PositionedByteRange b = 1753 new SimplePositionedMutableByteRange(buff.getBytes(), buff.getOffset(), buff.getLength()); 1754 b.setPosition(buff.getPosition()); 1755 int cnt = 0; 1756 for (; isEncodedValue(b); skip(b), cnt++) 1757 ; 1758 return cnt; 1759 } 1760}