Dave Jarvis' Repositories

Author	DaveJarvis <email>
Date	2021-06-27 16:24:29 GMT-0700
Commit	620dc4c4c3354279494426d0d3961462a3a01d48
Parent	54454ee
Delta	0 lines added, 126 lines removed, 126-line decrease

src/main/java/com/keenwrite/util/MurmurHash.java

		-package com.keenwrite.util;
		-
		-/**
		- * The MurmurHash3 algorithm was created by Austin Appleby and placed in the
		- * public domain. This Java port was authored by Yonik Seeley and also placed
		- * into the public domain. The author hereby disclaims copyright to this
		- * source code.
		- * <p>
		- * This produces exactly the same hash values as the final C++ version and is
		- * thus suitable for producing the same hash values across platforms.
		- * <p>
		- * The 32-bit x86 version of this hash should be the fastest variant for
		- * relatively short keys like ids. This class provides only that variant,
		- * {@link #hash32}; for longer strings, or when more than 32 hashed bits are
		- * needed, an x64 version of the algorithm (not part of this class) is a
		- * good choice.
		- * <p>
		- * The x86 and x64 versions do not produce the same results because
		- * algorithms are optimized for their respective platforms.
		- * <p>
		- * Code clean-up by White Magic Software, Ltd.
		- * </p>
		- */
		-public final class MurmurHash {
		- /**
		- * Returns the 32-bit x86-optimized hash of the UTF-8 bytes of the given
		- * character sequence without actually encoding it to a temporary buffer.
		- * This is over twice as fast as hashing the result of
		- * {@link String#getBytes()}.
		- * <p>
		- * Each char is converted to its 1-, 2- or 3-byte UTF-8 form on the fly. A
		- * char in the surrogate range (0xD800 to 0xDFFF) is combined with the char
		- * that follows it into a 4-byte sequence, without verifying that the two
		- * form a valid pair; a surrogate that is the last char of the hashed range
		- * is encoded on its own as 3 bytes.
		- * </p>
		- *
		- * @param data   the characters to hash, read with
		- *               {@link CharSequence#charAt(int)}
		- * @param offset index of the first char to hash
		- * @param len    number of chars (UTF-16 code units, not bytes) to hash,
		- *               starting at {@code offset}
		- * @param seed   initial value of the hash state
		- * @return the 32-bit hash value
		- */
		- @SuppressWarnings( "unused" )
		- public static int hash32( CharSequence data, int offset, int len, int seed ) {
		- final int c1 = 0xcc9e2d51; // multiplier applied to a word before rotating
		- final int c2 = 0x1b873593; // multiplier applied to a word after rotating
		-
		- int h1 = seed; // running hash state
		-
		- int pos = offset; // index of the next char to read
		- int end = offset + len; // exclusive end index
		- int k1 = 0; // 32-bit word being assembled from encoded bytes
		- int k2; // UTF-8 bytes of the current char, first byte in the low bits
		- int shift = 0; // number of bits of k1 already filled
		- int bits; // number of bits of k2 in use: 8, 16, 24 or 32
		- int nBytes = 0; // length in UTF8 bytes
		-
		- while( pos < end ) {
		- int code = data.charAt( pos++ );
		- if( code < 0x80 ) {
		- k2 = code; // single-byte encoding
		- bits = 8;
		- }
		- else if( code < 0x800 ) {
		- k2 = (0xC0 \| (code >> 6))
		- \| ((0x80 \| (code & 0x3F)) << 8);
		- bits = 16; // two-byte encoding
		- }
		- else if( code < 0xD800 \|\| code > 0xDFFF \|\| pos >= end ) {
		- // we check for pos>=end to encode an unpaired surrogate as 3 bytes.
		- k2 = (0xE0 \| (code >> 12))
		- \| ((0x80 \| ((code >> 6) & 0x3F)) << 8)
		- \| ((0x80 \| (code & 0x3F)) << 16);
		- bits = 24; // three-byte encoding
		- }
		- else {
		- // surrogate pair
		- // pos < end is guaranteed here: the previous branch handles pos >= end.
		- int utf32 = data.charAt( pos++ ); // second unit, taken without validation
		- utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
		- k2 = (0xff & (0xF0 \| (utf32 >> 18)))
		- \| ((0x80 \| ((utf32 >> 12) & 0x3F))) << 8
		- \| ((0x80 \| ((utf32 >> 6) & 0x3F))) << 16
		- \| (0x80 \| (utf32 & 0x3F)) << 24;
		- bits = 32; // four-byte encoding
		- }
		-
		- k1 \|= k2 << shift; // place the new bytes above those already in k1
		-
		- // k1 had (32 - shift) free bits, so only that many bits of k2 fit in it.
		- // When bits + shift exceeds 32, the remaining (bits + shift - 32) bits
		- // of k2 are carried over into the next word after mixing, see below.
		-
		- shift += bits;
		- if( shift >= 32 ) {
		- // mix after we have a complete word
		-
		- k1 *= c1;
		- k1 = (k1 << 15) \| (k1 >>> 17); // ROTL32(k1,15);
		- k1 *= c2;
		-
		- h1 ^= k1;
		- h1 = (h1 << 13) \| (h1 >>> 19); // ROTL32(h1,13);
		- h1 = h1 * 5 + 0xe6546b64;
		-
		- shift -= 32; // bits of k2 that did not fit into the mixed word
		- // Java uses only the low five bits of an int shift count, so shifting
		- // by 32 would leave k2 unchanged instead of clearing it; check for 0.
		- if( shift != 0 ) {
		- k1 = k2 >>> (bits - shift); // bits used == bits - newshift
		- }
		- else {
		- k1 = 0; // nothing carried over; start the next word empty
		- }
		- nBytes += 4; // one full 4-byte word was consumed
		- }
		-
		- } // end of per-char loop
		-
		- // handle tail
		- if( shift > 0 ) {
		- nBytes += shift >> 3; // leftover bits / 8 == leftover bytes
		- k1 *= c1;
		- k1 = (k1 << 15) \| (k1 >>> 17); // ROTL32(k1,15);
		- k1 *= c2;
		- h1 ^= k1;
		- }
		-
		- // finalization
		- h1 ^= nBytes; // fold the total UTF-8 byte length into the hash
		-
		- // fmix(h1);
		- h1 ^= h1 >>> 16;
		- h1 *= 0x85ebca6b;
		- h1 ^= h1 >>> 13;
		- h1 *= 0xc2b2ae35;
		- h1 ^= h1 >>> 16;
		-
		- return h1;
		- }
		-}

Find document elements by xpath expression