[go: nahoru, domu]

Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(sql): more efficient pattern lookup for short ASCII patterns #4706

Merged
merged 6 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -191,29 +191,22 @@ public boolean getBool(Record rec) {
int i = 0;
for (int n = size - 7 - patternSize + 1; i < n; i += 8) {
long zeroBytesWord = SwarUtils.markZeroBytes(us.longAt(i) ^ searchWord);
if (zeroBytesWord != 0) {
while (zeroBytesWord != 0) {
// We've found a match for the first pattern byte,
// slow down and check the full pattern.
int firstIndex = SwarUtils.indexOfFirstMarkedByte(zeroBytesWord);
int pos = firstIndex;
while (pos < 8) {
// Check if the pattern matches only for matched first bytes.
if (size - i - pos > 7) {
// It's safe to load full word.
if ((us.longAt(i + pos) & patternMask) == patternWord) {
return true;
}
} else {
// We can't call longAt safely near the sequence end,
// so construct the word from individual bytes.
if ((tailWord(us, i + pos) & patternMask) == patternWord) {
return true;
}
int pos = SwarUtils.indexOfFirstMarkedByte(zeroBytesWord);
// Check if the pattern matches only for matched first bytes.
if (size - i - pos > 7) {
// It's safe to load full word.
if ((us.longAt(i + pos) & patternMask) == patternWord) {
return true;
}
zeroBytesWord >>>= ((firstIndex + 1) << 3);
firstIndex = SwarUtils.indexOfFirstMarkedByte(zeroBytesWord);
pos += firstIndex + 1;
// Else, we can't call longAt safely near the sequence end,
// so construct the word from individual bytes.
} else if ((tailWord(us, i + pos) & patternMask) == patternWord) {
return true;
}
zeroBytesWord &= zeroBytesWord - 1;
}
}

Expand Down
14 changes: 11 additions & 3 deletions core/src/main/java/io/questdb/std/SwarUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,18 @@ public static int indexOfFirstMarkedByte(long w) {
/**
* Returns non-zero result in case if the input contains a zero byte.
* <p>
* First zero byte will be replaced with 0x80 while all preceding bytes (non-zero) - with zero values
* Note, that markZeroBytes gives no guarantees about output bytes following after position of the least significant zero byte from the arg w
* For the technique, see:
* <a href="http://graphics.stanford.edu/~seander/bithacks.html">Bit Twiddling Hacks</a>
* (Determine if a word has a byte equal to n).
* <p>
* <strong>Caveat</strong>:
* there are false positives, but they only occur if there is a real match.
* The false positives occur only to the left of the correct match, and only for
* a 0x01 byte.
* <p>
* Make sure to handle false positives gracefully by subsequent checks in code.
*/
public static long markZeroBytes(long w) {
return ((w - 0x0101010101010101L) & ~(w) & 0x8080808080808080L);
return ((w - 0x0101010101010101L) & (~w) & 0x8080808080808080L);
}
}
4 changes: 2 additions & 2 deletions core/src/test/java/io/questdb/test/std/SwarUtilsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ public void testIndexOfFirstMarkedByte() {
public void testMarkZeroBytes() {
Assert.assertEquals(0x0L, SwarUtils.markZeroBytes(-1L));
Assert.assertEquals(0x0L, SwarUtils.markZeroBytes(Long.MAX_VALUE));
Assert.assertEquals(0x8080808080808080L, SwarUtils.markZeroBytes(0x0000));
Assert.assertEquals(0x8080808080808080L, SwarUtils.markZeroBytes(0L));
Assert.assertEquals(0x8080808080808000L, SwarUtils.markZeroBytes(1L));
Assert.assertEquals(0x8080808080800080L, SwarUtils.markZeroBytes(1L << 9));
Expand All @@ -70,8 +71,7 @@ public void testMarkZeroBytes() {
Assert.assertEquals(0x8000808080808080L, SwarUtils.markZeroBytes(1L << 49));
Assert.assertEquals(0x0080808080808080L, SwarUtils.markZeroBytes(1L << 57));
Assert.assertEquals(0x0080808080808080L, SwarUtils.markZeroBytes(Long.MIN_VALUE));

// false positive:
Assert.assertEquals(0x8080808080808080L, SwarUtils.markZeroBytes(0x0100));
Assert.assertEquals(0x8080808080808080L, SwarUtils.markZeroBytes(0x0000));
}
}
Loading