Skip to content

Commit

Permalink
add SimdJsonParser2 base on bitindex
Browse files Browse the repository at this point in the history
  • Loading branch information
jimeng committed Oct 4, 2024
1 parent d0c4330 commit ebe3d00
Show file tree
Hide file tree
Showing 4 changed files with 255 additions and 5 deletions.
10 changes: 9 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ group = 'org.simdjson'
version = scmVersion.version

repositories {
mavenLocal()
mavenCentral()
}

Expand All @@ -45,6 +46,7 @@ java {
ext {
junitVersion = '5.10.2'
jsoniterScalaVersion = '2.28.4'
lombokVersion = '1.18.34'
}

dependencies {
Expand All @@ -53,6 +55,10 @@ dependencies {
jmhImplementation group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-core_2.13', version: jsoniterScalaVersion
jmhImplementation group: 'com.google.guava', name: 'guava', version: '32.1.2-jre'
compileOnly group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-macros_2.13', version: jsoniterScalaVersion
compileOnly group: 'org.projectlombok', name: 'lombok', version: lombokVersion
annotationProcessor group: 'org.projectlombok', name: 'lombok', version: lombokVersion
testCompileOnly group: 'org.projectlombok', name: 'lombok', version: lombokVersion
testAnnotationProcessor group: 'org.projectlombok', name: 'lombok', version: lombokVersion

testImplementation group: 'org.assertj', name: 'assertj-core', version: '3.24.2'
testImplementation group: 'org.apache.commons', name: 'commons-text', version: '1.10.0'
Expand Down Expand Up @@ -160,7 +166,9 @@ publishing {
publications {
mavenJava(MavenPublication) {
from(components.java)

groupId = 'org.simdjson'
artifactId = 'simdjson-java'
version = scmVersion.version
pom {
name = project.name
description = 'A Java version of simdjson, a high-performance JSON parser utilizing SIMD instructions.'
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/org/simdjson/BitIndexes.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package org.simdjson;

class BitIndexes {
public class BitIndexes {

private final int[] indexes;

Expand Down Expand Up @@ -44,8 +44,8 @@ private long clearLowestBit(long bits) {
return bits & (bits - 1);
}

void advance() {
readIdx++;
int advance() {
return indexes[readIdx++];
}

int getAndAdvance() {
Expand Down
9 changes: 8 additions & 1 deletion src/main/java/org/simdjson/SimdJsonParser.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.simdjson;

import lombok.Getter;

public class SimdJsonParser {

private static final int PADDING = 64;
Expand All @@ -24,7 +26,12 @@ public SimdJsonParser(int capacity, int maxDepth) {
paddedBuffer = new byte[capacity];
indexer = new StructuralIndexer(bitIndexes);
}

/**
 * Runs only the structural-indexing stage on the given input and hands back
 * the resulting index, without building any parsed value.
 *
 * <p>The returned object is this parser's own {@code bitIndexes} field, not a
 * copy, so it is shared with the parser.
 * NOTE(review): presumably any later call that re-runs stage 1 on this parser
 * overwrites its contents - confirm against {@code reset()} / {@code stage1}.
 *
 * @param buffer bytes holding the JSON document
 * @param len    number of bytes of {@code buffer} to index
 * @return the parser's structural index after stage 1 has run
 */
public BitIndexes buildBitIndex(byte[] buffer, int len) {
    // Same order as parse(): pad first, then reset, then index.
    final byte[] input = padIfNeeded(buffer, len);
    reset();
    stage1(input, len);
    return bitIndexes;
}
public <T> T parse(byte[] buffer, int len, Class<T> expectedType) {
byte[] padded = padIfNeeded(buffer, len);
reset();
Expand Down
235 changes: 235 additions & 0 deletions src/main/java/org/simdjson/SimdJsonParser2.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
package org.simdjson;

import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.RequiredArgsConstructor;
import lombok.ToString;

public class SimdJsonParser2 {

@Data
@RequiredArgsConstructor
static class JsonNode {
private long version = 0;
private boolean isLeaf = false;
private final String name;
private String value = null;
private JsonNode parent = null;
private Map<String, JsonNode> children = new HashMap<>();
private int start = -1;
private int end = -1;
}

private final SimdJsonParser parser;
private BitIndexes bitIndexes;
private final JsonNode root = new JsonNode(null);
private final JsonNode[] row;
private final String[] result;
private final String[] emptyResult;
private JsonNode ptr;
private byte[] buffer;
private final int targetParseNum;
private long currentVersion = 0;
// pruning, when alreadyProcessedCols == NUM
private long alreadyProcessedCols = 0;

public SimdJsonParser2(String... args) {
parser = new SimdJsonParser();
targetParseNum = args.length;
row = new JsonNode[targetParseNum];
result = new String[targetParseNum];
emptyResult = new String[targetParseNum];
for (int i = 0; i < args.length; i++) {
emptyResult[i] = null;
}
for (int i = 0; i < targetParseNum; i++) {
JsonNode cur = root;
String[] paths = args[i].split("\\.");
for (int j = 0; j < paths.length; j++) {
if (!cur.getChildren().containsKey(paths[j])) {
JsonNode child = new JsonNode(paths[j]);
cur.getChildren().put(paths[j], child);
child.setParent(cur);
}
cur = cur.getChildren().get(paths[j]);
}
cur.setLeaf(true);
row[i] = cur;
}

}

public String[] parse(byte[] buffer, int len) {
this.bitIndexes = parser.buildBitIndex(buffer, len);
if (buffer == null || buffer.length == 0) {
return emptyResult;
}
this.alreadyProcessedCols = 0;
this.currentVersion++;
this.ptr = root;
this.buffer = buffer;

switch (buffer[bitIndexes.peek()]) {
case '{' -> {
parseMap();
}
case '[' -> {
parseList();
}
default -> {
throw new RuntimeException("invalid json format");
}
}
return getResult();
}

private void parseElement(String fieldName) {
if (fieldName == null) {
int start = bitIndexes.advance();
int realEnd = bitIndexes.advance();
while (realEnd > start) {
if (buffer[--realEnd] == '"') {
break;
}
}
fieldName = new String(buffer, start + 1, realEnd - start - 1);
}
if (!ptr.getChildren().containsKey(fieldName)) {
skip(false);
return;
}
ptr = ptr.getChildren().get(fieldName);
switch (buffer[bitIndexes.peek()]) {
case '{' -> {
parseMap();
}
case '[' -> {
parseList();
}
default -> {
ptr.setValue(skip(true));
ptr.setVersion(currentVersion);
++alreadyProcessedCols;
}
}
ptr = ptr.getParent();
}

private void parseMap() {
if (ptr.getChildren() == null) {
ptr.setValue(skip(true));
ptr.setVersion(currentVersion);
++alreadyProcessedCols;
return;
}
ptr.setStart(bitIndexes.peek());
bitIndexes.advance();
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != '}' && alreadyProcessedCols < targetParseNum) {
parseElement(null);
if (buffer[bitIndexes.peek()] == ',') {
bitIndexes.advance();
}
}
ptr.setEnd(bitIndexes.peek());
if (ptr.isLeaf()) {
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1));
ptr.setVersion(currentVersion);
++alreadyProcessedCols;
}
bitIndexes.advance();
}

private void parseList() {
if (ptr.getChildren() == null) {
ptr.setValue(skip(true));
ptr.setVersion(currentVersion);
++alreadyProcessedCols;
return;
}
ptr.setStart(bitIndexes.peek());
bitIndexes.advance();
int i = 0;
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != ']' && alreadyProcessedCols < targetParseNum) {
parseElement("" + i);
if (buffer[bitIndexes.peek()] == ',') {
bitIndexes.advance();
}
i++;
}
ptr.setEnd(bitIndexes.peek());
if (ptr.isLeaf()) {
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1));
ptr.setVersion(currentVersion);
++alreadyProcessedCols;
}
bitIndexes.advance();
}

private String skip(boolean retainValue) {
int i = 0;
int start = retainValue ? bitIndexes.peek() : 0;
switch (buffer[bitIndexes.peek()]) {
case '{' -> {
i++;
while (i > 0) {
bitIndexes.advance();
if (buffer[bitIndexes.peek()] == '{') {
i++;
} else if (buffer[bitIndexes.peek()] == '}') {
i--;
}
}
int end = bitIndexes.peek();
bitIndexes.advance();
return retainValue ? new String(buffer, start, end - start + 1) : null;
}
case '[' -> {
i++;
while (i > 0) {
bitIndexes.advance();
if (buffer[bitIndexes.peek()] == '[') {
i++;
} else if (buffer[bitIndexes.peek()] == ']') {
i--;
}
}
int end = bitIndexes.peek();
bitIndexes.advance();
return retainValue ? new String(buffer, start, end - start + 1) : null;
}
case '"' -> {
bitIndexes.advance();
int realEnd = bitIndexes.peek();
while (realEnd > start) {
if (buffer[--realEnd] == '"') {
break;
}
}
return retainValue ? new String(buffer, start + 1, realEnd - start - 1) : null;
}
default -> {
bitIndexes.advance();
int realEnd = bitIndexes.peek();
while (realEnd >= start) {
--realEnd;
if (buffer[realEnd] >= '0' && buffer[realEnd] <= '9') {
break;
}
}
return retainValue ? new String(buffer, start, realEnd - start + 1) : null;
}
}
}

private String[] getResult() {
for (int i = 0; i < targetParseNum; i++) {
if (row[i].getVersion() < currentVersion) {
result[i] = null;
continue;
}
result[i] = row[i].getValue();
}
return result;
}
}

0 comments on commit ebe3d00

Please sign in to comment.