Skip to content

Commit

Permalink
add test case
Browse files Browse the repository at this point in the history
  • Loading branch information
jimeng committed Oct 7, 2024
1 parent ebe3d00 commit 5c92d47
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 47 deletions.
6 changes: 6 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ ext {
junitVersion = '5.10.2'
jsoniterScalaVersion = '2.28.4'
lombokVersion = '1.18.34'
jacksonVersion = '2.18.0'
}

dependencies {
Expand All @@ -66,6 +67,11 @@ dependencies {
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-api', version: junitVersion
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-params', version: junitVersion
testRuntimeOnly group: 'org.junit.jupiter', name: 'junit-jupiter-engine', version: junitVersion

// Jackson is only needed by the JMH benchmarks under src/jmh, so it is scoped to the
// benchmark source set instead of leaking onto the library's main classpath.
// NOTE(review): assumes the JMH Gradle plugin (which provides `jmhImplementation`) is applied — confirm.
jmhImplementation group: 'com.fasterxml.jackson.core', name: 'jackson-core', version: jacksonVersion
jmhImplementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: jacksonVersion
jmhImplementation group: 'com.fasterxml.jackson.core', name: 'jackson-annotations', version: jacksonVersion
}

tasks.register('downloadTestData') {
Expand Down
55 changes: 55 additions & 0 deletions src/jmh/java/org/simdjson/Parse2VsJacksonBenchMark.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package org.simdjson;

import java.io.IOException;
import java.io.InputStream;
import java.util.concurrent.TimeUnit;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;

/**
 * JMH throughput benchmark that extracts the same six values from one JSON document
 * with {@link SimdJsonParser2} and with Jackson's tree model.
 *
 * <p>Both benchmark methods return the extracted values so that JMH consumes them;
 * a benchmark whose result is discarded may have its work removed by the JIT.
 */
@State(Scope.Benchmark)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.SECONDS)
public class Parse2VsJacksonBenchMark {

    /** Shared mapper: building an ObjectMapper is expensive, so it is created once. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /** Classpath location of the JSON document to parse. */
    @Param({"/twitter.json"})
    String fileName;

    /** Raw bytes of the document, loaded once per trial. */
    private byte[] buffer;

    /**
     * Dotted paths (array elements addressed by index) naming exactly the fields that
     * {@link #parseByJackson()} reads, so the two benchmarks do comparable work.
     */
    private final SimdJsonParser2 parser = new SimdJsonParser2(
            "statuses.0.metadata", "statuses.0.created_at", "statuses.0.id",
            "statuses.1.metadata", "statuses.1.created_at", "statuses.1.id");

    /**
     * Loads the benchmark document into memory.
     *
     * @throws IOException if the resource is missing from the classpath or cannot be read
     */
    @Setup(Level.Trial)
    public void setup() throws IOException {
        try (InputStream is = Parse2VsJacksonBenchMark.class.getResourceAsStream(fileName)) {
            // An explicit check instead of `assert`: assertions are disabled unless -ea is set.
            if (is == null) {
                throw new IOException("Benchmark resource not found on classpath: " + fileName);
            }
            buffer = is.readAllBytes();
        }
    }

    /**
     * Extracts the configured paths with {@link SimdJsonParser2}.
     *
     * @return the extracted values, returned so JMH keeps the parse alive
     */
    @Benchmark
    public String[] parseBySimdJson() {
        return parser.parse(buffer, buffer.length);
    }

    /**
     * Extracts the same six fields by building a full Jackson tree.
     *
     * @return the extracted values, returned so JMH keeps the parse alive
     * @throws IOException if Jackson cannot parse the document
     */
    @Benchmark
    public String[] parseByJackson() throws IOException {
        ArrayNode statuses = (ArrayNode) MAPPER.readTree(buffer).path("statuses");
        String[] result = new String[6];
        int slot = 0;
        // Elements 0 and 1, matching the indices in the SimdJsonParser2 paths above.
        for (int element = 0; element < 2; element++) {
            result[slot++] = statuses.get(element).path("metadata").toString();
            result[slot++] = statuses.get(element).path("created_at").toString();
            result[slot++] = statuses.get(element).path("id").toString();
        }
        return result;
    }
}
2 changes: 2 additions & 0 deletions src/main/java/org/simdjson/BitIndexes.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.simdjson;

import java.util.Arrays;

public class BitIndexes {

private final int[] indexes;
Expand Down
89 changes: 42 additions & 47 deletions src/main/java/org/simdjson/SimdJsonParser2.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,22 @@ static class JsonNode {
private final String[] emptyResult;
private JsonNode ptr;
private byte[] buffer;
private final int targetParseNum;
private final int expectParseCols;
// every time json string is processed, currentVersion will be incremented by 1
private long currentVersion = 0;
// pruning, when alreadyProcessedCols == NUM
private long alreadyProcessedCols = 0;
private long parseCols = 0;

public SimdJsonParser2(String... args) {
parser = new SimdJsonParser();
targetParseNum = args.length;
row = new JsonNode[targetParseNum];
result = new String[targetParseNum];
emptyResult = new String[targetParseNum];
expectParseCols = args.length;
row = new JsonNode[expectParseCols];
result = new String[expectParseCols];
emptyResult = new String[expectParseCols];
for (int i = 0; i < args.length; i++) {
emptyResult[i] = null;
}
for (int i = 0; i < targetParseNum; i++) {
for (int i = 0; i < expectParseCols; i++) {
JsonNode cur = root;
String[] paths = args[i].split("\\.");
for (int j = 0; j < paths.length; j++) {
Expand All @@ -65,7 +66,7 @@ public String[] parse(byte[] buffer, int len) {
if (buffer == null || buffer.length == 0) {
return emptyResult;
}
this.alreadyProcessedCols = 0;
this.parseCols = 0;
this.currentVersion++;
this.ptr = root;
this.buffer = buffer;
Expand All @@ -84,22 +85,34 @@ public String[] parse(byte[] buffer, int len) {
return getResult();
}

private void parseElement(String fieldName) {
if (fieldName == null) {
int start = bitIndexes.advance();
int realEnd = bitIndexes.advance();
while (realEnd > start) {
if (buffer[--realEnd] == '"') {
break;
}
}
fieldName = new String(buffer, start + 1, realEnd - start - 1);
/**
 * Reads the scalar token that starts at the current structural index and consumes
 * that index. The token spans from the consumed index up to (not including) the
 * next structural index, with surrounding whitespace trimmed.
 *
 * <p>Escape sequences inside a quoted token are returned as written; they are not decoded.
 *
 * @return {@code null} when the token is the literal {@code null} (compared ignoring case);
 *         the text between the first and last character when the token starts with a
 *         double quote; otherwise the trimmed token itself
 */
private String parseField() {
    int start = bitIndexes.advance();
    int next = bitIndexes.peek();
    // Decode explicitly as UTF-8: the charset-less String constructor uses the platform
    // default, which corrupts non-ASCII text on JVMs whose default is not UTF-8.
    String field = new String(buffer, start, next - start, java.nio.charset.StandardCharsets.UTF_8).trim();
    if ("null".equalsIgnoreCase(field)) {
        return null;
    }
    // A leading quote marks a string token: drop the opening and closing quote.
    // Anything else (e.g. a number) is returned unchanged.
    if (field.startsWith("\"")) {
        field = field.substring(1, field.length() - 1);
    }
    return field;
}

private void parseElement(String expectFieldName) {
if (parseCols >= expectParseCols) {
return;
}
// if expectFieldName is null, parent is map, else is list
if (expectFieldName == null) {
expectFieldName = parseField();
bitIndexes.advance(); // skip :
}
if (!ptr.getChildren().containsKey(fieldName)) {
if (!ptr.getChildren().containsKey(expectFieldName)) {
skip(false);
return;
}
ptr = ptr.getChildren().get(fieldName);
ptr = ptr.getChildren().get(expectFieldName);
switch (buffer[bitIndexes.peek()]) {
case '{' -> {
parseMap();
Expand All @@ -110,7 +123,7 @@ private void parseElement(String fieldName) {
default -> {
ptr.setValue(skip(true));
ptr.setVersion(currentVersion);
++alreadyProcessedCols;
++parseCols;
}
}
ptr = ptr.getParent();
Expand All @@ -120,12 +133,12 @@ private void parseMap() {
if (ptr.getChildren() == null) {
ptr.setValue(skip(true));
ptr.setVersion(currentVersion);
++alreadyProcessedCols;
++parseCols;
return;
}
ptr.setStart(bitIndexes.peek());
bitIndexes.advance();
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != '}' && alreadyProcessedCols < targetParseNum) {
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != '}' && parseCols <= expectParseCols) {
parseElement(null);
if (buffer[bitIndexes.peek()] == ',') {
bitIndexes.advance();
Expand All @@ -135,7 +148,7 @@ private void parseMap() {
if (ptr.isLeaf()) {
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1));
ptr.setVersion(currentVersion);
++alreadyProcessedCols;
++parseCols;
}
bitIndexes.advance();
}
Expand All @@ -144,13 +157,13 @@ private void parseList() {
if (ptr.getChildren() == null) {
ptr.setValue(skip(true));
ptr.setVersion(currentVersion);
++alreadyProcessedCols;
++parseCols;
return;
}
ptr.setStart(bitIndexes.peek());
bitIndexes.advance();
int i = 0;
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != ']' && alreadyProcessedCols < targetParseNum) {
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != ']' && parseCols <= expectParseCols) {
parseElement("" + i);
if (buffer[bitIndexes.peek()] == ',') {
bitIndexes.advance();
Expand All @@ -161,7 +174,7 @@ private void parseList() {
if (ptr.isLeaf()) {
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1));
ptr.setVersion(currentVersion);
++alreadyProcessedCols;
++parseCols;
}
bitIndexes.advance();
}
Expand Down Expand Up @@ -198,32 +211,14 @@ private String skip(boolean retainValue) {
bitIndexes.advance();
return retainValue ? new String(buffer, start, end - start + 1) : null;
}
case '"' -> {
bitIndexes.advance();
int realEnd = bitIndexes.peek();
while (realEnd > start) {
if (buffer[--realEnd] == '"') {
break;
}
}
return retainValue ? new String(buffer, start + 1, realEnd - start - 1) : null;
}
default -> {
bitIndexes.advance();
int realEnd = bitIndexes.peek();
while (realEnd >= start) {
--realEnd;
if (buffer[realEnd] >= '0' && buffer[realEnd] <= '9') {
break;
}
}
return retainValue ? new String(buffer, start, realEnd - start + 1) : null;
return parseField();
}
}
}

private String[] getResult() {
for (int i = 0; i < targetParseNum; i++) {
for (int i = 0; i < expectParseCols; i++) {
if (row[i].getVersion() < currentVersion) {
result[i] = null;
continue;
Expand Down
33 changes: 33 additions & 0 deletions src/test/java/org/simdjson/JsonMultiValueParsingTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package org.simdjson;

import static org.simdjson.testutils.SimdJsonAssertions.assertThat;
import static org.simdjson.testutils.TestUtils.toUtf8;

import org.junit.jupiter.api.Test;

/**
 * Checks that {@link SimdJsonParser2} returns one value per requested path,
 * in the order the paths were passed to its constructor.
 */
public class JsonMultiValueParsingTest {

    /** Nested object fields, a whole array, a single array element and a JSON null. */
    @Test
    public void testParseMultiValue() {
        // given
        byte[] document = toUtf8("{\"field1\":{\"field2\":\"value2\",\"field3\":3},\"field4\":[\"value4\",\"value5\"],\"field5\":null}");
        SimdJsonParser2 multiValueParser = new SimdJsonParser2("field1.field2", "field1.field3", "field4", "field4.0", "field5");
        String[] expected = {"value2", "3", "[\"value4\",\"value5\"]", "value4", null};

        // when
        String[] actual = multiValueParser.parse(document, document.length);

        // then
        for (int i = 0; i < expected.length; i++) {
            assertThat(actual[i]).isEqualTo(expected[i]);
        }
    }

    /** Field names containing non-ASCII characters and an escaped key are matched as written. */
    @Test
    public void testNonAsciiCharacters() {
        // given
        byte[] document = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4}");
        SimdJsonParser2 multiValueParser = new SimdJsonParser2("ąćśńźż", "\\u20A9\\u0E3F", "αβγ", "😀abc😀");
        String[] expected = {"1", "2", "3", "4"};

        // when
        String[] actual = multiValueParser.parse(document, document.length);

        // then
        for (int i = 0; i < expected.length; i++) {
            assertThat(actual[i]).isEqualTo(expected[i]);
        }
    }
}

0 comments on commit 5c92d47

Please sign in to comment.