Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jsonl support #1260

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,6 @@ bin/
nbproject/private/
nbactions.xml
nb-configuration.xml

# VSCode
.vscode/
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.io.Writer;
import tech.tablesaw.io.Destination;
import tech.tablesaw.io.WriteOptions;
import tech.tablesaw.io.jsonl.JsonlWriteOptions;

public class JsonWriteOptions extends WriteOptions {

Expand Down
186 changes: 186 additions & 0 deletions json/src/main/java/tech/tablesaw/io/jsonl/JsonlReadOptions.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
package tech.tablesaw.io.jsonl;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.time.format.DateTimeFormatter;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;

import tech.tablesaw.api.ColumnType;
import tech.tablesaw.io.ReadOptions;
import tech.tablesaw.io.Source;

/**
 * Read options for JSON Lines sources.
 *
 * <p>Extends the generic {@link ReadOptions} with a single extra setting, a JSON Pointer
 * {@link #path()}. Instances are created through the nested {@link Builder}, which is obtained
 * from one of the static factory methods below.
 */
public class JsonlReadOptions extends ReadOptions {

  /** JSON Pointer path copied from the builder; {@code null} if the builder never set one. */
  private final String path;

  /**
   * Creates the options from a fully configured builder.
   *
   * @param builder the builder whose settings (including the path) are copied
   */
  protected JsonlReadOptions(Builder builder) {
    super(builder);
    path = builder.path;
  }

  /** Returns a builder reading from the given {@link Source}. */
  public static Builder builder(Source source) {
    return new Builder(source);
  }

  /** Returns a builder reading from the given file, with the table named after the file. */
  public static Builder builder(File file) {
    Builder fileBuilder = new Builder(file);
    fileBuilder.tableName(file.getName());
    return fileBuilder;
  }

  /** Returns a builder reading from the file with the given name. */
  public static Builder builder(String fileName) {
    File file = new File(fileName);
    return new Builder(file);
  }

  /** Returns a builder reading from the given URL. */
  public static Builder builder(URL url) throws IOException {
    return new Builder(url);
  }

  /** Returns a builder reading from the file with the given name. */
  public static Builder builderFromFile(String fileName) {
    return builder(fileName);
  }

  /** Returns a builder reading the given string as the document contents. */
  public static Builder builderFromString(String contents) {
    Reader contentsReader = new StringReader(contents);
    return builder(contentsReader);
  }

  /** Returns a builder reading from the URL parsed from the given string. */
  public static Builder builderFromUrl(String url) throws IOException {
    return builder(new URL(url));
  }

  /** Returns a builder reading from the given input stream. */
  public static Builder builder(InputStream stream) {
    return new Builder(stream);
  }

  /** Returns a builder reading from the given reader. */
  public static Builder builder(Reader reader) {
    return new Builder(reader);
  }

  /** Returns the JSON Pointer path configured on the builder, or {@code null} if none was set. */
  public String path() {
    return path;
  }

  /**
   * Builder for {@link JsonlReadOptions}.
   *
   * <p>Every inherited setter is overridden only to narrow its return type to this class, so that
   * calls can be chained without losing access to {@link #path(String)}.
   */
  public static class Builder extends ReadOptions.Builder {

    /** JSON Pointer path; stays {@code null} until {@link #path(String)} is called. */
    private String path;

    protected Builder(Source source) {
      super(source);
    }

    protected Builder(URL url) throws IOException {
      super(url);
    }

    public Builder(File file) {
      super(file);
    }

    protected Builder(Reader reader) {
      super(reader);
    }

    protected Builder(InputStream stream) {
      super(stream);
    }

    /** Creates the immutable options object from the current builder state. */
    @Override
    public JsonlReadOptions build() {
      return new JsonlReadOptions(this);
    }

    /**
     * Sets the JSON Pointer path.
     *
     * @param path the JSON Pointer path used to select a sub-tree in the main document
     * @return this builder
     */
    public Builder path(String path) {
      this.path = path;
      return this;
    }

    // ---- Inherited setters, re-declared so they return this builder type ----

    @Override
    public Builder tableName(String tableName) {
      super.tableName(tableName);
      return this;
    }

    @Override
    public Builder header(boolean header) {
      super.header(header);
      return this;
    }

    @Override
    public Builder sample(boolean sample) {
      super.sample(sample);
      return this;
    }

    @Override
    public Builder locale(Locale locale) {
      super.locale(locale);
      return this;
    }

    @Override
    public Builder dateFormat(DateTimeFormatter dateFormat) {
      super.dateFormat(dateFormat);
      return this;
    }

    @Override
    public Builder timeFormat(DateTimeFormatter timeFormat) {
      super.timeFormat(timeFormat);
      return this;
    }

    @Override
    public Builder dateTimeFormat(DateTimeFormatter dateTimeFormat) {
      super.dateTimeFormat(dateTimeFormat);
      return this;
    }

    @Override
    public Builder missingValueIndicator(String... missingValueIndicators) {
      super.missingValueIndicator(missingValueIndicators);
      return this;
    }

    @Override
    public Builder minimizeColumnSizes() {
      super.minimizeColumnSizes();
      return this;
    }

    @Override
    public Builder columnTypes(ColumnType[] columnTypes) {
      super.columnTypes(columnTypes);
      return this;
    }

    @Override
    public Builder columnTypes(Function<String, ColumnType> columnTypeFunction) {
      super.columnTypes(columnTypeFunction);
      return this;
    }

    @Override
    public Builder columnTypesPartial(Function<String, Optional<ColumnType>> columnTypeFunction) {
      super.columnTypesPartial(columnTypeFunction);
      return this;
    }

    @Override
    public Builder columnTypesPartial(Map<String, ColumnType> columnTypeByName) {
      super.columnTypesPartial(columnTypeByName);
      return this;
    }
  }
}
110 changes: 110 additions & 0 deletions json/src/main/java/tech/tablesaw/io/jsonl/JsonlReader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package tech.tablesaw.io.jsonl;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;
import com.github.wnameless.json.flattener.JsonFlattener;

import tech.tablesaw.api.Table;
import tech.tablesaw.io.DataReader;
import tech.tablesaw.io.ReadOptions;
import tech.tablesaw.io.ReaderRegistry;
import tech.tablesaw.io.RuntimeIOException;
import tech.tablesaw.io.Source;
import tech.tablesaw.io.TableBuildingUtils;

/**
 * Reads a sequence of top-level JSON values (JSON Lines) into a {@link Table}.
 *
 * <p>Every value is flattened with {@link JsonFlattener}, so each flattened key becomes a column.
 * The column set is the union of the keys of all rows, in first-seen order; a row that lacks a
 * key gets a {@code null} cell for that column.
 *
 * <p>NOTE(review): {@link JsonlReadOptions#path()} is not consulted anywhere in this reader.
 */
public class JsonlReader implements DataReader<JsonlReadOptions> {

  private static final JsonlReader INSTANCE = new JsonlReader();
  private static final ObjectMapper mapper = new ObjectMapper();

  static {
    register(Table.defaultReaderRegistry);
  }

  /**
   * Registers this reader for the {@code jsonl} extension, the JSON Lines MIME types and {@link
   * JsonlReadOptions}.
   *
   * @param registry the registry to add this reader to
   */
  public static void register(ReaderRegistry registry) {
    registry.registerExtension("jsonl", INSTANCE);
    registry.registerMimeType("text/jsonl", INSTANCE);
    registry.registerMimeType("application/jsonl+json", INSTANCE);
    registry.registerOptions(JsonlReadOptions.class, INSTANCE);
  }

  /**
   * Reads the source described by {@code options} and builds a table from it.
   *
   * @param options the read options, including the source to read from
   * @return the table built from the flattened rows
   * @throws RuntimeIOException if the source cannot be opened or a row cannot be flattened
   */
  @Override
  public Table read(JsonlReadOptions options) {
    ObjectReader stream = mapper.readerFor(JsonNode.class);
    // try-with-resources so the reader and parser are released even when parsing or table
    // building fails part-way through the input.
    try (Reader reader = options.source().createReader(null);
        JsonParser parser = stream.createParser(reader)) {
      Iterator<JsonNode> iter = stream.readValues(parser);
      return convertObjects(iter, options);
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }

  /**
   * Flattens every row, collects the union of column names and builds the table.
   *
   * @param iter iterator over the top-level JSON values of the input
   * @param options the read options forwarded to the table builder
   * @return the table built from the flattened rows
   */
  private Table convertObjects(Iterator<JsonNode> iter, ReadOptions options) {
    List<JsonNode> flattenedRows = new ArrayList<>();
    // LinkedHashSet keeps the columns in the order their keys are first encountered.
    Set<String> colNames = new LinkedHashSet<>();
    while (iter.hasNext()) {
      JsonNode flattenedRow = flattenRow(iter.next());
      flattenedRows.add(flattenedRow);
      Iterator<String> fieldNames = flattenedRow.fieldNames();
      while (fieldNames.hasNext()) {
        colNames.add(fieldNames.next());
      }
    }

    List<String> columnNames = new ArrayList<>(colNames);
    List<String[]> dataRows = new ArrayList<>(flattenedRows.size());
    for (JsonNode row : flattenedRows) {
      String[] cells = new String[columnNames.size()];
      for (int i = 0; i < cells.length; i++) {
        // get() returns null when the key is absent from this row.
        // NOTE(review): an explicit JSON null is a present value and asText() renders it as the
        // text "null" rather than a null cell - confirm that is intended.
        JsonNode value = row.get(columnNames.get(i));
        cells[i] = value == null ? null : value.asText();
      }
      dataRows.add(cells);
    }

    return TableBuildingUtils.build(columnNames, dataRows, options);
  }

  /** Flattens one row through {@link JsonFlattener} and parses the result back into a node. */
  private static JsonNode flattenRow(JsonNode rowObj) {
    try {
      String flattened = JsonFlattener.flatten(mapper.writeValueAsString(rowObj));
      return mapper.readTree(flattened);
    } catch (JsonProcessingException e) {
      throw new RuntimeIOException(e);
    }
  }

  /**
   * Reads the given source with default {@link JsonlReadOptions}.
   *
   * @param source the source to read from
   * @return the table built from the source
   */
  @Override
  public Table read(Source source) {
    return read(JsonlReadOptions.builder(source).build());
  }
}
31 changes: 31 additions & 0 deletions json/src/main/java/tech/tablesaw/io/jsonl/JsonlWriteOptions.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package tech.tablesaw.io.jsonl;

import java.io.Writer;
import tech.tablesaw.io.Destination;
import tech.tablesaw.io.WriteOptions;

/**
 * Write options for JSON Lines output.
 *
 * <p>Adds no settings of its own beyond what {@link WriteOptions} holds; instances are created
 * through the nested {@link Builder}.
 */
public class JsonlWriteOptions extends WriteOptions {

  /** Creates the options from the given builder; only reachable through {@link Builder#build()}. */
  private JsonlWriteOptions(Builder builder) {
    super(builder);
  }

  /**
   * Returns a builder that writes to the given destination.
   *
   * @param destination where the output is written
   */
  public static Builder builder(Destination destination) {
    return new Builder(destination);
  }

  /**
   * Returns a builder that writes to the given writer, wrapped in a {@link Destination}.
   *
   * @param writer the writer receiving the output
   */
  public static Builder builder(Writer writer) {
    Destination destination = new Destination(writer);
    return builder(destination);
  }

  /** Builder for {@link JsonlWriteOptions}. */
  public static class Builder extends WriteOptions.Builder {

    protected Builder(Destination destination) {
      super(destination);
    }

    /** Creates the options object from this builder. */
    public JsonlWriteOptions build() {
      return new JsonlWriteOptions(this);
    }
  }
}
Loading