Skip to content

Commit

Permalink
Fix charset handling
Browse files Browse the repository at this point in the history
  • Loading branch information
benmccann committed Mar 24, 2019
1 parent d410a90 commit 9dd7057
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 9 deletions.
8 changes: 6 additions & 2 deletions core/src/main/java/tech/tablesaw/io/DataFrameReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.io.StringReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.sql.ResultSet;
import java.sql.SQLException;

Expand Down Expand Up @@ -55,9 +56,12 @@ public Table url(String url) throws IOException {
*/
public Table url(URL url) throws IOException {
    URLConnection connection = url.openConnection();
    // A Content-Type value looks like "text/csv; charset=UTF-8". The charset parameter is
    // optional, and getContentType() returns null when the content type is not known.
    String contentType = connection.getContentType();
    String mimeType = null;
    Charset charset = Charset.defaultCharset();
    if (contentType != null) {
        String[] parts = contentType.split(";");
        // String.split drops trailing empty strings, so a value such as ";" yields no parts.
        mimeType = parts.length > 0 ? parts[0].trim() : "";
        // Look for an explicit charset parameter instead of assuming it is the second field.
        for (int i = 1; i < parts.length; i++) {
            String[] keyValue = parts[i].split("=", 2);
            if (keyValue.length == 2 && "charset".equalsIgnoreCase(keyValue[0].trim())) {
                String charsetName = keyValue[1].trim();
                // The parameter value may be a quoted string, e.g. charset="utf-8".
                if (charsetName.length() >= 2
                        && charsetName.startsWith("\"")
                        && charsetName.endsWith("\"")) {
                    charsetName = charsetName.substring(1, charsetName.length() - 1);
                }
                // Charset.forName throws an unchecked exception for an illegal or
                // unsupported name; that is deliberately left to propagate.
                charset = Charset.forName(charsetName);
                break;
            }
        }
    }
    // NOTE(review): a null mimeType (no Content-Type available) is passed to the registry
    // unchanged - confirm that getReaderForMimeType handles it.
    DataReader<?> reader = registry.getReaderForMimeType(mimeType);
    return reader.read(new Source(connection.getInputStream(), charset));
}

/**
Expand Down
29 changes: 22 additions & 7 deletions core/src/main/java/tech/tablesaw/io/Source.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;

Expand All @@ -17,31 +18,45 @@ public class Source {
protected final File file;
protected final Reader reader;
protected final InputStream inputStream;
protected final Charset charset;

/**
 * Creates a source backed by {@code file}, recording the JVM default charset
 * ({@link Charset#defaultCharset()}) as its charset.
 */
public Source(File file) {
    this(file, Charset.defaultCharset());
}

/**
 * Creates a source backed by {@code file} with an explicit charset.
 *
 * @param file the file this source refers to
 * @param charset the charset recorded for this source
 */
public Source(File file, Charset charset) {
    // Only the file and charset are populated; the other two backings stay unset.
    this.reader = null;
    this.inputStream = null;
    this.charset = charset;
    this.file = file;
}

/**
 * Creates a source backed by an existing {@link Reader}. No charset is recorded
 * for this kind of source: the {@code charset} field is set to {@code null}.
 */
public Source(Reader reader) {
    this.charset = null;
    this.inputStream = null;
    this.file = null;
    this.reader = reader;
}

/**
 * Creates a source backed by {@code inputStream}, recording the JVM default charset
 * ({@link Charset#defaultCharset()}) as its charset.
 */
public Source(InputStream inputStream) {
    this(inputStream, Charset.defaultCharset());
}

public static Source fromString(String s) {
return new Source(new StringReader(s));
/**
 * Creates a source backed by {@code inputStream} with an explicit charset.
 *
 * @param inputStream the stream this source refers to
 * @param charset the charset recorded for this source
 */
public Source(InputStream inputStream, Charset charset) {
    // Only the stream and charset are populated; the other two backings stay unset.
    this.reader = null;
    this.file = null;
    this.charset = charset;
    this.inputStream = inputStream;
}

public static Source fromFile(String file) {
return new Source(new File(file));
/** Wraps the text {@code s} in a {@link StringReader} and returns a source backed by that reader. */
public static Source fromString(String s) {
    StringReader text = new StringReader(s);
    return new Source(text);
}

public static Source fromUrl(String url) throws IOException {
Expand Down Expand Up @@ -69,12 +84,12 @@ public Reader createReader(byte[] cachedBytes) throws IOException {
return new InputStreamReader(new ByteArrayInputStream(cachedBytes));
}
if (inputStream != null) {
return new InputStreamReader(inputStream);
return new InputStreamReader(inputStream, charset);
}
if (reader != null) {
return reader;
}
return new FileReader(file);
return new InputStreamReader(new FileInputStream(file), charset);
}

private static String loadUrl(String url) throws IOException {
Expand Down

0 comments on commit 9dd7057

Please sign in to comment.