Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature supporting of different charsets #184

Merged
merged 12 commits into from
Mar 1, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 36 additions & 13 deletions cdm/core/src/main/java/ucar/nc2/internal/iosp/hdf4/H4header.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
Expand Down Expand Up @@ -104,7 +105,29 @@ public static void useHdfEos(boolean val) {
private Map<Short, Vinfo> refnoMap = new HashMap<>();

private MemTracker memTracker;
private PrintWriter debugOut = new PrintWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8));
private PrintWriter debugOut;

private final Charset valueCharset;

public H4header() {
valueCharset = StandardCharsets.UTF_8;
debugOut = new PrintWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8));
}

H4header(H4iosp h4iosp) {
valueCharset = h4iosp.getValueCharset().orElse(StandardCharsets.UTF_8);
debugOut = new PrintWriter(new OutputStreamWriter(System.out, valueCharset));
}

/**
 * Returns the {@link Charset} this header uses to decode string values read from the file.
 *
 * @return the value charset assigned when this header was constructed
 */
protected Charset getValueCharset() {
  return this.valueCharset;
}

public boolean isEos() {
return isEos;
Expand Down Expand Up @@ -429,11 +452,11 @@ private Attribute makeAttribute(TagVH vh) throws IOException {
case 3:
case 4:
if (nelems == 1)
att = new Attribute(name, raf.readStringMax(size));
att = new Attribute(name, raf.readStringMax(size, valueCharset));
else {
String[] vals = new String[nelems];
for (int i = 0; i < nelems; i++)
vals[i] = raf.readStringMax(size);
vals[i] = raf.readStringMax(size, valueCharset);
att = new Attribute(name, Array.factory(DataType.STRING, new int[] {nelems}, vals));
}
break;
Expand Down Expand Up @@ -1139,7 +1162,7 @@ List<DataChunk> readChunks(NetcdfFile ncfile) throws IOException {

String read() throws IOException {
raf.seek(data.offset);
return raf.readString(data.length);
return raf.readString(data.length, valueCharset);
}

public String toString() {
Expand Down Expand Up @@ -1639,7 +1662,7 @@ protected void read() throws IOException {
major = raf.readInt();
minor = raf.readInt();
release = raf.readInt();
name = raf.readStringMax(length - 12);
name = raf.readStringMax(length - 12, valueCharset);
}

public String value() {
Expand All @@ -1661,7 +1684,7 @@ private class TagText extends Tag {

protected void read() throws IOException {
raf.seek(offset);
text = raf.readStringMax(length);
text = raf.readStringMax(length, valueCharset);
}

public String detail() {
Expand All @@ -1683,7 +1706,7 @@ protected void read() throws IOException {
raf.seek(offset);
obj_tagno = raf.readShort();
obj_refno = raf.readShort();
text = raf.readStringMax(length - 4).trim();
text = raf.readStringMax(length - 4, valueCharset).trim();
}

public String detail() {
Expand Down Expand Up @@ -1840,7 +1863,7 @@ protected void read(int n) throws IOException {
int start = 0;
for (int i = 0; i < length; i++) {
if (b[i] == 0) {
text[count] = new String(b, start, i - start, StandardCharsets.UTF_8);
text[count] = new String(b, start, i - start, valueCharset);
count++;
if (count == n)
break;
Expand Down Expand Up @@ -1956,9 +1979,9 @@ protected void read() throws IOException {
elem_ref[i] = raf.readShort();

short len = raf.readShort();
name = raf.readStringMax(len);
name = raf.readStringMax(len, valueCharset);
len = raf.readShort();
className = raf.readStringMax(len);
className = raf.readStringMax(len, valueCharset);

extag = raf.readShort();
exref = raf.readShort();
Expand Down Expand Up @@ -2032,14 +2055,14 @@ protected void read() throws IOException {
fld_name = new String[nfields];
for (int i = 0; i < nfields; i++) {
short len = raf.readShort();
fld_name[i] = raf.readStringMax(len);
fld_name[i] = raf.readStringMax(len, valueCharset);
}

short len = raf.readShort();
name = raf.readStringMax(len);
name = raf.readStringMax(len, valueCharset);

len = raf.readShort();
className = raf.readStringMax(len);
className = raf.readStringMax(len, valueCharset);

extag = raf.readShort();
exref = raf.readShort();
Expand Down
53 changes: 46 additions & 7 deletions cdm/core/src/main/java/ucar/nc2/internal/iosp/hdf4/H4iosp.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Optional;
import ucar.ma2.Array;
import ucar.ma2.ArrayStructure;
import ucar.ma2.ArrayStructureBB;
Expand Down Expand Up @@ -41,7 +43,8 @@ public class H4iosp extends AbstractIOServiceProvider {
private static org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(H4iosp.class);
private static boolean showLayoutTypes;

private H4header header = new H4header();
private H4header header;
private Charset valueCharset;

@Override
public boolean isValidFile(RandomAccessFile raf) throws IOException {
Expand All @@ -50,8 +53,9 @@ public boolean isValidFile(RandomAccessFile raf) throws IOException {

@Override
public String getFileTypeId() {
if (header.isEos())
if (header != null && header.isEos()) {
return "HDF4-EOS";
}
return DataFormatType.HDF4.getDescription();
}

Expand All @@ -64,11 +68,24 @@ public String getFileTypeDescription() {
public void open(RandomAccessFile raf, NetcdfFile ncfile, CancelTask cancelTask) throws IOException {
super.open(raf, ncfile, cancelTask);
Group.Builder rootGroup = Group.builder(null).setName("").setNcfile(ncfile);
header.read(raf, rootGroup, null);
getHeader().read(raf, rootGroup, null);
ncfile.setRootGroup(rootGroup.build(null));
ncfile.finish();
}

/**
 * Returns the header used to read the file, creating it on first use.
 *
 * @return the existing header, or a newly created one bound to this IOSP
 */
private H4header getHeader() {
  if (header != null) {
    return header;
  }
  header = new H4header(this);
  return header;
}

@Override
public boolean isBuilder() {
return true;
Expand All @@ -79,7 +96,7 @@ public void build(RandomAccessFile raf, Group.Builder rootGroup, CancelTask canc
super.open(raf, rootGroup.getNcfile(), cancelTask);

raf.order(RandomAccessFile.BIG_ENDIAN);
header = new H4header();
header = new H4header(this);
header.read(raf, rootGroup, null);
}

Expand Down Expand Up @@ -475,14 +492,36 @@ public ByteBuffer getByteBuffer() throws IOException {
@Override
public void reacquire() throws IOException {
super.reacquire();
header.raf = this.raf;
getHeader().raf = this.raf;
}

public Object sendIospMessage(Object message) {
if (message.toString().equals("header"))
return header;
if (message instanceof Charset) {
setValueCharset((Charset) message);
}
if (message.toString().equals("header")) {
return getHeader();
}
return super.sendIospMessage(message);
}

/**
 * Returns the {@link Charset value charset}, if one has been defined. A charset is defined by
 * sending a {@link Charset} instance as the message to {@link #sendIospMessage}.
 *
 * @return the defined value charset, or an empty {@link Optional} when none was defined
 */
protected Optional<Charset> getValueCharset() {
  return valueCharset == null ? Optional.empty() : Optional.of(valueCharset);
}

/**
* Define {@link Charset value charset}.
*
* @param charset may be null.
*/
protected void setValueCharset(Charset charset) {
lesserwhirls marked this conversation as resolved.
Show resolved Hide resolved
valueCharset = charset;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import java.nio.IntBuffer;
import java.nio.LongBuffer;
import java.nio.ShortBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Formatter;
Expand Down Expand Up @@ -177,10 +178,23 @@ public static boolean isValidFile(RandomAccessFile raf) throws IOException {
private PrintWriter debugOut;
private MemTracker memTracker;

private final Charset valueCharset;

/**
 * Creates a header bound to the given file, root group builder and IOSP. String values are
 * decoded with the charset the IOSP reports, falling back to UTF-8 when it reports none.
 *
 * @param myRaf stored as this header's {@code raf}
 * @param root stored as this header's root group builder
 * @param h5iosp the owning IOSP, also queried once for its value charset
 */
H5headerNew(RandomAccessFile myRaf, Group.Builder root, H5iospNew h5iosp) {
  this.h5iosp = h5iosp;
  this.root = root;
  this.raf = myRaf;
  this.valueCharset = h5iosp.getValueCharset().orElse(StandardCharsets.UTF_8);
}

/**
 * Returns the {@link Charset} this header uses to decode string values read from the file.
 *
 * @return the value charset assigned when this header was constructed
 */
protected Charset getValueCharset() {
  return this.valueCharset;
}

public void read(PrintWriter debugPS) throws IOException {
Expand All @@ -189,7 +203,7 @@ public void read(PrintWriter debugPS) throws IOException {
} else if (debug1 || debugContinueMessage || debugCreationOrder || debugDetail || debugDimensionScales
|| debugGroupBtree || debugHardLink || debugHeap || debugPos || debugReference || debugTracker || debugV
|| debugSoftLink || warnings) {
debugOut = new PrintWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8));
debugOut = new PrintWriter(new OutputStreamWriter(System.out, valueCharset));
}
h5objects = new H5objects(this, debugOut, memTracker);

Expand Down Expand Up @@ -827,7 +841,7 @@ private String addDimension(Group.Builder parent, H5Group h5group, String name,

Dimension d = h5group.dimMap.get(dimName); // first look in current group
if (d == null) { // create if not found
d = Dimension.builder(dimName, length).setIsUnlimited(isUnlimited).build();
d = Dimension.builder().setName(dimName).setIsUnlimited(isUnlimited).setLength(length).build();
h5group.dimMap.put(dimName, d);
h5group.dimList.add(d);
parent.addDimension(d);
Expand Down Expand Up @@ -1194,7 +1208,7 @@ private String convertString(byte[] b) {
break;
count++;
}
return new String(b, 0, count, StandardCharsets.UTF_8); // all strings are considered to be UTF-8 unicode
return new String(b, 0, count, valueCharset); // decoded with the configured value charset (UTF-8 unless one was supplied)
}

private String convertString(byte[] b, int start, int len) {
Expand All @@ -1205,8 +1219,8 @@ private String convertString(byte[] b, int start, int len) {
break;
count++;
}
return new String(b, start, count - start, StandardCharsets.UTF_8); // all strings are considered to be UTF-8
// unicode
return new String(b, start, count - start, valueCharset); // decoded with the configured value charset
// (UTF-8 unless one was supplied)
}

protected Array convertEnums(Map<Integer, String> map, DataType dataType, Array values) {
Expand Down Expand Up @@ -2103,7 +2117,7 @@ String readHeapString(long heapIdAddress) throws IOException {
if (ho.dataSize > 1000 * 1000)
return String.format("Bad HeapObject.dataSize=%s", ho);
raf.seek(ho.dataPos);
return raf.readString((int) ho.dataSize);
return raf.readString((int) ho.dataSize, valueCharset);
}

/**
Expand All @@ -2120,7 +2134,7 @@ String readHeapString(ByteBuffer bb, int pos) throws IOException {
if (ho == null)
throw new IllegalStateException("Cant find Heap Object,heapId=" + heapId);
raf.seek(ho.dataPos);
return raf.readString((int) ho.dataSize);
return raf.readString((int) ho.dataSize, valueCharset);
}

Array readHeapVlen(ByteBuffer bb, int pos, DataType dataType, int endian) throws IOException, InvalidRangeException {
Expand Down
31 changes: 31 additions & 0 deletions cdm/core/src/main/java/ucar/nc2/internal/iosp/hdf5/H5iospNew.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.Charset;
import java.util.Optional;
import ucar.ma2.Array;
import ucar.ma2.ArrayStructure;
import ucar.ma2.ArrayStructureBB;
Expand Down Expand Up @@ -105,6 +107,7 @@ public boolean isBuilder() {
private H5headerNew header;
private boolean isEos;
boolean includeOriginalAttributes;
private Charset valueCharset;

@Override
public void build(RandomAccessFile raf, Group.Builder rootGroup, CancelTask cancelTask) throws IOException {
Expand All @@ -126,6 +129,34 @@ public void build(RandomAccessFile raf, Group.Builder rootGroup, CancelTask canc
}
}

/**
 * Handles a message sent to this IOSP. A {@link Charset} message is recorded as the value
 * charset; every message, including a charset, is then forwarded to the superclass.
 *
 * @param message the message to handle
 * @return whatever the superclass returns for the message
 */
@Override
public Object sendIospMessage(Object message) {
  final boolean carriesCharset = message instanceof Charset;
  if (carriesCharset) {
    Charset requested = (Charset) message;
    setValueCharset(requested);
  }
  return super.sendIospMessage(message);
}

/**
 * Returns the {@link Charset value charset}, if one has been defined. A charset is defined by
 * sending a {@link Charset} instance as the message to {@link #sendIospMessage}.
 *
 * @return the defined value charset, or an empty {@link Optional} when none was defined
 */
protected Optional<Charset> getValueCharset() {
  return valueCharset == null ? Optional.empty() : Optional.of(valueCharset);
}

/**
* Define {@link Charset value charset}.
*
* @param charset may be null.
*/
protected void setValueCharset(Charset charset) {
lesserwhirls marked this conversation as resolved.
Show resolved Hide resolved
valueCharset = charset;
}

@Override
public void open(RandomAccessFile raf, NetcdfFile ncfile, CancelTask cancelTask) throws IOException {
super.open(raf, ncfile, cancelTask);
Expand Down
Loading