From 001c35866e11bde15f03cee0ed9879d865780db5 Mon Sep 17 00:00:00 2001 From: Clemens Vasters Date: Tue, 30 Apr 2024 18:22:11 +0200 Subject: [PATCH 1/3] Plain JSON Mode --- .../src/apache/main/Generic/GenericReader.cs | 19 +- .../main/Generic/PreresolvingDatumReader.cs | 8 +- .../main/Generic/PreresolvingDatumWriter.cs | 15 +- .../src/apache/main/IO/BinaryDecoder.cs | 23 + .../src/apache/main/IO/BinaryEncoder.cs | 32 ++ lang/csharp/src/apache/main/IO/Decoder.cs | 8 + lang/csharp/src/apache/main/IO/Encoder.cs | 7 + lang/csharp/src/apache/main/IO/JsonDecoder.cs | 420 +++++++++++++++- lang/csharp/src/apache/main/IO/JsonEncoder.cs | 100 +++- .../src/apache/main/IO/JsonEncoderMode.cs | 35 ++ .../main/IO/Parsing/JsonGrammarGenerator.cs | 29 +- .../src/apache/main/IO/Parsing/Symbol.cs | 6 +- .../src/apache/main/IO/ParsingDecoder.cs | 5 +- .../src/apache/main/IO/ParsingEncoder.cs | 5 +- lang/csharp/src/apache/main/Schema/Aliases.cs | 4 +- .../src/apache/main/Schema/EnumSchema.cs | 76 ++- lang/csharp/src/apache/main/Schema/Field.cs | 50 +- .../src/apache/main/Schema/FixedSchema.cs | 2 +- .../src/apache/main/Schema/NamedSchema.cs | 35 +- .../src/apache/main/Schema/RecordSchema.cs | 12 +- lang/csharp/src/apache/main/Schema/Schema.cs | 2 +- .../src/apache/main/Schema/SchemaName.cs | 25 +- lang/csharp/src/apache/main/Util/Date.cs | 36 +- lang/csharp/src/apache/main/Util/Decimal.cs | 73 ++- .../main/Util/LocalTimestampMicrosecond.cs | 42 +- .../main/Util/LocalTimestampMillisecond.cs | 42 +- .../src/apache/main/Util/LogicalType.cs | 8 + .../src/apache/main/Util/TimeMicrosecond.cs | 42 +- .../src/apache/main/Util/TimeMillisecond.cs | 46 +- .../apache/main/Util/TimestampMicrosecond.cs | 42 +- .../apache/main/Util/TimestampMillisecond.cs | 40 +- lang/csharp/src/apache/main/Util/Uuid.cs | 12 +- .../src/apache/test/IO/JsonCodecTests.cs | 454 ++++++++++++------ .../src/apache/test/Schema/AliasesTests.cs | 8 +- .../src/apache/test/Schema/SchemaTests.cs | 9 +- 35 files changed, 1481 insertions(+), 291 deletions(-) create mode 100644 lang/csharp/src/apache/main/IO/JsonEncoderMode.cs diff --git a/lang/csharp/src/apache/main/Generic/GenericReader.cs b/lang/csharp/src/apache/main/Generic/GenericReader.cs index 0b945b9ff5e..84e766abbcc 100644 --- a/lang/csharp/src/apache/main/Generic/GenericReader.cs +++ b/lang/csharp/src/apache/main/Generic/GenericReader.cs @@ -227,7 +227,8 @@ public object Read(object reuse, Schema writerSchema, Schema readerSchema, Decod case Schema.Type.Union: return ReadUnion(reuse, (UnionSchema)writerSchema, readerSchema, d); case Schema.Type.Logical: - return ReadLogical(reuse, (LogicalSchema)writerSchema, readerSchema, d); + LogicalSchema writerLogicalSchema = writerSchema as LogicalSchema; + return Read(writerSchema.Tag, readerSchema, ()=>d.ReadLogicalTypeValue(writerLogicalSchema)); default: throw new AvroException("Unknown schema type: " + writerSchema); } @@ -552,22 +553,6 @@ protected virtual object ReadUnion(object reuse, UnionSchema writerSchema, Schem return Read(reuse, ws, readerSchema, d); } - /// - /// Deserializes an object based on the writer's logical schema. Uses the underlying logical type to convert - /// the value to the logical type. - /// - /// If appropriate, uses this object instead of creating a new one. - /// The UnionSchema that the writer used. - /// The schema the reader uses. - /// The decoder for serialization. - /// The deserialized object. - protected virtual object ReadLogical(object reuse, LogicalSchema writerSchema, Schema readerSchema, Decoder d) - { - LogicalSchema ls = (LogicalSchema)readerSchema; - - return writerSchema.LogicalType.ConvertToLogicalValue(Read(reuse, writerSchema.BaseSchema, ls.BaseSchema, d), ls); - } - /// /// Deserializes a fixed object and returns the object. The default implementation uses CreateFixed() /// and GetFixedBuffer() and returns what CreateFixed() returned. diff --git a/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs b/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs index 53270faecdb..07315c0e44d 100644 --- a/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs +++ b/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs @@ -178,7 +178,7 @@ private ReadItem ResolveReader(Schema writerSchema, Schema readerSchema) case Schema.Type.Union: return ResolveUnion((UnionSchema)writerSchema, readerSchema); case Schema.Type.Logical: - return ResolveLogical((LogicalSchema)writerSchema, (LogicalSchema)readerSchema); + return Read(d=>d.ReadLogicalTypeValue((LogicalSchema)writerSchema)); default: throw new AvroException("Unknown schema type: " + writerSchema); } @@ -400,12 +400,6 @@ private object ReadArray(object reuse, Decoder decoder, ArrayAccess arrayAccess, return array; } - private ReadItem ResolveLogical(LogicalSchema writerSchema, LogicalSchema readerSchema) - { - var baseReader = ResolveReader(writerSchema.BaseSchema, readerSchema.BaseSchema); - return (r, d) => readerSchema.LogicalType.ConvertToLogicalValue(baseReader(r, d), readerSchema); - } - private ReadItem ResolveFixed(FixedSchema writerSchema, FixedSchema readerSchema) { if (readerSchema.Size != writerSchema.Size) diff --git a/lang/csharp/src/apache/main/Generic/PreresolvingDatumWriter.cs b/lang/csharp/src/apache/main/Generic/PreresolvingDatumWriter.cs index dd21f62ed80..cc82fc7eca1 100644 --- a/lang/csharp/src/apache/main/Generic/PreresolvingDatumWriter.cs +++ b/lang/csharp/src/apache/main/Generic/PreresolvingDatumWriter.cs @@ -19,6 +19,8 @@ using System.Collections; using System.Collections.Generic; using System.Linq; +using System.Runtime.InteropServices; +using Avro.Util; using Encoder = Avro.IO.Encoder; namespace Avro.Generic @@ -100,7 +102,7 @@ private WriteItem ResolveWriter( Schema schema ) case Schema.Type.Union: return ResolveUnion((UnionSchema)schema); case Schema.Type.Logical: - return ResolveLogical((LogicalSchema)schema); + return (v,e) => Write(v, schema.Tag, (w)=>e.WriteLogicalTypeValue(w, (LogicalSchema)schema)); default: return (v, e) => Error(schema, v); } @@ -235,17 +237,6 @@ private void WriteArray(WriteItem itemWriter, object array, Encoder encoder) encoder.WriteArrayEnd(); } - /// - /// Serializes a logical value object by using the underlying logical type to convert the value - /// to its base value. - /// - /// The logical schema. - protected WriteItem ResolveLogical(LogicalSchema schema) - { - var baseWriter = ResolveWriter(schema.BaseSchema); - return (d, e) => baseWriter(schema.LogicalType.ConvertToBaseValue(d, schema), e); - } - private WriteItem ResolveMap(MapSchema mapSchema) { var itemWriter = ResolveWriter(mapSchema.ValueSchema); diff --git a/lang/csharp/src/apache/main/IO/BinaryDecoder.cs b/lang/csharp/src/apache/main/IO/BinaryDecoder.cs index 56aaa6e1815..107038a3cfb 100644 --- a/lang/csharp/src/apache/main/IO/BinaryDecoder.cs +++ b/lang/csharp/src/apache/main/IO/BinaryDecoder.cs @@ -172,6 +172,29 @@ public void ReadFixed(byte[] buffer, int start, int length) Read(buffer, start, length); } + /// + public object ReadLogicalTypeValue(LogicalSchema logicalSchema) + { + Schema baseSchema = logicalSchema.BaseSchema; + switch(baseSchema.Tag) + { + case Schema.Type.Int: + return logicalSchema.LogicalType.ConvertToLogicalValue(ReadInt(), logicalSchema); + case Schema.Type.Long: + return logicalSchema.LogicalType.ConvertToLogicalValue(ReadLong(), logicalSchema); + case Schema.Type.Bytes: + return logicalSchema.LogicalType.ConvertToLogicalValue(ReadBytes(), logicalSchema); + case Schema.Type.String: + return logicalSchema.LogicalType.ConvertToLogicalValue(ReadString(), logicalSchema); + case Schema.Type.Fixed: + byte[] fixedValue = new byte[((FixedSchema)baseSchema).Size]; + ReadFixed(fixedValue); + return logicalSchema.LogicalType.ConvertToLogicalValue(fixedValue, logicalSchema); + default: + throw new AvroException($"Unsupported logical type: {logicalSchema.Tag}"); + } + } + /// /// Skips over a null value. /// diff --git a/lang/csharp/src/apache/main/IO/BinaryEncoder.cs b/lang/csharp/src/apache/main/IO/BinaryEncoder.cs index 91eb0e5553b..f6c25936c5f 100644 --- a/lang/csharp/src/apache/main/IO/BinaryEncoder.cs +++ b/lang/csharp/src/apache/main/IO/BinaryEncoder.cs @@ -206,6 +206,38 @@ public void WriteFixed(byte[] data, int start, int len) stream.Write(data, start, len); } + /// + public void WriteLogicalTypeValue(object value, LogicalSchema schema) + { + var baseValue = schema.LogicalType.ConvertToBaseValue(value, schema); + switch (baseValue) + { + case int i: + WriteInt(i); + break; + case long l: + WriteLong(l); + break; + case float f: + WriteFloat(f); + break; + case double d: + WriteDouble(d); + break; + case byte[] bytes: + WriteBytes(bytes); + break; + case string s: + WriteString(s); + break; + case Avro.Generic.GenericFixed fixedValue: + WriteFixed(fixedValue.Value); + break; + default: + throw new AvroTypeException($"Unsupported conversion from {baseValue.GetType()}"); + } + } + private void writeBytes(byte[] bytes) { stream.Write(bytes, 0, bytes.Length); diff --git a/lang/csharp/src/apache/main/IO/Decoder.cs b/lang/csharp/src/apache/main/IO/Decoder.cs index 1476b0e1a51..281f7c79b2f 100644 --- a/lang/csharp/src/apache/main/IO/Decoder.cs +++ b/lang/csharp/src/apache/main/IO/Decoder.cs @@ -136,6 +136,14 @@ public interface Decoder /// Number of bytes to read void ReadFixed(byte[] buffer, int start, int length); + + /// + /// Reads a logical type value. + /// + /// Schema of the logical type + /// + object ReadLogicalTypeValue(LogicalSchema schema); + /// /// Skips a null Avro type on the stream. /// diff --git a/lang/csharp/src/apache/main/IO/Encoder.cs b/lang/csharp/src/apache/main/IO/Encoder.cs index 0c1712af430..93a3ce3defe 100644 --- a/lang/csharp/src/apache/main/IO/Encoder.cs +++ b/lang/csharp/src/apache/main/IO/Encoder.cs @@ -188,6 +188,13 @@ public interface Encoder /// Number of bytes to write. void WriteFixed(byte[] data, int start, int len); + /// + /// Writes a logical type value + /// + /// Value to be written + /// Logical type schema + void WriteLogicalTypeValue(object value, LogicalSchema schema); + /// /// Flushes the encoder. /// diff --git a/lang/csharp/src/apache/main/IO/JsonDecoder.cs b/lang/csharp/src/apache/main/IO/JsonDecoder.cs index 48d726e3083..bbb250c3d95 100644 --- a/lang/csharp/src/apache/main/IO/JsonDecoder.cs +++ b/lang/csharp/src/apache/main/IO/JsonDecoder.cs @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,12 +17,14 @@ */ using System; +using System.CodeDom; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; using Avro.IO.Parsing; using Newtonsoft.Json; +using Newtonsoft.Json.Linq; namespace Avro.IO { @@ -35,6 +37,7 @@ public class JsonDecoder : ParsingDecoder { private JsonReader reader; private readonly Stack reorderBuffers = new Stack(); + private readonly JsonMode mode; private ReorderBuffer currentReorderBuffer; private class ReorderBuffer @@ -45,33 +48,47 @@ private class ReorderBuffer public JsonReader OrigParser { get; set; } } - private JsonDecoder(Symbol root, Stream stream) : base(root) + private JsonDecoder(Symbol root, Stream stream, JsonMode mode) : base(root) { + this.mode = mode; Configure(stream); } - private JsonDecoder(Symbol root, string str) : base(root) + private JsonDecoder(Symbol root, string str, JsonMode mode) : base(root) { + this.mode = mode; Configure(str); } + private JsonDecoder(Symbol root, JsonReader reader, JsonMode mode) : base(root) + { + this.mode = mode; + Parser.Reset(); + reorderBuffers.Clear(); + currentReorderBuffer = null; + this.reader = reader; + this.reader.DateTimeZoneHandling = DateTimeZoneHandling.RoundtripKind; + this.reader.DateParseHandling = DateParseHandling.DateTime; + this.reader.Read(); + } + /// /// Initializes a new instance of the class. /// - public JsonDecoder(Schema schema, Stream stream) : this(GetSymbol(schema), stream) + public JsonDecoder(Schema schema, Stream stream, JsonMode mode = JsonMode.AvroJson) : this(GetSymbol(schema, mode), stream, mode) { } /// /// Initializes a new instance of the class. /// - public JsonDecoder(Schema schema, string str) : this(GetSymbol(schema), str) + public JsonDecoder(Schema schema, string str, JsonMode mode = JsonMode.AvroJson) : this(GetSymbol(schema, mode), str, mode) { } - private static Symbol GetSymbol(Schema schema) + private static Symbol GetSymbol(Schema schema, JsonMode mode) { - return (new JsonGrammarGenerator()).Generate(schema); + return (new JsonGrammarGenerator(mode)).Generate(schema); } /// @@ -86,6 +103,8 @@ public void Configure(Stream stream) reorderBuffers.Clear(); currentReorderBuffer = null; reader = new JsonTextReader(new StreamReader(stream)); + reader.DateTimeZoneHandling = DateTimeZoneHandling.RoundtripKind; + reader.DateParseHandling = DateParseHandling.DateTime; reader.Read(); } @@ -101,6 +120,8 @@ public void Configure(string str) reorderBuffers.Clear(); currentReorderBuffer = null; reader = new JsonTextReader(new StringReader(str)); + reader.DateTimeZoneHandling = DateTimeZoneHandling.RoundtripKind; + reader.DateParseHandling = DateParseHandling.DateTime; reader.Read(); } @@ -172,6 +193,21 @@ public override long ReadLong() } } + private decimal ReadDecimal(Symbol symbol) + { + Advance(symbol); + if (reader.TokenType == JsonToken.Integer || reader.TokenType == JsonToken.Float) + { + decimal result = Convert.ToDecimal(reader.Value); + reader.Read(); + return result; + } + else + { + throw TypeError("decimal"); + } + } + /// public override float ReadFloat() { @@ -229,6 +265,39 @@ public override string ReadString() return result; } + private DateTime ReadDateTime() + { + Advance(Symbol.JsonDateTime); + if (reader.TokenType == JsonToken.Date) + { + DateTime result = (DateTime)reader.Value; + reader.Read(); + return result; + } + else if (reader.TokenType == JsonToken.String) + { + string dateString = reader.Value.ToString(); + if (DateTime.TryParseExact(dateString, "yyyy-MM-dd", null, System.Globalization.DateTimeStyles.RoundtripKind, out DateTime result)) + { + reader.Read(); + return result; + } + else if (DateTime.TryParseExact(dateString, "O", null, System.Globalization.DateTimeStyles.RoundtripKind, out DateTime result1)) + { + reader.Read(); + return result1; + } + else + { + throw new AvroTypeException("Error parsing date string"); + } + } + else + { + throw TypeError("date"); + } + } + /// public override void SkipString() { @@ -270,9 +339,23 @@ public override byte[] ReadBytes() private byte[] ReadByteArray() { - Encoding iso = Encoding.GetEncoding("ISO-8859-1"); - byte[] result = iso.GetBytes(Convert.ToString(reader.Value)); - return result; + if (mode == JsonMode.PlainJson) + { + try + { + return Convert.FromBase64String(Convert.ToString(reader.Value)); + } + catch (FormatException e) + { + throw new AvroTypeException("Error decoding base64 string: " + e.Message); + } + } + else + { + Encoding iso = Encoding.GetEncoding("ISO-8859-1"); + byte[] result = iso.GetBytes(Convert.ToString(reader.Value)); + return result; + } } /// @@ -492,26 +575,151 @@ public override int ReadUnionIndex() Advance(Symbol.Union); Symbol.Alternative a = (Symbol.Alternative)Parser.PopSymbol(); - string label; + string label = null; if (reader.TokenType == JsonToken.Null) { label = "null"; } - else if (reader.TokenType == JsonToken.StartObject) + else if (mode == JsonMode.PlainJson) { - reader.Read(); - if (reader.TokenType == JsonToken.PropertyName) + var symbolCount = a.Symbols.Length; + + if (reader.TokenType == JsonToken.Boolean) { - label = Convert.ToString(reader.Value); + label = "boolean"; + } + else if (reader.TokenType == JsonToken.Integer) + { + for (int i = 0; i < symbolCount; i++) + { + var symbol = a.Symbols[i]; + if (symbol == Symbol.Int || symbol == Symbol.Long) + { + label = a.Labels[i]; + break; + } + } + } + else if (reader.TokenType == JsonToken.Float) + { + for (int i = 0; i < symbolCount; i++) + { + var symbol = a.Symbols[i]; + if (symbol == Symbol.Float || symbol == Symbol.Double || symbol == Symbol.Fixed) + { + label = a.Labels[i]; + break; + } + } + } + else if (reader.TokenType == JsonToken.Bytes) + { + for (int i = 0; i < symbolCount; i++) + { + var symbol = a.Symbols[i]; + if (symbol == Symbol.Bytes || symbol == Symbol.Fixed) + { + label = a.Labels[i]; + break; + } + } + } + else if (reader.TokenType == JsonToken.Date || reader.TokenType == JsonToken.String) + { + if (mode == JsonMode.PlainJson) + { + Symbol pushSymbol = Symbol.String; + string dateString = reader.Value.ToString(); + + if (reader.TokenType == JsonToken.Date || + DateTime.TryParseExact(dateString, "yyyy-MM-dd", null, System.Globalization.DateTimeStyles.RoundtripKind, out DateTime _) || + DateTime.TryParseExact(dateString, "O", null, System.Globalization.DateTimeStyles.RoundtripKind, out DateTime _ )) + { + label = "date"; + pushSymbol = Symbol.JsonDateTime; + } + else + { + label = "string"; + } + if (label == "date") + { + for (int i = 0; i < symbolCount; i++) + { + var symbol = a.Symbols[i]; + if (symbol == Symbol.JsonDateTime) + { + label = a.Labels[i]; + break; + } + } + } + int n1 = a.FindLabel(label); + if (n1 < 0) + { + throw new AvroTypeException("Unknown union branch " + label); + } + Parser.PushSymbol(pushSymbol); + return n1; + } + else + { + label = "string"; + } + } + } + + if (reader.TokenType == JsonToken.StartObject) + { + if ( mode == JsonMode.AvroJson) + { + // in Avro JSON, the object is tagged with the type reader.Read(); - Parser.PushSymbol(Symbol.UnionEnd); + if (reader.TokenType == JsonToken.PropertyName) + { + label = Convert.ToString(reader.Value); + reader.Read(); + Parser.PushSymbol(Symbol.UnionEnd); + } + else + { + throw TypeError("start-union"); + } } else { - throw TypeError("start-union"); + // Plain JSON; read the object and infer the type + var objectToken = JToken.ReadFrom(reader); + int matchIndex = -1; + for (int i = 0; i < a.Symbols.Length; i++) + { + if (a.Symbols[i].SymKind == Symbol.Kind.Sequence) + { + JTokenReader objectReader = new JTokenReader(objectToken); + if (IsRecordMatch(a.Symbols[i], objectReader)) + { + if (matchIndex >= 0) + { + throw new AvroTypeException("Ambiguous union: matches both " + a.Labels[matchIndex] + " and " + a.Labels[i]); + } + matchIndex = i; + } + } + } + reader = new CompositeJsonReader(new JTokenReader(objectToken), reader); + reader.Read(); + if (matchIndex >= 0) + { + label = a.Labels[matchIndex]; + Parser.PushSymbol(Symbol.UnionEnd); + } + else + { + throw new AvroTypeException("Unknown union branch"); + } } } - else + else if ( label == null) { throw TypeError("start-union"); } @@ -526,6 +734,142 @@ public override int ReadUnionIndex() return n; } + private bool IsRecordMatch(Symbol symbol, JsonReader objectReader) + { + JsonDecoder innerDecoder = new JsonDecoder(symbol, objectReader, JsonMode.PlainJson); + try + { + + while( objectReader.TokenType != JsonToken.None ) + { + switch(objectReader.TokenType) + { + case JsonToken.PropertyName: + break; + case JsonToken.Integer: + innerDecoder.Advance(Symbol.Int); + break; + case JsonToken.Float: + innerDecoder.Advance(Symbol.Float); + break; + case JsonToken.Boolean: + innerDecoder.Advance(Symbol.Boolean); + break; + case JsonToken.Date: + innerDecoder.Advance(Symbol.JsonDateTime); + break; + case JsonToken.String: + innerDecoder.Advance(Symbol.String); + break; + case JsonToken.Null: + innerDecoder.Advance(Symbol.Null); + break; + case JsonToken.Bytes: + innerDecoder.Advance(Symbol.Bytes); + break; + case JsonToken.StartObject: + innerDecoder.Advance(Symbol.RecordStart); + break; + case JsonToken.EndObject: + break; + case JsonToken.StartArray: + innerDecoder.Advance(Symbol.ArrayStart); + break; + case JsonToken.EndArray: + innerDecoder.Advance(Symbol.ArrayEnd); + break; + default: + break; + } + objectReader.Read(); + } + } + catch (AvroTypeException) + { + return false; + } + return true; + } + + /// + public override object ReadLogicalTypeValue(LogicalSchema logicalSchema) + { + switch (logicalSchema.LogicalType) + { + case Util.LogicalUnixEpochType dt: + if (mode == JsonMode.PlainJson) + { + return dt.ConvertToLogicalValue(ReadDateTime(), logicalSchema); + } + else + { + if (logicalSchema.BaseSchema.Tag == Schema.Type.Long) + { + return dt.ConvertToLogicalValue(ReadLong(), logicalSchema); + } + else + { + return dt.ConvertToLogicalValue(ReadInt(), logicalSchema); + } + } + case Util.LogicalUnixEpochType ts: + if (mode == JsonMode.PlainJson) + { + return ts.ConvertToLogicalValue(ReadString(), logicalSchema); + } + else + { + if (logicalSchema.BaseSchema.Tag == Schema.Type.Long) + { + return ts.ConvertToLogicalValue(ReadLong(), logicalSchema); + } + else + { + return ts.ConvertToLogicalValue(ReadInt(), logicalSchema); + } + } + case Util.Decimal dec: + if (mode == JsonMode.PlainJson) + { + return dec.ConvertToLogicalValue(ReadDecimal(logicalSchema.BaseSchema.Tag == Schema.Type.Bytes?Symbol.Bytes:Symbol.Fixed), logicalSchema); + } + else + { + if (logicalSchema.BaseSchema.Tag == Schema.Type.Fixed) + { + byte[] fixedValue = new byte[((FixedSchema)logicalSchema.BaseSchema).Size]; + ReadFixed(fixedValue); + return dec.ConvertToLogicalValue(fixedValue, logicalSchema); + } + else + { + return dec.ConvertToLogicalValue(ReadBytes(), logicalSchema); + } + } + default: + break; + } + + Schema baseSchema = logicalSchema.BaseSchema; + switch (baseSchema.Tag) + { + case Schema.Type.Int: + return logicalSchema.LogicalType.ConvertToLogicalValue(ReadInt(), logicalSchema); + case Schema.Type.Long: + return logicalSchema.LogicalType.ConvertToLogicalValue(ReadLong(), logicalSchema); + case Schema.Type.Bytes: + return logicalSchema.LogicalType.ConvertToLogicalValue(ReadBytes(), logicalSchema); + case Schema.Type.String: + return logicalSchema.LogicalType.ConvertToLogicalValue(ReadString(), logicalSchema); + case Schema.Type.Fixed: + byte[] fixedValue = new byte[((FixedSchema)baseSchema).Size]; + ReadFixed(fixedValue); + return logicalSchema.LogicalType.ConvertToLogicalValue(fixedValue, logicalSchema); + default: + throw new AvroException($"Unsupported logical type: {logicalSchema.Tag}"); + } + } + /// public override void SkipNull() { @@ -651,7 +995,7 @@ public override Symbol DoAction(Symbol input, Symbol top) if (currentReorderBuffer != null && currentReorderBuffer.SavedFields.Count > 0) { throw TypeError("Unknown fields: " + currentReorderBuffer.SavedFields.Keys - .Aggregate((x, y) => x + ", " + y )); + .Aggregate((x, y) => x + ", " + y)); } currentReorderBuffer = reorderBuffers.Pop(); @@ -762,4 +1106,42 @@ private AvroTypeException TypeError(string type) return new AvroTypeException("Expected " + type + ". Got " + reader.TokenType); } } + + class CompositeJsonReader : JsonReader + { + private readonly JsonReader[] readers; + private int currentReader; + + public CompositeJsonReader(JsonReader reader1, JsonReader reader2) + { + this.readers = new[] { reader1, reader2 }; + currentReader = 0; + } + + public override object Value + { + get { return readers[currentReader].Value; } + } + + public override JsonToken TokenType + { + get { return readers[currentReader].TokenType; } + } + + public override bool Read() + { + if (readers[currentReader].Read()) + { + return true; + } + + currentReader++; + if (currentReader < readers.Length) + { + return true; + } + + return false; + } + } } diff --git a/lang/csharp/src/apache/main/IO/JsonEncoder.cs b/lang/csharp/src/apache/main/IO/JsonEncoder.cs index c159a013e8c..5c07c0600a9 100644 --- a/lang/csharp/src/apache/main/IO/JsonEncoder.cs +++ b/lang/csharp/src/apache/main/IO/JsonEncoder.cs @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -21,9 +21,12 @@ using System.IO; using System.Text; using Newtonsoft.Json; +using System; +using Avro.Util; namespace Avro.IO { + /// /// An for Avro's JSON data encoding. /// @@ -35,6 +38,7 @@ namespace Avro.IO public class JsonEncoder : ParsingEncoder, Parser.IActionHandler { private readonly Parser parser; + private readonly JsonMode mode = JsonMode.AvroJson; private JsonWriter writer; private bool includeNamespace = true; @@ -48,6 +52,13 @@ public JsonEncoder(Schema sc, Stream stream) : this(sc, GetJsonWriter(stream, fa { } + /// + /// Initializes a new instance of the class. + /// + public JsonEncoder(Schema sc, Stream stream, JsonMode mode) : this(sc, GetJsonWriter(stream, false), mode) + { + } + /// /// Initializes a new instance of the class. /// @@ -55,13 +66,28 @@ public JsonEncoder(Schema sc, Stream stream, bool pretty) : this(sc, GetJsonWrit { } + /// + /// Initializes a new instance of the class. + /// + public JsonEncoder(Schema sc, Stream stream, bool pretty, JsonMode mode) : this(sc, GetJsonWriter(stream, pretty), mode) + { + } + + /// + /// Initializes a new instance of the class. + /// + public JsonEncoder(Schema sc, JsonWriter writer): this(sc, writer, JsonMode.AvroJson) + { + } + /// /// Initializes a new instance of the class. /// - public JsonEncoder(Schema sc, JsonWriter writer) + public JsonEncoder(Schema sc, JsonWriter writer, JsonMode mode) { Configure(writer); - parser = new Parser((new JsonGrammarGenerator()).Generate(sc), this); + parser = new Parser((new JsonGrammarGenerator(mode)).Generate(sc), this); + this.mode = mode; } /// @@ -123,6 +149,8 @@ public void Configure(JsonWriter jsonWriter) } writer = jsonWriter; + writer.DateFormatHandling = DateFormatHandling.IsoDateFormat; + writer.DateTimeZoneHandling = DateTimeZoneHandling.RoundtripKind; } /// @@ -197,8 +225,16 @@ public override void WriteBytes(byte[] bytes, int start, int len) private void WriteByteArray(byte[] bytes, int start, int len) { - Encoding iso = Encoding.GetEncoding("ISO-8859-1"); - writer.WriteValue(iso.GetString(bytes, start, len)); + if ( mode == JsonMode.AvroJson) + { + Encoding iso = Encoding.GetEncoding("ISO-8859-1"); + writer.WriteValue(iso.GetString(bytes, start, len)); + } + else + { + var base64String = Convert.ToBase64String(bytes, start, len); + writer.WriteValue(base64String); + } } /// @@ -315,14 +351,60 @@ public override void WriteUnionIndex(int unionIndex) Symbol symbol = top.GetSymbol(unionIndex); if (symbol != Symbol.Null && includeNamespace) { - writer.WriteStartObject(); - writer.WritePropertyName(top.GetLabel(unionIndex)); - parser.PushSymbol(Symbol.UnionEnd); + // the wrapper is only written in AvroJson mode + if ( mode == JsonMode.AvroJson) + { + writer.WriteStartObject(); + writer.WritePropertyName(top.GetLabel(unionIndex)); + parser.PushSymbol(Symbol.UnionEnd); + } } - parser.PushSymbol(symbol); } + /// + /// + public override void WriteLogicalTypeValue(object value, LogicalSchema schema) + { + parser.Advance(parser.TopSymbol()); + switch (schema.LogicalType) + { + case Util.LogicalUnixEpochType dt: + if (mode == JsonMode.PlainJson) + { + writer.WriteValue(dt.ConvertToBaseValue(value, schema)); + } + else + { + writer.WriteValue(dt.ConvertToBaseValue(value, schema)); + } + break; + case Util.LogicalUnixEpochType ts: + if (mode == JsonMode.PlainJson) + { + writer.WriteValue(ts.ConvertToBaseValue(value, schema)); + } + else + { + writer.WriteValue(ts.ConvertToBaseValue(value, schema)); + } + break; + case Util.Decimal dec: + if (mode == JsonMode.PlainJson) + { + writer.WriteValue(dec.ConvertToBaseValue(value, schema)); + } + else + { + writer.WriteValue(dec.ConvertToBaseValue(value, schema)); + } + break; + default: + writer.WriteValue(schema.LogicalType.ConvertToBaseValue(value, schema)); + break; + } + } + /// /// Perform an action based on the given input. /// diff --git a/lang/csharp/src/apache/main/IO/JsonEncoderMode.cs b/lang/csharp/src/apache/main/IO/JsonEncoderMode.cs new file mode 100644 index 00000000000..a470b80ba89 --- /dev/null +++ b/lang/csharp/src/apache/main/IO/JsonEncoderMode.cs @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Avro.IO +{ + /// + /// The mode of encoding JSON data. + /// + public enum JsonMode + { + /// + /// Avro's JSON data encoding. + /// + AvroJson, + /// + /// Plain JSON data encoding. + /// + PlainJson + } +} diff --git a/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs b/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs index 508ea264b83..f8e07f09d9d 100644 --- a/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs +++ b/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,6 +18,7 @@ using System; using System.Collections.Generic; +using Avro.Util; namespace Avro.IO.Parsing { @@ -27,6 +28,15 @@ namespace Avro.IO.Parsing /// public class JsonGrammarGenerator : ValidatingGrammarGenerator { + private readonly JsonMode mode; + + /// + /// Creates a new instance of the JsonGrammarGenerator class. + /// + public JsonGrammarGenerator(JsonMode mode) : base() + { + this.mode = mode; + } /// /// Returns the non-terminal that is the start symbol for the grammar for the /// grammar for the given schema schema. @@ -84,7 +94,12 @@ protected override Symbol Generate(Schema sc, IDictionary seen) production[--i] = Symbol.RecordStart; foreach (Field f in ((RecordSchema)sc).Fields) { - production[--i] = new Symbol.FieldAdjustAction(n, f.Name, f.Aliases); + var name = f.Name; + if (f.AlternateNames != null && f.AlternateNames.TryGetValue("json", out string jsonName)) + { + name = jsonName; + } + production[--i] = new Symbol.FieldAdjustAction(n, name, f.Aliases); production[--i] = Generate(f.Schema, seen); production[--i] = Symbol.FieldEnd; n++; @@ -96,7 +111,15 @@ protected override Symbol Generate(Schema sc, IDictionary seen) return rresult; } case Schema.Type.Logical: - return Generate((sc as LogicalSchema).BaseSchema, seen); + LogicalSchema logicalSchema = (LogicalSchema)sc; + if (mode == JsonMode.PlainJson && logicalSchema.LogicalType is LogicalUnixEpochType) + { + return Symbol.JsonDateTime; + } + else + { + return Generate((sc as LogicalSchema).BaseSchema, seen); + } default: throw new Exception("Unexpected schema type"); } diff --git a/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs b/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs index d5f4ee09c43..b4a79987ee0 100644 --- a/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs +++ b/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -908,6 +908,10 @@ public virtual int FindLabel(string label) /// public static Symbol String { get; } = new Terminal("string"); /// + /// JsonDateTime + /// + public static Symbol JsonDateTime { get; } = new Terminal("string"); + /// /// Bytes /// public static Symbol Bytes { get; } = new Terminal("bytes"); diff --git a/lang/csharp/src/apache/main/IO/ParsingDecoder.cs b/lang/csharp/src/apache/main/IO/ParsingDecoder.cs index ce327613306..ec2024bca53 100644 --- a/lang/csharp/src/apache/main/IO/ParsingDecoder.cs +++ b/lang/csharp/src/apache/main/IO/ParsingDecoder.cs @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -74,6 +74,9 @@ public abstract class ParsingDecoder : Decoder, Parser.IActionHandler, SkipParse /// public abstract void ReadFixed(byte[] buffer, int start, int length); + /// + public abstract object ReadLogicalTypeValue(LogicalSchema logicalSchema); + /// public abstract void SkipNull(); diff --git a/lang/csharp/src/apache/main/IO/ParsingEncoder.cs b/lang/csharp/src/apache/main/IO/ParsingEncoder.cs index 637a6e3465a..ff8a74731ab 100644 --- a/lang/csharp/src/apache/main/IO/ParsingEncoder.cs +++ b/lang/csharp/src/apache/main/IO/ParsingEncoder.cs @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -88,6 +88,9 @@ public abstract class ParsingEncoder : Encoder /// public abstract void WriteFixed(byte[] data, int start, int len); + /// + public abstract void WriteLogicalTypeValue(object value, LogicalSchema schema); + /// public abstract void Flush(); diff --git a/lang/csharp/src/apache/main/Schema/Aliases.cs b/lang/csharp/src/apache/main/Schema/Aliases.cs index 6574e3163d6..1956a8d725c 100644 --- a/lang/csharp/src/apache/main/Schema/Aliases.cs +++ b/lang/csharp/src/apache/main/Schema/Aliases.cs @@ -29,8 +29,8 @@ internal static IList GetSchemaNames(IEnumerable aliases, st return null; } - SchemaName enclosingSchemaName = new SchemaName(enclosingTypeName, enclosingTypeNamespace, null, null); - return aliases.Select(alias => new SchemaName(alias, enclosingSchemaName.Namespace, null, null)).ToList(); + SchemaName enclosingSchemaName = new SchemaName(enclosingTypeName, enclosingTypeNamespace, null, null, null); + return aliases.Select(alias => new SchemaName(alias, enclosingSchemaName.Namespace, null, null, null)).ToList(); } } } diff --git a/lang/csharp/src/apache/main/Schema/EnumSchema.cs b/lang/csharp/src/apache/main/Schema/EnumSchema.cs index 225780310a6..0e15c97231d 100644 --- a/lang/csharp/src/apache/main/Schema/EnumSchema.cs +++ b/lang/csharp/src/apache/main/Schema/EnumSchema.cs @@ -33,6 +33,11 @@ public class EnumSchema : NamedSchema /// public IList Symbols { get; private set; } + /// + /// Map of maps of alternate names for the enum symbols + /// + public IDictionary> AlternateSymbols { get; private set; } + /// /// The default token to use when deserializing an enum when the provided token is not found /// @@ -54,22 +59,27 @@ public class EnumSchema : NamedSchema /// Name of enum /// Namespace of enum /// List of aliases for the name + /// Dictionary of alternate names for this schema name /// List of enum symbols /// Custom properties on this schema /// Documentation for this named schema /// + /// Map of alternate names for the enum symbols public static EnumSchema Create(string name, IEnumerable symbols, string space = null, IEnumerable aliases = null, + IDictionary alternateNames = null, PropertyMap customProperties = null, string doc = null, - string defaultSymbol = null) + string defaultSymbol = null, + IDictionary> altsymbols = null) { - return new EnumSchema(new SchemaName(name, space, null, doc), + return new EnumSchema(new SchemaName(name, space, null, doc, alternateNames), Aliases.GetSchemaNames(aliases, name, space), symbols.ToList(), CreateSymbolsMap(symbols), + altsymbols, customProperties, new SchemaNames(), doc, @@ -88,10 +98,12 @@ internal static EnumSchema NewInstance(JToken jtok, PropertyMap props, SchemaNam { SchemaName name = NamedSchema.GetName(jtok, encspace); var aliases = NamedSchema.GetAliases(jtok, name.Space, name.EncSpace); - + JArray jsymbols = jtok["symbols"] as JArray; if (null == jsymbols) throw new SchemaParseException($"Enum has no symbols: {name} at '{jtok.Path}'"); + + var altsymbols = GetAlternateSymbols(jtok); List symbols = new List(); IDictionary symbolMap = new Dictionary(); @@ -107,7 +119,7 @@ internal static EnumSchema NewInstance(JToken jtok, PropertyMap props, SchemaNam } try { - return new EnumSchema(name, aliases, symbols, symbolMap, props, names, + return new EnumSchema(name, aliases, symbols, symbolMap, altsymbols, props, names, JsonHelper.GetOptionalString(jtok, "doc"), JsonHelper.GetOptionalString(jtok, "default")); } catch (AvroException e) @@ -123,18 +135,22 @@ internal static EnumSchema NewInstance(JToken jtok, PropertyMap props, SchemaNam /// list of aliases for the name /// list of enum symbols /// map of enum symbols and value + /// map of alternate names for the enum symbols /// custom properties on this schema /// list of named schema already read /// documentation for this named schema /// default symbol private EnumSchema(SchemaName name, IList aliases, List symbols, - IDictionary symbolMap, PropertyMap props, SchemaNames names, + IDictionary symbolMap, + IDictionary> altsymbols, + PropertyMap props, SchemaNames names, string doc, string defaultSymbol) : base(Type.Enumeration, name, aliases, props, names, doc) { if (null == name.Name) throw new AvroException("name cannot be null for enum schema."); this.Symbols = symbols; this.symbolMap = symbolMap; + this.AlternateSymbols = altsymbols; if (null != defaultSymbol && !symbolMap.ContainsKey(defaultSymbol)) throw new AvroException($"Default symbol: {defaultSymbol} not found in symbols"); @@ -175,6 +191,39 @@ private static void ValidateSymbolName(string symbol) } } + + /// + /// Parses the 'altnames' property from the given JSON token + /// + /// JSON object to read + /// Dictionary of alternate names. If no 'altnames' specified, then it returns null. + protected static IDictionary> GetAlternateSymbols(JToken jtok) + { + JToken jaltsymbols = jtok["altsymbols"]; + if (null == jaltsymbols) + return null; + + if (jaltsymbols.Type != JTokenType.Object) + throw new SchemaParseException($"Aliases must be of format JSON object at '{jtok.Path}'"); + + var altsymbols = new Dictionary>(); + foreach (JProperty jmap in jaltsymbols.Children()) + { + if (jmap.Value.Type != JTokenType.Object) + throw new SchemaParseException($"Aliases must be of format JSON object at '{jtok.Path}'"); + Dictionary altmap = new Dictionary(); + foreach (JProperty jalt in jmap.Value.Children()) + { + if (jalt.Value.Type != JTokenType.String) + throw new SchemaParseException($"Alternate symbol must be of type string at '{jtok.Path}'"); + + altmap.Add(jmap.Name, (string)jalt.Value); + } + altsymbols.Add(jmap.Name, altmap); + } + return altsymbols; + } + /// /// Writes enum schema in JSON format /// @@ -190,6 +239,23 @@ protected internal override void WriteJsonFields(Newtonsoft.Json.JsonTextWriter foreach (string s in this.Symbols) writer.WriteValue(s); writer.WriteEndArray(); + if (null != AlternateSymbols) + { + writer.WritePropertyName("altsymbols"); + writer.WriteStartObject(); + foreach (var entry in AlternateSymbols) + { + writer.WritePropertyName(entry.Key); + writer.WriteStartObject(); + foreach (var alt in entry.Value) + { + writer.WritePropertyName(alt.Key); + writer.WriteValue(alt.Value); + } + writer.WriteEndObject(); + } + writer.WriteEndObject(); + } if (null != Default) { writer.WritePropertyName("default"); diff --git a/lang/csharp/src/apache/main/Schema/Field.cs b/lang/csharp/src/apache/main/Schema/Field.cs index 799f265b320..77b99aa73a8 100644 --- a/lang/csharp/src/apache/main/Schema/Field.cs +++ b/lang/csharp/src/apache/main/Schema/Field.cs @@ -54,6 +54,11 @@ public enum SortOrder /// public readonly string Name; + /// + /// Dictionary of alternate names for this field + /// + public IDictionary AlternateNames { get; private set; } + /// /// List of aliases for the field name. /// @@ -103,6 +108,7 @@ public enum SortOrder /// schema for the field type. /// name of the field. /// list of aliases for the name of the field. + /// dictionary of alternate names for the field /// position of the field. /// documentation for the field. /// field's default value if it exists. @@ -112,11 +118,12 @@ public Field(Schema schema, string name, int pos, IList aliases = null, + IDictionary alternateNames = null, string doc = null, JToken defaultValue = null, SortOrder sortorder = SortOrder.ignore, PropertyMap customProperties = null) - : this(schema, name, aliases, pos, doc, defaultValue, sortorder, customProperties) + : this(schema, name, aliases, alternateNames, pos, doc, defaultValue, sortorder, customProperties) { } @@ -126,7 +133,7 @@ public Field(Schema schema, /// A clone of this field with new position. internal Field ChangePosition(int newPosition) { - return new Field(Schema, Name, newPosition, Aliases, Documentation, DefaultValue, Ordering ?? SortOrder.ignore, Props); + return new Field(Schema, Name, newPosition, Aliases, AlternateNames, Documentation, DefaultValue, Ordering ?? SortOrder.ignore, Props); } /// @@ -135,6 +142,7 @@ internal Field ChangePosition(int newPosition) /// schema for the field type /// name of the field /// list of aliases for the name of the field + /// dictionary of alternate names for the field /// position of the field /// documentation for the field /// field's default value if it exists @@ -145,7 +153,7 @@ internal Field ChangePosition(int newPosition) /// or /// type - type cannot be null. /// - internal Field(Schema schema, string name, IList aliases, int pos, string doc, + internal Field(Schema schema, string name, IList aliases, IDictionary alternateNames, int pos, string doc, JToken defaultValue, SortOrder sortorder, PropertyMap props) { if (string.IsNullOrEmpty(name)) @@ -200,6 +208,17 @@ protected internal void writeJson(JsonTextWriter writer, SchemaNames names, stri writer.WriteEndArray(); } + if (null != AlternateNames) + { + writer.WritePropertyName("altnames"); + writer.WriteStartObject(); + foreach (KeyValuePair entry in AlternateNames) + { + writer.WritePropertyName(entry.Key); + writer.WriteValue(entry.Value); + } + writer.WriteEndObject(); + } writer.WriteEndObject(); } @@ -229,6 +248,31 @@ internal static IList GetAliases(JToken jtok) return aliases; } + /// + /// Parses the 'altnames' property from the given JSON token + /// + /// JSON object to read + /// Dictionary of alternate names. If no 'altnames' specified, then it returns null. + internal static IDictionary GetAlternateNames(JToken jtok) + { + JToken jaliases = jtok["altnames"]; + if (null == jaliases) + return null; + + if (jaliases.Type != JTokenType.Object) + throw new SchemaParseException($"Aliases must be of format JSON object at '{jtok.Path}'"); + + var altnames = new Dictionary(); + foreach (JProperty jalias in jaliases.Children()) + { + if (jalias.Value.Type != JTokenType.String) + throw new SchemaParseException($"Aliases must be of format JSON object at '{jtok.Path}'"); + + altnames.Add(jalias.Name, (string)jalias.Value); + } + return altnames; + } + /// /// Returns the field's custom property value given the property name /// diff --git a/lang/csharp/src/apache/main/Schema/FixedSchema.cs b/lang/csharp/src/apache/main/Schema/FixedSchema.cs index 2b24e6b8689..31a98d290e2 100644 --- a/lang/csharp/src/apache/main/Schema/FixedSchema.cs +++ b/lang/csharp/src/apache/main/Schema/FixedSchema.cs @@ -43,7 +43,7 @@ public class FixedSchema : NamedSchema /// Documentation for this named schema public static FixedSchema Create(string name, int size, string space = null, IEnumerable aliases = null, PropertyMap customProperties = null, string doc = null) { - return new FixedSchema(new SchemaName(name, space, null, doc), Aliases.GetSchemaNames(aliases, name, space), size, customProperties, new SchemaNames(), doc); + return new FixedSchema(new SchemaName(name, space, null, doc, null), Aliases.GetSchemaNames(aliases, name, space), size, customProperties, new SchemaNames(), doc); } /// diff --git a/lang/csharp/src/apache/main/Schema/NamedSchema.cs b/lang/csharp/src/apache/main/Schema/NamedSchema.cs index fe9d2145b1d..c036a486db9 100644 --- a/lang/csharp/src/apache/main/Schema/NamedSchema.cs +++ b/lang/csharp/src/apache/main/Schema/NamedSchema.cs @@ -41,6 +41,7 @@ public override string Name get { return SchemaName.Name; } } + /// /// Namespace of the schema /// @@ -91,7 +92,7 @@ internal static NamedSchema NewInstance(JObject jo, PropertyMap props, SchemaNam return RecordSchema.NewInstance(Type.Error, jo, props, names, encspace); default: NamedSchema result; - if (names.TryGetValue(type, null, encspace, doc, out result)) + if (names.TryGetValue(type, null, encspace, doc, null, out result)) return result; return null; } @@ -107,8 +108,7 @@ internal static NamedSchema NewInstance(JObject jo, PropertyMap props, SchemaNam /// list of named schemas already read /// documentation for this named schema protected NamedSchema(Type type, SchemaName name, IList aliases, PropertyMap props, SchemaNames names, - string doc) - : base(type, props) + string doc) : base(type, props) { this.SchemaName = name; this.Documentation = doc; @@ -130,7 +130,7 @@ protected static SchemaName GetName(JToken jtok, string encspace) String n = JsonHelper.GetOptionalString(jtok, "name"); // Changed this to optional string for anonymous records in messages String ns = JsonHelper.GetOptionalString(jtok, "namespace"); String d = JsonHelper.GetOptionalString(jtok, "doc"); - return new SchemaName(n, ns, encspace, d); + return new SchemaName(n, ns, encspace, d, GetAlternateNames(jtok)); } /// @@ -155,11 +155,36 @@ protected static IList GetAliases(JToken jtok, string space, string if (jalias.Type != JTokenType.String) throw new SchemaParseException($"Aliases must be of format JSON array of strings at '{jtok.Path}'"); - aliases.Add(new SchemaName((string)jalias, space, encspace, null)); + aliases.Add(new SchemaName((string)jalias, space, encspace, null, GetAlternateNames(jtok))); } return aliases; } + /// + /// Parses the 'altnames' property from the given JSON token + /// + /// JSON object to read + /// Dictionary of alternate names. If no 'altnames' specified, then it returns null. + protected static IDictionary GetAlternateNames(JToken jtok) + { + JToken jaliases = jtok["altnames"]; + if (null == jaliases) + return null; + + if (jaliases.Type != JTokenType.Object) + throw new SchemaParseException($"Aliases must be of format JSON object at '{jtok.Path}'"); + + var altnames = new Dictionary(); + foreach (JProperty jalias in jaliases.Children()) + { + if (jalias.Value.Type != JTokenType.String) + throw new SchemaParseException($"Aliases must be of format JSON object at '{jtok.Path}'"); + + altnames.Add(jalias.Name, (string)jalias.Value); + } + return altnames; + } + /// /// Determines whether the given schema name is one of this 's /// aliases. diff --git a/lang/csharp/src/apache/main/Schema/RecordSchema.cs b/lang/csharp/src/apache/main/Schema/RecordSchema.cs index 910bc466fe9..8ace268cbfe 100644 --- a/lang/csharp/src/apache/main/Schema/RecordSchema.cs +++ b/lang/csharp/src/apache/main/Schema/RecordSchema.cs @@ -72,15 +72,17 @@ public List Fields /// list of aliases for the record name /// custom properties on this schema /// documentation for this named schema + /// alternate names for this schema public static RecordSchema Create(string name, List fields, string space = null, IEnumerable aliases = null, PropertyMap customProperties = null, - string doc = null) + string doc = null, + IDictionary alternateNames = null) { return new RecordSchema(Type.Record, - new SchemaName(name, space, null, doc), + new SchemaName(name, space, null, doc, alternateNames), Aliases.GetSchemaNames(aliases, name, space), customProperties, fields, @@ -187,6 +189,7 @@ internal static RecordSchema NewInstance(Type type, JToken jtok, PropertyMap pro var name = GetName(jtok, encspace); var aliases = NamedSchema.GetAliases(jtok, name.Space, name.EncSpace); + var altnames = NamedSchema.GetAlternateNames(jtok); var fields = new List(); var fieldMap = new Dictionary(); var fieldAliasMap = new Dictionary(); @@ -239,7 +242,7 @@ internal static RecordSchema NewInstance(Type type, JToken jtok, PropertyMap pro /// map of field aliases and field objects /// list of named schema already read /// documentation for this named schema - private RecordSchema(Type type, SchemaName name, IList aliases, PropertyMap props, + private RecordSchema(Type type, SchemaName name, IList aliases, PropertyMap props, List fields, bool request, IDictionary fieldMap, IDictionary fieldAliasMap, SchemaNames names, string doc) : base(type, name, aliases, props, names, doc) @@ -270,6 +273,7 @@ private static Field createField(JToken jfield, int pos, SchemaNames names, stri sortorder = (Field.SortOrder)Enum.Parse(typeof(Field.SortOrder), jorder); var aliases = Field.GetAliases(jfield); + var alternateNames = Field.GetAlternateNames(jfield); var props = Schema.GetProperties(jfield); var defaultValue = jfield["default"]; @@ -277,7 +281,7 @@ private static Field createField(JToken jfield, int pos, SchemaNames names, stri if (null == jtype) throw new SchemaParseException($"'type' was not found for field: name at '{jfield.Path}'"); var schema = Schema.ParseJson(jtype, names, encspace); - return new Field(schema, name, aliases, pos, doc, defaultValue, sortorder, props); + return new Field(schema, name, aliases, alternateNames, pos, doc, defaultValue, sortorder, props); } private static void addToFieldMap(Dictionary map, string name, Field field) diff --git a/lang/csharp/src/apache/main/Schema/Schema.cs b/lang/csharp/src/apache/main/Schema/Schema.cs index 3e54653f015..c81c8d6c173 100644 --- a/lang/csharp/src/apache/main/Schema/Schema.cs +++ b/lang/csharp/src/apache/main/Schema/Schema.cs @@ -166,7 +166,7 @@ internal static Schema ParseJson(JToken jtok, SchemaNames names, string encspace if (null != ps) return ps; NamedSchema schema = null; - if (names.TryGetValue(value, null, encspace, null, out schema)) return schema; + if (names.TryGetValue(value, null, encspace, null, null, out schema)) return schema; throw new SchemaParseException($"Undefined name: {value} at '{jtok.Path}'"); } diff --git a/lang/csharp/src/apache/main/Schema/SchemaName.cs b/lang/csharp/src/apache/main/Schema/SchemaName.cs index 7716d7a55ff..775080e8fef 100644 --- a/lang/csharp/src/apache/main/Schema/SchemaName.cs +++ b/lang/csharp/src/apache/main/Schema/SchemaName.cs @@ -58,6 +58,11 @@ public class SchemaName /// public String Namespace { get { return string.IsNullOrEmpty(this.Space) ? this.EncSpace : this.Space; } } + /// + /// Dictionary of alternate names for this schema name + /// + public IDictionary AlternateNames { get; private set; } + /// /// Constructor for SchemaName /// @@ -65,8 +70,10 @@ public class SchemaName /// namespace of the schema /// enclosing namespace of the schema /// documentation of the schema - public SchemaName(string name, string space, string encspace, string documentation) + /// alternate names for the schema + public SchemaName(string name, string space, string encspace, string documentation, IDictionary alternateNames) { + AlternateNames = alternateNames; if (name == null) { // anonymous Name = Space = null; @@ -116,6 +123,17 @@ internal void WriteJson(Newtonsoft.Json.JsonTextWriter writer, SchemaNames names else if (!String.IsNullOrEmpty(this.EncSpace)) // need to put enclosing name space for code generated classes JsonHelper.writeIfNotNullOrEmpty(writer, "namespace", this.EncSpace); } + if (null != this.AlternateNames) + { + writer.WritePropertyName("altnames"); + writer.WriteStartObject(); + foreach (KeyValuePair altname in this.AlternateNames) + { + writer.WritePropertyName(altname.Key); + writer.WriteValue(altname.Value); + } + writer.WriteEndObject(); + } } /// @@ -216,11 +234,12 @@ public bool Add(NamedSchema schema) /// namespace of the schema /// enclosing namespace of the schema /// documentation for the schema + /// alternate names for the schema /// schema object found /// true if name is found in the map, false otherwise - public bool TryGetValue(string name, string space, string encspace, string documentation, out NamedSchema schema) + public bool TryGetValue(string name, string space, string encspace, string documentation, IDictionary alternateNames, out NamedSchema schema) { - SchemaName schemaname = new SchemaName(name, space, encspace, documentation); + SchemaName schemaname = new SchemaName(name, space, encspace, documentation, alternateNames); return Names.TryGetValue(schemaname, out schema); } diff --git a/lang/csharp/src/apache/main/Util/Date.cs b/lang/csharp/src/apache/main/Util/Date.cs index 0f52b586ad4..e036c094689 100644 --- a/lang/csharp/src/apache/main/Util/Date.cs +++ b/lang/csharp/src/apache/main/Util/Date.cs @@ -46,16 +46,46 @@ public override void ValidateSchema(LogicalSchema schema) /// public override object ConvertToBaseValue(object logicalValue, LogicalSchema schema) + { + return ConvertToBaseValue(logicalValue, schema); + } + + /// + public override T ConvertToBaseValue(object logicalValue, LogicalSchema schema) { var date = ((DateTime)logicalValue).Date; - return (date - UnixEpochDateTime).Days; + if (typeof(T) == typeof(DateTime)) + { + return (T)(object)date; + } + if (typeof(T) == typeof(int)) + { + return (T)(object)(date - UnixEpochDateTime).Days; + } + if (typeof(T) == typeof(string)) + { + return (T)(object)(date.ToString("yyyy-MM-dd")); + } + throw new AvroTypeException($"Cannot convert logical type '{Name}' to '{typeof(T).Name}'"); } /// public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) { - var noDays = (int)baseValue; - return UnixEpochDateTime.AddDays(noDays); + if (baseValue is int) + { + var noDays = (int)baseValue; + return UnixEpochDateTime.AddDays(noDays); + } + else if (baseValue is DateTime) + { + return ((DateTime)baseValue).Date; + } + else if (baseValue is string) + { + return DateTime.Parse((string)baseValue).Date; + } + throw new AvroTypeException($"Cannot convert base value '{baseValue}' to logical type '{Name}'"); } } } diff --git a/lang/csharp/src/apache/main/Util/Decimal.cs b/lang/csharp/src/apache/main/Util/Decimal.cs index 3714bd18fe8..546c9a0e760 100644 --- a/lang/csharp/src/apache/main/Util/Decimal.cs +++ b/lang/csharp/src/apache/main/Util/Decimal.cs @@ -19,6 +19,7 @@ using System; using System.Globalization; using System.Numerics; +using System.Runtime.CompilerServices; using Avro.Generic; namespace Avro.Util @@ -64,35 +65,65 @@ public override void ValidateSchema(LogicalSchema schema) /// public override object ConvertToBaseValue(object logicalValue, LogicalSchema schema) { - var decimalValue = (AvroDecimal)logicalValue; - var logicalScale = GetScalePropertyValueFromSchema(schema); - var scale = decimalValue.Scale; + return ConvertToBaseValue(logicalValue, schema); + } - if (scale != logicalScale) - throw new ArgumentOutOfRangeException(nameof(logicalValue), $"The decimal value has a scale of {scale} which cannot be encoded against a logical 'decimal' with a scale of {logicalScale}"); + /// + public override T ConvertToBaseValue(object logicalValue, LogicalSchema schema) + { + if ( typeof(T) == typeof(object) ) + { + var decimalValue = (AvroDecimal)logicalValue; + var logicalScale = GetScalePropertyValueFromSchema(schema); + var scale = decimalValue.Scale; - var buffer = decimalValue.UnscaledValue.ToByteArray(); + if (scale != logicalScale) + throw new ArgumentOutOfRangeException(nameof(logicalValue), $"The decimal value has a scale of {scale} which cannot be encoded against a logical 'decimal' with a scale of {logicalScale}"); - Array.Reverse(buffer); + var buffer = decimalValue.UnscaledValue.ToByteArray(); - return Schema.Type.Bytes == schema.BaseSchema.Tag - ? (object)buffer - : (object)new GenericFixed( - (FixedSchema)schema.BaseSchema, - GetDecimalFixedByteArray(buffer, ((FixedSchema)schema.BaseSchema).Size, - decimalValue.Sign < 0 ? (byte)0xFF : (byte)0x00)); - } + Array.Reverse(buffer); + + return Schema.Type.Bytes == schema.BaseSchema.Tag + ? (T)(object)buffer + : (T)(object)new GenericFixed( + (FixedSchema)schema.BaseSchema, + GetDecimalFixedByteArray(buffer, ((FixedSchema)schema.BaseSchema).Size, + decimalValue.Sign < 0 ? (byte)0xFF : (byte)0x00)); + } + else if (typeof(T) == typeof(decimal)) + { + return (T)(object)((AvroDecimal)logicalValue).ToType(); + } + else + { + throw new AvroTypeException($"Unsupported conversion to {typeof(T)}"); + } + } /// public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) { - var buffer = Schema.Type.Bytes == schema.BaseSchema.Tag - ? (byte[])baseValue - : ((GenericFixed)baseValue).Value; - - Array.Reverse(buffer); - - return new AvroDecimal(new BigInteger(buffer), GetScalePropertyValueFromSchema(schema)); + if (baseValue is decimal) + { + return new AvroDecimal((decimal)baseValue); + } + else if ( baseValue is byte[] ) + { + var buffer = (byte[])baseValue; + Array.Reverse(buffer); + return new AvroDecimal(new BigInteger(buffer), GetScalePropertyValueFromSchema(schema)); + } + else if ( baseValue is GenericFixed ) + { + var buffer = ((GenericFixed)baseValue).Value; + Array.Reverse(buffer); + return new AvroDecimal(new BigInteger(buffer), GetScalePropertyValueFromSchema(schema)); + } + else + { + throw new AvroTypeException($"Unsupported conversion from {baseValue.GetType()}"); + } } /// diff --git a/lang/csharp/src/apache/main/Util/LocalTimestampMicrosecond.cs b/lang/csharp/src/apache/main/Util/LocalTimestampMicrosecond.cs index 36014c97aef..39d533c7047 100644 --- a/lang/csharp/src/apache/main/Util/LocalTimestampMicrosecond.cs +++ b/lang/csharp/src/apache/main/Util/LocalTimestampMicrosecond.cs @@ -50,14 +50,50 @@ public override void ValidateSchema(LogicalSchema schema) /// public override object ConvertToBaseValue(object logicalValue, LogicalSchema schema) { - DateTime date = ((DateTime)logicalValue).ToUniversalTime(); - return (date - UnixEpochDateTime).Ticks / TicksPerMicrosecond; + return ConvertToBaseValue(logicalValue, schema); + } + + /// + override public T ConvertToBaseValue(object logicalValue, LogicalSchema schema) + { + if ( typeof(T) == typeof(long) ) + { + DateTime date = ((DateTime)logicalValue).ToUniversalTime(); + return (T)(object)((date - UnixEpochDateTime).Ticks / TicksPerMicrosecond); + } + else if ( typeof(T) == typeof(DateTime) ) + { + return (T)(object)((DateTime)logicalValue).ToUniversalTime(); + } + else if ( typeof(T) == typeof(string) ) + { + return (T)(object)((DateTime)logicalValue).ToString("O"); + } + else + { + throw new AvroTypeException("Invalid type conversion"); + } } /// public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) { - return UnixEpochDateTime.AddTicks((long)baseValue * TicksPerMicrosecond).ToLocalTime(); + if (baseValue is long) + { + return UnixEpochDateTime.AddTicks((long)baseValue * TicksPerMicrosecond).ToLocalTime(); + } + else if (baseValue is DateTime) + { + return (DateTime)baseValue; + } + else if (baseValue is string) + { + return DateTime.Parse((string)baseValue); + } + else + { + throw new AvroTypeException("Invalid type conversion"); + } } } } diff --git a/lang/csharp/src/apache/main/Util/LocalTimestampMillisecond.cs b/lang/csharp/src/apache/main/Util/LocalTimestampMillisecond.cs index 4ae86fd087b..b5e27493511 100644 --- a/lang/csharp/src/apache/main/Util/LocalTimestampMillisecond.cs +++ b/lang/csharp/src/apache/main/Util/LocalTimestampMillisecond.cs @@ -49,14 +49,50 @@ public override void ValidateSchema(LogicalSchema schema) /// public override object ConvertToBaseValue(object logicalValue, LogicalSchema schema) { - DateTime date = ((DateTime)logicalValue).ToUniversalTime(); - return (long)(date - UnixEpochDateTime).TotalMilliseconds; + return ConvertToBaseValue(logicalValue, schema); + } + + /// + public override T ConvertToBaseValue(object logicalValue, LogicalSchema schema) + { + if (typeof(T) == typeof(long)) + { + DateTime date = ((DateTime)logicalValue).ToUniversalTime(); + return (T)(object)(long)(date - UnixEpochDateTime).TotalMilliseconds; + } + else if (typeof(T) == typeof(DateTime)) + { + return (T)(object)((DateTime)logicalValue).ToUniversalTime(); + } + else if (typeof(T) == typeof(string)) + { + return (T)(object)((DateTime)logicalValue).ToString("O"); + } + else + { + throw new AvroTypeException($"Unsupported conversion to {typeof(T)}"); + } } /// public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) { - return UnixEpochDateTime.AddMilliseconds((long)baseValue).ToLocalTime(); + if (baseValue is long || baseValue is int ) + { + return UnixEpochDateTime.AddMilliseconds((long)baseValue).ToLocalTime(); + } + else if (baseValue is DateTime) + { + return (DateTime)baseValue; + } + else if (baseValue is string) + { + return DateTime.Parse((string)baseValue).ToLocalTime(); + } + else + { + throw new AvroTypeException($"Unsupported conversion from {baseValue.GetType()}"); + } } } } diff --git a/lang/csharp/src/apache/main/Util/LogicalType.cs b/lang/csharp/src/apache/main/Util/LogicalType.cs index fec6bda36f3..17edb348569 100644 --- a/lang/csharp/src/apache/main/Util/LogicalType.cs +++ b/lang/csharp/src/apache/main/Util/LogicalType.cs @@ -54,6 +54,14 @@ public virtual void ValidateSchema(LogicalSchema schema) /// An object representing the encoded value of the base type. public abstract object ConvertToBaseValue(object logicalValue, LogicalSchema schema); + /// + /// Converts a logical value to an instance of type T + /// + /// The logical value to convert. + /// The schema that represents the target of the conversion. + /// The type to convert to. + public abstract T ConvertToBaseValue(object logicalValue, LogicalSchema schema); + /// /// Converts a base value to an instance of the logical type. /// diff --git a/lang/csharp/src/apache/main/Util/TimeMicrosecond.cs b/lang/csharp/src/apache/main/Util/TimeMicrosecond.cs index c3226f625ef..8483110348e 100644 --- a/lang/csharp/src/apache/main/Util/TimeMicrosecond.cs +++ b/lang/csharp/src/apache/main/Util/TimeMicrosecond.cs @@ -56,15 +56,49 @@ public override object ConvertToBaseValue(object logicalValue, LogicalSchema sch return (time - UnixEpochDateTime.TimeOfDay).Ticks / TicksPerMicrosecond; } + /// + public override T ConvertToBaseValue(object logicalValue, LogicalSchema schema) + { + if (typeof(T) == typeof(long)) + { + var time = (TimeSpan)logicalValue; + + ThrowIfOutOfRange(time, nameof(logicalValue)); + + return (T)(object)((time - UnixEpochDateTime.TimeOfDay).Ticks / TicksPerMicrosecond); + } + else if (typeof(T) == typeof(TimeSpan)) + { + return (T)logicalValue; + } + else if (typeof(T) == typeof(string)) + { + return (T)(object)((TimeSpan)logicalValue).ToString("c"); + } + throw new AvroTypeException($"'{LogicalTypeName}' can only be converted to 'long', not '{typeof(T).Name}'."); + } + /// public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) { - var time = TimeSpan.FromTicks((long)baseValue * TicksPerMicrosecond); + if (baseValue is long) + { + var time = TimeSpan.FromTicks((long)baseValue * TicksPerMicrosecond); - ThrowIfOutOfRange(time, nameof(baseValue)); + ThrowIfOutOfRange(time, nameof(baseValue)); - // Note: UnixEpochDateTime.TimeOfDay is '00:00:00', so the Add is meaningless. This could be 'return time;' - return UnixEpochDateTime.TimeOfDay.Add(time); + // Note: UnixEpochDateTime.TimeOfDay is '00:00:00', so the Add is meaningless. This could be 'return time;' + return UnixEpochDateTime.TimeOfDay.Add(time); + } + else if (baseValue is TimeSpan) + { + return baseValue; + } + else if (baseValue is string) + { + return TimeSpan.Parse((string)baseValue); + } + throw new AvroTypeException($"'{LogicalTypeName}' can only be converted from 'long', not '{baseValue.GetType().Name}'."); } private static void ThrowIfOutOfRange(TimeSpan time, string paramName) diff --git a/lang/csharp/src/apache/main/Util/TimeMillisecond.cs b/lang/csharp/src/apache/main/Util/TimeMillisecond.cs index d3132560063..4ad665374df 100644 --- a/lang/csharp/src/apache/main/Util/TimeMillisecond.cs +++ b/lang/csharp/src/apache/main/Util/TimeMillisecond.cs @@ -48,23 +48,53 @@ public override void ValidateSchema(LogicalSchema schema) /// public override object ConvertToBaseValue(object logicalValue, LogicalSchema schema) { - var time = (TimeSpan)logicalValue; + return ConvertToBaseValue(logicalValue, schema); + } + + /// + public override T ConvertToBaseValue(object logicalValue, LogicalSchema schema) + { + if (typeof(T) == typeof(int)) + { + var time = (TimeSpan)logicalValue; - ThrowIfOutOfRange(time, nameof(logicalValue)); + ThrowIfOutOfRange(time, nameof(logicalValue)); - // Note: UnixEpochDateTime.TimeOfDay is '00:00:00'. This could be 'return time.TotalMilliseconds; - return (int)(time - UnixEpochDateTime.TimeOfDay).TotalMilliseconds; + // Note: UnixEpochDateTime.TimeOfDay is '00:00:00'. This could be 'return (int)time.TotalMilliseconds;' + return (T)(object)(int)(time - UnixEpochDateTime.TimeOfDay).TotalMilliseconds; + } + else if (typeof(T) == typeof(TimeSpan)) + { + return (T)logicalValue; + } + else if (typeof(T) == typeof(string)) + { + return (T)(object)((TimeSpan)logicalValue).ToString("c"); + } + throw new AvroTypeException($"'{LogicalTypeName}' can only be converted to 'int'"); } /// public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) { - var time = TimeSpan.FromMilliseconds((int)baseValue); + if (baseValue is int) + { + var time = TimeSpan.FromMilliseconds((int)baseValue); - ThrowIfOutOfRange(time, nameof(baseValue)); + ThrowIfOutOfRange(time, nameof(baseValue)); - // Note: UnixEpochDateTime.TimeOfDay is '00:00:00'. This could be 'return time;' - return UnixEpochDateTime.TimeOfDay.Add(time); + // Note: UnixEpochDateTime.TimeOfDay is '00:00:00'. This could be 'return time;' + return UnixEpochDateTime.TimeOfDay.Add(time); + } + else if (baseValue is TimeSpan) + { + return baseValue; + } + else if (baseValue is string) + { + return TimeSpan.Parse((string)baseValue); + } + throw new AvroTypeException($"'{LogicalTypeName}' can only be converted from 'int' or 'string'"); } private static void ThrowIfOutOfRange(TimeSpan time, string paramName) diff --git a/lang/csharp/src/apache/main/Util/TimestampMicrosecond.cs b/lang/csharp/src/apache/main/Util/TimestampMicrosecond.cs index 4d8b1cc6cad..b7978048f14 100644 --- a/lang/csharp/src/apache/main/Util/TimestampMicrosecond.cs +++ b/lang/csharp/src/apache/main/Util/TimestampMicrosecond.cs @@ -46,14 +46,50 @@ public override void ValidateSchema(LogicalSchema schema) /// public override object ConvertToBaseValue(object logicalValue, LogicalSchema schema) { - var date = ((DateTime)logicalValue).ToUniversalTime(); - return (date - UnixEpochDateTime).Ticks / TicksPerMicrosecond; + return ConvertToBaseValue(logicalValue, schema); + } + + /// + public override T ConvertToBaseValue(object logicalValue, LogicalSchema schema) + { + if (typeof(T) == typeof(long)) + { + var date = ((DateTime)logicalValue).ToUniversalTime(); + return (T)(object)((date - UnixEpochDateTime).Ticks / TicksPerMicrosecond); + } + else if (typeof(T) == typeof(DateTime)) + { + return (T)logicalValue; + } + else if (typeof(T) == typeof(string)) + { + return (T)(object)((DateTime)logicalValue).ToString("O"); + } + else + { + throw new AvroTypeException($"Cannot convert to type {typeof(T)}"); + } } /// public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) { - return UnixEpochDateTime.AddTicks((long)baseValue * TicksPerMicrosecond); + if (baseValue is long) + { + return UnixEpochDateTime.AddTicks((long)baseValue * TicksPerMicrosecond); + } + else if (baseValue is DateTime) + { + return baseValue; + } + else if (baseValue is string) + { + return DateTime.Parse((string)baseValue); + } + else + { + throw new AvroTypeException("Invalid type conversion"); + } } } } diff --git a/lang/csharp/src/apache/main/Util/TimestampMillisecond.cs b/lang/csharp/src/apache/main/Util/TimestampMillisecond.cs index 481d4745376..abe000c9fa2 100644 --- a/lang/csharp/src/apache/main/Util/TimestampMillisecond.cs +++ b/lang/csharp/src/apache/main/Util/TimestampMillisecond.cs @@ -45,15 +45,47 @@ public override void ValidateSchema(LogicalSchema schema) /// public override object ConvertToBaseValue(object logicalValue, LogicalSchema schema) { - var date = ((DateTime)logicalValue).ToUniversalTime(); - return (long)(date - UnixEpochDateTime).TotalMilliseconds; + return ConvertToBaseValue(logicalValue, schema); + } + + /// + public override T ConvertToBaseValue(object logicalValue, LogicalSchema schema) + { + if (typeof(T) == typeof(long)) + { + var date = ((DateTime)logicalValue).ToUniversalTime(); + return (T)(object)(long)(date - UnixEpochDateTime).TotalMilliseconds; + } + else if (typeof(T) == typeof(DateTime)) + { + return (T)(object)((DateTime)logicalValue); + } + else + { + throw new AvroTypeException("Unsupported type conversion"); + } } /// public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) { - var noMs = (long)baseValue; - return UnixEpochDateTime.AddMilliseconds(noMs); + if (baseValue is long || baseValue is int ) + { + var noMs = (long)baseValue; + return UnixEpochDateTime.AddMilliseconds(noMs); + } + else if (baseValue is DateTime) + { + return baseValue; + } + else if ( baseValue is string ) + { + return DateTime.Parse((string)baseValue); + } + else + { + throw new AvroTypeException("Unsupported type conversion"); + } } } } diff --git a/lang/csharp/src/apache/main/Util/Uuid.cs b/lang/csharp/src/apache/main/Util/Uuid.cs index a43ac5846b4..05d03d6a319 100644 --- a/lang/csharp/src/apache/main/Util/Uuid.cs +++ b/lang/csharp/src/apache/main/Util/Uuid.cs @@ -39,7 +39,17 @@ public Uuid() : base(LogicalTypeName) { } /// public override object ConvertToBaseValue(object logicalValue, LogicalSchema schema) { - return logicalValue.ToString(); + return ConvertToBaseValue(logicalValue, schema); + } + + /// + override public T ConvertToBaseValue(object logicalValue, LogicalSchema schema) + { + if (typeof(T) == typeof(string)) + { + return (T)(object)logicalValue.ToString(); + } + throw new AvroTypeException($"Unsupported type: {typeof(T)}"); } /// diff --git a/lang/csharp/src/apache/test/IO/JsonCodecTests.cs b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs index 1c909275594..b45a8042a1c 100644 --- a/lang/csharp/src/apache/test/IO/JsonCodecTests.cs +++ b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs @@ -46,12 +46,9 @@ public class JsonCodecTests "{ \"f2\": 10.4, \"f1\": 10 } ")] [TestCase("{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": [ \"s1\", \"s2\"] }", " \"s1\" ")] [TestCase("{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": [ \"s1\", \"s2\"] }", " \"s2\" ")] - [TestCase("{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 5 }", "\"hello\"")] [TestCase("{ \"type\": \"array\", \"items\": \"int\" }", "[ 10, 20, 30 ]")] [TestCase("{ \"type\": \"map\", \"values\": \"int\" }", "{ \"k1\": 10, \"k2\": 20, \"k3\": 30 }")] - [TestCase("[ \"int\", \"long\" ]", "{ \"int\": 10 }")] [TestCase("\"string\"", "\"hello\"")] - [TestCase("\"bytes\"", "\"hello\"")] [TestCase("\"int\"", "10")] [TestCase("\"long\"", "10")] [TestCase("\"float\"", "10.0")] @@ -60,26 +57,57 @@ public class JsonCodecTests [TestCase("\"boolean\"", "false")] [TestCase("\"null\"", "null")] public void TestJsonAllTypesValidValues(String schemaStr, String value) + { + foreach (JsonMode mode in new[] { JsonMode.AvroJson, JsonMode.PlainJson }) + { + Schema schema = Schema.Parse(schemaStr); + byte[] avroBytes = fromJsonToAvro(value, schema, mode); + + Assert.IsTrue(JToken.DeepEquals(JToken.Parse(value), + JToken.Parse(fromAvroToJson(avroBytes, schema, true, mode)))); + } + } + + [TestCase("[ \"int\", \"long\" ]", "{ \"int\": 10 }")] + [TestCase("[ \"int\", \"long\" ]", "{ \"long\": 10 }")] + [TestCase("[ \"int\", \"null\" ]", "null")] + [TestCase("\"bytes\"", "\"\\u0068\\u0065\\u006C\\u006C\\u006F\"")] + [TestCase("{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 5 }", "\"\\u0068\\u0065\\u006C\\u006C\\u006F\"")] + public void TestJsonAllTypesValidValuesAvroJson(String schemaStr, String value) + { + Schema schema = Schema.Parse(schemaStr); + byte[] avroBytes = fromJsonToAvro(value, schema, JsonMode.AvroJson); + + Assert.IsTrue(JToken.DeepEquals(JToken.Parse(value), + JToken.Parse(fromAvroToJson(avroBytes, schema, true, JsonMode.AvroJson)))); + } + + [TestCase("[ \"int\", \"long\" ]", "10")] + [TestCase("[ \"int\", \"null\" ]", "10")] + [TestCase("[ \"int\", \"null\" ]", "null")] + [TestCase("\"bytes\"", "\"aGVsbG8=\"")] + [TestCase("{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 5 }", "\"aGVsbG8=\"")] + + public void TestJsonAllTypesValidValuesPlainJson(String schemaStr, String value) { Schema schema = Schema.Parse(schemaStr); - byte[] avroBytes = fromJsonToAvro(value, schema); + byte[] avroBytes = fromJsonToAvro(value, schema, JsonMode.PlainJson); Assert.IsTrue(JToken.DeepEquals(JToken.Parse(value), - JToken.Parse(fromAvroToJson(avroBytes, schema, true)))); + JToken.Parse(fromAvroToJson(avroBytes, schema, true, JsonMode.PlainJson)))); } + + [TestCase("{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + " { \"name\" : \"f1\", \"type\": \"int\" }, " + " { \"name\" : \"f2\", \"type\": \"float\" } " + "] }", "{ \"f4\": 10.4, \"f3\": 10 } ")] [TestCase("{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": [ \"s1\", \"s2\"] }", " \"s3\" ")] - [TestCase("{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 10 }", "\"hello\"")] [TestCase("{ \"type\": \"array\", \"items\": \"int\" }", "[ \"10\", \"20\", \"30\" ]")] [TestCase("{ \"type\": \"map\", \"values\": \"int\" }", "{ \"k1\": \"10\", \"k2\": \"20\"}")] - [TestCase("[ \"int\", \"long\" ]", "10")] [TestCase("\"string\"", "10")] - [TestCase("\"bytes\"", "10")] [TestCase("\"int\"", "\"hi\"")] [TestCase("\"long\"", "\"hi\"")] [TestCase("\"float\"", "\"hi\"")] @@ -88,9 +116,32 @@ public void TestJsonAllTypesValidValues(String schemaStr, String value) [TestCase("\"boolean\"", "\"hi\"")] [TestCase("\"null\"", "\"hi\"")] public void TestJsonAllTypesInvalidValues(String schemaStr, String value) + { + foreach (JsonMode mode in new[] { JsonMode.AvroJson, JsonMode.PlainJson }) + { + Schema schema = Schema.Parse(schemaStr); + Assert.Throws(() => fromJsonToAvro(value, schema, mode)); + } + } + + [TestCase("[ \"int\", \"long\" ]", "10")] + [TestCase("\"bytes\"", "10")] + [TestCase("{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 10 }", "\"hello\"")] + public void TestJsonAllTypesInvalidValuesAvroJson(String schemaStr, String value) { Schema schema = Schema.Parse(schemaStr); - Assert.Throws(() => fromJsonToAvro(value, schema)); + Assert.Throws(() => fromJsonToAvro(value, schema, JsonMode.AvroJson)); + } + + [TestCase("[ \"int\", \"long\" ]", "{ \"int\": 10}")] + [TestCase("\"bytes\"", "10")] + [TestCase("\"bytes\"", "\"&10\"")] + [TestCase("{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 10 }", "\"129837\"")] + [TestCase("{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 10 }", "\"abc&\"")] + public void TestJsonAllTypesInvalidValuesPlainJson(String schemaStr, String value) + { + Schema schema = Schema.Parse(schemaStr); + Assert.Throws(() => fromJsonToAvro(value, schema, JsonMode.PlainJson)); } [TestCase("{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + @@ -102,116 +153,143 @@ public void TestJsonAllTypesInvalidValues(String schemaStr, String value) [TestCase("\"string\"", "\"hi")] public void TestJsonMalformed(String schemaStr, String value) { - Schema schema = Schema.Parse(schemaStr); - Assert.Throws(() => fromJsonToAvro(value, schema)); + foreach (JsonMode mode in new[] { JsonMode.AvroJson, JsonMode.PlainJson }) + { + Schema schema = Schema.Parse(schemaStr); + Assert.Throws(() => fromJsonToAvro(value, schema, mode)); + } } [Test] public void TestJsonEncoderWhenIncludeNamespaceOptionIsFalse() { - string value = "{\"b\": {\"string\":\"myVal\"}, \"a\": 1}"; - string schemaStr = "{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + - "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": [\"null\", \"string\"]}" + - "]}"; - Schema schema = Schema.Parse(schemaStr); - byte[] avroBytes = fromJsonToAvro(value, schema); - - Assert.IsTrue(JToken.DeepEquals(JObject.Parse("{\"b\":\"myVal\",\"a\":1}"), - JObject.Parse(fromAvroToJson(avroBytes, schema, false)))); + foreach (JsonMode mode in new[] { JsonMode.AvroJson, JsonMode.PlainJson }) + { + string value = (mode == JsonMode.AvroJson) + ? "{\"b\": {\"string\":\"myVal\"}, \"a\": 1}" + : "{\"b\": \"myVal\", \"a\": 1}"; + + string schemaStr = "{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + + "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": [\"null\", \"string\"]}" + + "]}"; + Schema schema = Schema.Parse(schemaStr); + byte[] avroBytes = fromJsonToAvro(value, schema, mode); + + Assert.IsTrue(JToken.DeepEquals(JObject.Parse("{\"b\":\"myVal\",\"a\":1}"), + JObject.Parse(fromAvroToJson(avroBytes, schema, false, mode)))); + } } [Test] public void TestJsonEncoderWhenIncludeNamespaceOptionIsTrue() { - string value = "{\"b\": {\"string\":\"myVal\"}, \"a\": 1}"; - string schemaStr = "{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + - "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": [\"null\", \"string\"]}" + - "]}"; - Schema schema = Schema.Parse(schemaStr); - byte[] avroBytes = fromJsonToAvro(value, schema); - - Assert.IsTrue(JToken.DeepEquals(JObject.Parse("{\"b\":{\"string\":\"myVal\"},\"a\":1}"), - JObject.Parse(fromAvroToJson(avroBytes, schema, true)))); + foreach (JsonMode mode in new[] { JsonMode.AvroJson, JsonMode.PlainJson }) + { + string value = (mode == JsonMode.AvroJson) + ? "{\"b\": {\"string\":\"myVal\"}, \"a\": 1}" + : "{\"b\": \"myVal\", \"a\": 1}"; + + string schemaStr = "{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + + "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": [\"null\", \"string\"]}" + + "]}"; + Schema schema = Schema.Parse(schemaStr); + byte[] avroBytes = fromJsonToAvro(value, schema, mode); + + Assert.IsTrue(JToken.DeepEquals(JObject.Parse(value), + JObject.Parse(fromAvroToJson(avroBytes, schema, true, mode)))); + } } [Test] public void TestJsonRecordOrdering() { - string value = "{\"b\": 2, \"a\": 1}"; - Schema schema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + - "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": \"int\"}" + - "]}"); - GenericDatumReader reader = new GenericDatumReader(schema, schema); - Decoder decoder = new JsonDecoder(schema, value); - object o = reader.Read(null, decoder); - - Assert.AreEqual("{\"a\":1,\"b\":2}", fromDatumToJson(o, schema, false)); + foreach (JsonMode mode in new[] { JsonMode.AvroJson, JsonMode.PlainJson }) + { + string value = "{\"b\": 2, \"a\": 1}"; + Schema schema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + + "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": \"int\"}" + + "]}"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value); + object o = reader.Read(null, decoder); + + Assert.AreEqual("{\"a\":1,\"b\":2}", fromDatumToJson(o, schema, false, mode)); + } } [Test] public void TestJsonRecordOrdering2() { - string value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a2\":true, \"a1\": null}}"; - Schema schema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + - "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + - "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" + - "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" + - "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":\"float\"}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" + - "]}"); - GenericDatumReader reader = new GenericDatumReader(schema, schema); - Decoder decoder = new JsonDecoder(schema, value); - object o = reader.Read(null, decoder); - - Assert.AreEqual("{\"a\":{\"a1\":null,\"a2\":true},\"b\":{\"b1\":\"h\",\"b2\":3.14,\"b3\":1.4}}", - fromDatumToJson(o, schema, false)); + foreach (JsonMode mode in new[] { JsonMode.AvroJson, JsonMode.PlainJson }) + { + string value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a2\":true, \"a1\": null}}"; + Schema schema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" + + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" + + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":\"float\"}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" + + "]}"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value); + object o = reader.Read(null, decoder); + + Assert.AreEqual("{\"a\":{\"a1\":null,\"a2\":true},\"b\":{\"b1\":\"h\",\"b2\":3.14,\"b3\":1.4}}", + fromDatumToJson(o, schema, false, mode)); + } } [Test] public void TestJsonRecordOrderingWithProjection() { - String value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a2\":true, \"a1\": null}}"; - Schema writerSchema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" - + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" - + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" - + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" - + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":\"float\"}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" - + "]}"); - Schema readerSchema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" - + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" - + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}}\n" + - "]}"); - GenericDatumReader reader = new GenericDatumReader(writerSchema, readerSchema); - Decoder decoder = new JsonDecoder(writerSchema, value); - Object o = reader.Read(null, decoder); - - Assert.AreEqual("{\"a\":{\"a1\":null,\"a2\":true}}", - fromDatumToJson(o, readerSchema, false)); + foreach (JsonMode mode in new[] { JsonMode.AvroJson, JsonMode.PlainJson }) + { + String value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a2\":true, \"a1\": null}}"; + Schema writerSchema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" + + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" + + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":\"float\"}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" + + "]}"); + Schema readerSchema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}}\n" + + "]}"); + GenericDatumReader reader = new GenericDatumReader(writerSchema, readerSchema); + Decoder decoder = new JsonDecoder(writerSchema, value); + Object o = reader.Read(null, decoder); + + Assert.AreEqual("{\"a\":{\"a1\":null,\"a2\":true}}", + fromDatumToJson(o, readerSchema, false, mode)); + } } [Test] public void TestJsonRecordOrderingWithProjection2() { - String value = + foreach (JsonMode mode in new[] { JsonMode.AvroJson, JsonMode.PlainJson }) + { + String value = "{\"b\": { \"b1\": \"h\", \"b2\": [3.14, 3.56], \"b3\": 1.4}, \"a\": {\"a2\":true, \"a1\": null}}"; - Schema writerSchema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" - + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" - + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" - + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" - + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":{\"type\":\"array\", \"items\":\"float\"}}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" - + "]}"); - - Schema readerSchema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" - + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" - + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}}\n" + - "]}"); - - GenericDatumReader reader = new GenericDatumReader(writerSchema, readerSchema); - Decoder decoder = new JsonDecoder(writerSchema, value); - object o = reader.Read(null, decoder); - - Assert.AreEqual("{\"a\":{\"a1\":null,\"a2\":true}}", - fromDatumToJson(o, readerSchema, false)); + Schema writerSchema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" + + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" + + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":{\"type\":\"array\", \"items\":\"float\"}}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" + + "]}"); + + Schema readerSchema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}}\n" + + "]}"); + + GenericDatumReader reader = new GenericDatumReader(writerSchema, readerSchema); + Decoder decoder = new JsonDecoder(writerSchema, value); + object o = reader.Read(null, decoder); + + Assert.AreEqual("{\"a\":{\"a1\":null,\"a2\":true}}", + fromDatumToJson(o, readerSchema, false, mode)); + } } [TestCase("{\"int\":123}")] @@ -228,7 +306,24 @@ public void TestJsonUnionWithLogicalTypes(String value) Decoder decoder = new JsonDecoder(schema, value); object o = reader.Read(null, decoder); - Assert.AreEqual(value, fromDatumToJson(o, schema, true)); + Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.AvroJson)); + } + + [TestCase("\"2024-05-01\"")] + [TestCase("\"12345678-1234-5678-1234-123456789012\"")] + [TestCase("null")] + public void TestJsonUnionWithLogicalTypesPlainJson(String value) + { + Schema schema = Schema.Parse( + "[\"null\",\n" + + " { \"type\": \"int\", \"logicalType\": \"date\" },\n" + + " { \"type\": \"string\", \"logicalType\": \"uuid\" }\n" + + "]"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value, JsonMode.PlainJson); + object o = reader.Read(null, decoder); + + Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson)); } [TestCase("{\"int\":123}")] @@ -246,7 +341,44 @@ public void TestJsonUnionWithRecord(String value) Decoder decoder = new JsonDecoder(schema, value); object o = reader.Read(null, decoder); - Assert.AreEqual(value, fromDatumToJson(o, schema, true)); + Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.AvroJson)); + } + + [TestCase("\"2024-05-01\"")] + [TestCase("{\"f1\":123}")] + [TestCase("null")] + public void TestJsonUnionWithRecordPlainJson(String value) + { + Schema schema = Schema.Parse( + "[\"null\",\n" + + " { \"type\": \"int\", \"logicalType\": \"date\" },\n" + + " {\"type\":\"record\",\"name\":\"myrecord\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f1\",\"type\": \"int\"}]}" + + "]"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value, JsonMode.PlainJson); + object o = reader.Read(null, decoder); + + Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson)); + } + + + [TestCase("{\"f1\":123}")] + [TestCase("{\"f1\":\"abc\"}")] + public void TestJsonRecordUnionPlainJson(String value) + { + Schema schema = Schema.Parse( + "[" + + " {\"type\":\"record\",\"name\":\"myrecord1\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f1\",\"type\": \"int\"}]}," + + " {\"type\":\"record\",\"name\":\"myrecord2\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f1\",\"type\": \"string\"}]}" + + "]"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value, JsonMode.PlainJson); + object o = reader.Read(null, decoder); + + Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson)); } [TestCase("int", 1)] @@ -268,81 +400,121 @@ public void TestJsonDecoderNumeric(string type, object value) } } + [Test] + public void TestJsonDecoderDecimalPlainJson() + { + decimal value = 1.0M; + string def = "{\"type\":\"record\",\"name\":\"X\",\"fields\": [{\"type\":{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":16},\"name\":\"n\"}]}"; + Schema schema = Schema.Parse(def); + DatumReader reader = new GenericDatumReader(schema, schema); + + string[] records = { "{\"n\":1}", "{\"n\":1.0}" }; + + foreach (GenericRecord g in records.Select(r => reader.Read(null, new JsonDecoder(schema, r, JsonMode.PlainJson)))) + { + decimal d = (decimal)(AvroDecimal)g["n"]; + Assert.AreEqual(value, d); + } + } + // Ensure that even if the order of fields in JSON is different from the order in schema, it works. [Test] public void TestJsonDecoderReorderFields() { - String w = "{\"type\":\"record\",\"name\":\"R\",\"fields\":" + "[{\"type\":\"long\",\"name\":\"l\"}," + foreach (JsonMode mode in new[] { JsonMode.AvroJson, JsonMode.PlainJson }) + { + String w = "{\"type\":\"record\",\"name\":\"R\",\"fields\":" + "[{\"type\":\"long\",\"name\":\"l\"}," + "{\"type\":{\"type\":\"array\",\"items\":\"int\"},\"name\":\"a\"}" + "]}"; - Schema ws = Schema.Parse(w); - String data = "{\"a\":[1,2],\"l\":100}"; - JsonDecoder decoder = new JsonDecoder(ws, data); - Assert.AreEqual(100, decoder.ReadLong()); - decoder.SkipArray(); - data = "{\"l\": 200, \"a\":[1,2]}"; - decoder = new JsonDecoder(ws, data); - Assert.AreEqual(200, decoder.ReadLong()); - decoder.SkipArray(); + Schema ws = Schema.Parse(w); + String data = "{\"a\":[1,2],\"l\":100}"; + JsonDecoder decoder = new JsonDecoder(ws, data, mode); + Assert.AreEqual(100, decoder.ReadLong()); + decoder.SkipArray(); + data = "{\"l\": 200, \"a\":[1,2]}"; + decoder = new JsonDecoder(ws, data, mode); + Assert.AreEqual(200, decoder.ReadLong()); + decoder.SkipArray(); + } } [Test] public void TestJsonDecoderSpecificDatumWriterWithArrayAndMap() { - Root data = new Root(); - Item item = new Item { id = 123456 }; - data.myarray = new List { item }; - data.mymap = new Dictionary { { "1", 1 }, { "2", 2 }, { "3", 3 }, { "4", 4 } }; - - DatumWriter writer = new SpecificDatumWriter(data.Schema); - - ByteBufferOutputStream bbos = new ByteBufferOutputStream(); - - Encoder encoder = new JsonEncoder(data.Schema, bbos); - writer.Write(data, encoder); - encoder.Flush(); - - List listStreams = bbos.GetBufferList(); - - using (StreamReader reader = new StreamReader(listStreams[0])) + foreach (JsonMode mode in new[] { JsonMode.AvroJson, JsonMode.PlainJson }) { - String output = reader.ReadToEnd(); - Assert.AreEqual("{\"myarray\":[{\"id\":123456}],\"mymap\":{\"map\":{\"1\":1,\"2\":2,\"3\":3,\"4\":4}}}", output); + Root data = new Root(); + Item item = new Item { id = 123456 }; + data.myarray = new List { item }; + data.mymap = new Dictionary { { "1", 1 }, { "2", 2 }, { "3", 3 }, { "4", 4 } }; + + DatumWriter writer = new SpecificDatumWriter(data.Schema); + + ByteBufferOutputStream bbos = new ByteBufferOutputStream(); + + Encoder encoder = new JsonEncoder(data.Schema, bbos, mode); + writer.Write(data, encoder); + encoder.Flush(); + + List listStreams = bbos.GetBufferList(); + + using (StreamReader reader = new StreamReader(listStreams[0])) + { + String output = reader.ReadToEnd(); + if ( mode == JsonMode.AvroJson) + { + Assert.AreEqual("{\"myarray\":[{\"id\":123456}],\"mymap\":{\"map\":{\"1\":1,\"2\":2,\"3\":3,\"4\":4}}}", output); + } + else + { + Assert.AreEqual("{\"myarray\":[{\"id\":123456}],\"mymap\":{\"1\":1,\"2\":2,\"3\":3,\"4\":4}}", output); + } + } } } [Test] public void TestJsonDecoderSpecificDefaultWriterWithArrayAndMap() { - Root data = new Root(); - Item item = new Item { id = 123456 }; - data.myarray = new List { item }; - data.mymap = new Dictionary { { "1", 1 }, { "2", 2 }, { "3", 3 }, { "4", 4 } }; - - SpecificDefaultWriter writer = new SpecificDefaultWriter(data.Schema); - - ByteBufferOutputStream bbos = new ByteBufferOutputStream(); - - Encoder encoder = new JsonEncoder(data.Schema, bbos); - writer.Write(data, encoder); - encoder.Flush(); - - List listStreams = bbos.GetBufferList(); - - using (StreamReader reader = new StreamReader(listStreams[0])) + foreach (JsonMode mode in new[] { JsonMode.AvroJson, JsonMode.PlainJson }) { - String output = reader.ReadToEnd(); - Assert.AreEqual("{\"myarray\":[{\"id\":123456}],\"mymap\":{\"map\":{\"1\":1,\"2\":2,\"3\":3,\"4\":4}}}", output); + Root data = new Root(); + Item item = new Item { id = 123456 }; + data.myarray = new List { item }; + data.mymap = new Dictionary { { "1", 1 }, { "2", 2 }, { "3", 3 }, { "4", 4 } }; + + SpecificDefaultWriter writer = new SpecificDefaultWriter(data.Schema); + + ByteBufferOutputStream bbos = new ByteBufferOutputStream(); + + Encoder encoder = new JsonEncoder(data.Schema, bbos, mode); + writer.Write(data, encoder); + encoder.Flush(); + + List listStreams = bbos.GetBufferList(); + + using (StreamReader reader = new StreamReader(listStreams[0])) + { + String output = reader.ReadToEnd(); + if (mode == JsonMode.AvroJson) + { + Assert.AreEqual("{\"myarray\":[{\"id\":123456}],\"mymap\":{\"map\":{\"1\":1,\"2\":2,\"3\":3,\"4\":4}}}", output); + } + else + { + Assert.AreEqual("{\"myarray\":[{\"id\":123456}],\"mymap\":{\"1\":1,\"2\":2,\"3\":3,\"4\":4}}", output); + } + } } } - private byte[] fromJsonToAvro(string json, Schema schema) + private byte[] fromJsonToAvro(string json, Schema schema, JsonMode mode) { DatumReader reader = new GenericDatumReader(schema, schema); GenericDatumWriter writer = new GenericDatumWriter(schema); MemoryStream output = new MemoryStream(); - Decoder decoder = new JsonDecoder(schema, json); + Decoder decoder = new JsonDecoder(schema, json, mode); Encoder encoder = new BinaryEncoder(output); object datum = reader.Read(null, decoder); @@ -354,21 +526,21 @@ private byte[] fromJsonToAvro(string json, Schema schema) return output.ToArray(); } - private string fromAvroToJson(byte[] avroBytes, Schema schema, bool includeNamespace) + private string fromAvroToJson(byte[] avroBytes, Schema schema, bool includeNamespace, JsonMode mode) { GenericDatumReader reader = new GenericDatumReader(schema, schema); Decoder decoder = new BinaryDecoder(new MemoryStream(avroBytes)); object datum = reader.Read(null, decoder); - return fromDatumToJson(datum, schema, includeNamespace); + return fromDatumToJson(datum, schema, includeNamespace, mode); } - private string fromDatumToJson(object datum, Schema schema, bool includeNamespace) + private string fromDatumToJson(object datum, Schema schema, bool includeNamespace, JsonMode mode) { DatumWriter writer = new GenericDatumWriter(schema); MemoryStream output = new MemoryStream(); - JsonEncoder encoder = new JsonEncoder(schema, output); + JsonEncoder encoder = new JsonEncoder(schema, output, mode); encoder.IncludeNamespace = includeNamespace; writer.Write(datum, encoder); encoder.Flush(); diff --git a/lang/csharp/src/apache/test/Schema/AliasesTests.cs b/lang/csharp/src/apache/test/Schema/AliasesTests.cs index 27ad4b23efd..4e742d69d86 100644 --- a/lang/csharp/src/apache/test/Schema/AliasesTests.cs +++ b/lang/csharp/src/apache/test/Schema/AliasesTests.cs @@ -25,25 +25,25 @@ public class AliasesTests [TestCase] public void TestNoNamespace() { - CollectionAssert.AreEqual(new[] { new SchemaName("alias", null, null, null) }, Aliases.GetSchemaNames(new[] { "alias" }, "name", null)); + CollectionAssert.AreEqual(new[] { new SchemaName("alias", null, null, null, null) }, Aliases.GetSchemaNames(new[] { "alias" }, "name", null)); } [TestCase] public void TestTypeWithNamespace() { - CollectionAssert.AreEqual(new[] { new SchemaName("space.alias", null, null, null) }, Aliases.GetSchemaNames(new[] { "alias" }, "name", "space")); + CollectionAssert.AreEqual(new[] { new SchemaName("space.alias", null, null, null, null) }, Aliases.GetSchemaNames(new[] { "alias" }, "name", "space")); } [TestCase] public void TestTypeWithNamespaceInName() { - CollectionAssert.AreEqual(new[] { new SchemaName("space.alias", null, null, null) }, Aliases.GetSchemaNames(new[] { "alias" }, "space.name", null)); + CollectionAssert.AreEqual(new[] { new SchemaName("space.alias", null, null, null, null) }, Aliases.GetSchemaNames(new[] { "alias" }, "space.name", null)); } [TestCase] public void TestAliasWithNamespace() { - CollectionAssert.AreEqual(new[] { new SchemaName("name.alias", null, null, null) }, Aliases.GetSchemaNames(new[] { "name.alias" }, "space.name", null)); + CollectionAssert.AreEqual(new[] { new SchemaName("name.alias", null, null, null, null) }, Aliases.GetSchemaNames(new[] { "name.alias" }, "space.name", null)); } } } diff --git a/lang/csharp/src/apache/test/Schema/SchemaTests.cs b/lang/csharp/src/apache/test/Schema/SchemaTests.cs index 319e9a95be3..6e39ab49a93 100644 --- a/lang/csharp/src/apache/test/Schema/SchemaTests.cs +++ b/lang/csharp/src/apache/test/Schema/SchemaTests.cs @@ -311,6 +311,7 @@ public void TestRecordCreation(string expectedSchema, string name, string space, IEnumerable recordFields = fieldsNames.Select((fieldName, i) => new Field(PrimitiveSchema.Create(fieldsTypes[i]), fieldName, fieldsAliases[i] == null? null: new List { fieldsAliases[i] }, + null, i, fieldsDocs[i], fieldsDefaultValues[i].ToString(), @@ -346,6 +347,7 @@ public void TestRecordCreationWithDuplicateFields() var recordField = new Field(PrimitiveSchema.Create(Schema.Type.Long), "value", new List { "oldName" }, + null, 0, null, "100", @@ -367,6 +369,7 @@ public void TestRecordFieldNames() { new Field(PrimitiveSchema.Create(Schema.Type.Long), "歳以上", null, + null, 0, null, null, @@ -391,6 +394,7 @@ public void TestRecordCreationWithRecursiveRecord() new Field(PrimitiveSchema.Create(Schema.Type.Long), "value", null, + null, 0, null, null, @@ -430,6 +434,7 @@ public void TestEnum(string s, string[] symbols, string space = null, IEnumerabl symbols, space, aliases, + null, usePropertyMap == true ? propertyMap : null, doc, defaultSymbol); @@ -483,7 +488,7 @@ public void TestEnumDefaultSymbolDoesntExist(string s) [TestCase("name", new string[] { "_A1B2_", "B4324" }, null, new[] { "L1", "L2" }, "underscore in symbols", "_A1B2_", "name", null)] public void TestEnumCreation(string name, string[] symbols, string space, string[] aliases, string doc, string defaultSymbol, string expectedName, string expectedNamespace) { - EnumSchema enumSchema = EnumSchema.Create(name, symbols, space, aliases, null, doc, defaultSymbol); + EnumSchema enumSchema = EnumSchema.Create(name, symbols, space, aliases, null, null, doc, defaultSymbol); Assert.AreEqual(expectedName, enumSchema.Name); CollectionAssert.AreEqual(symbols, enumSchema.Symbols); @@ -670,7 +675,7 @@ public void TestFixedCreation() [TestCase("a", "o.a.h", ExpectedResult = "o.a.h.a")] public string testFullname(string s1, string s2) { - var name = new SchemaName(s1, s2, null, null); + var name = new SchemaName(s1, s2, null, null, null); return name.Fullname; } From 72114de6c0e100fd5fd0fc5b6fc06971e5e82ac6 Mon Sep 17 00:00:00 2001 From: Clemens Vasters Date: Thu, 2 May 2024 14:26:43 +0200 Subject: [PATCH 2/3] Record unions and "const" --- lang/csharp/src/apache/main/IO/JsonDecoder.cs | 149 +++++++++++++----- .../main/IO/Parsing/JsonGrammarGenerator.cs | 12 +- .../src/apache/main/IO/Parsing/Parser.cs | 16 +- .../src/apache/main/IO/Parsing/Symbol.cs | 26 +++ .../IO/Parsing/ValidatingGrammarGenerator.cs | 2 +- .../src/apache/test/IO/JsonCodecTests.cs | 113 ++++++++++++- 6 files changed, 272 insertions(+), 46 deletions(-) diff --git a/lang/csharp/src/apache/main/IO/JsonDecoder.cs b/lang/csharp/src/apache/main/IO/JsonDecoder.cs index bbb250c3d95..4a715092fbc 100644 --- a/lang/csharp/src/apache/main/IO/JsonDecoder.cs +++ b/lang/csharp/src/apache/main/IO/JsonDecoder.cs @@ -19,12 +19,16 @@ using System; using System.CodeDom; using System.Collections.Generic; +using System.ComponentModel; using System.IO; using System.Linq; using System.Text; +using System.Xml; +using System.Xml.Linq; using Avro.IO.Parsing; using Newtonsoft.Json; using Newtonsoft.Json.Linq; +using Newtonsoft.Json.Serialization; namespace Avro.IO { @@ -244,6 +248,22 @@ public override double ReadDouble() public override string ReadString() { Advance(Symbol.String); + if (Parser.TopSymbol() is Symbol.ConstCheckAction) + { + Symbol.ConstCheckAction top = (Symbol.ConstCheckAction)Parser.PopSymbol(); + string expected = (string)top.Value; + if (reader.TokenType != JsonToken.String) + { + throw TypeError("string"); + } + string readResult = Convert.ToString(reader.Value); + if (!expected.Equals(readResult)) + { + throw new AvroTypeException("Expected constant value: " + expected + " but received: " + readResult); + } + reader.Read(); + return readResult; + } if (Parser.TopSymbol() == Symbol.MapKeyMarker) { Parser.Advance(Symbol.MapKeyMarker); @@ -734,55 +754,93 @@ public override int ReadUnionIndex() return n; } - private bool IsRecordMatch(Symbol symbol, JsonReader objectReader) + private bool IsRecordMatch(Symbol symbol, JTokenReader objectReader) { + // to determine whether a record matches, we need to read the object and compare it to the schema + // this is done by creating a new JsonDecoder on top of the reader and advancing it through the schema + // if the schema matches, we return true, otherwise false JsonDecoder innerDecoder = new JsonDecoder(symbol, objectReader, JsonMode.PlainJson); + + // the required start condition is that the reader is at the start of the object + // and that the symbol is a Sequence + if ( symbol.SymKind != Symbol.Kind.Sequence || objectReader.CurrentToken.Type != JTokenType.Object) + { + return false; + } + // advance the inner decoder to the start of the record + innerDecoder.Parser.Advance(Symbol.RecordStart); + // read the first token of the object + innerDecoder.reader.Read(); + // we're now at the start of the record, so we can start processing the fields + // but we need to do so in the Avro schema field order, so we grab the stack + // of the parser and clone it + var stack = new Stack(innerDecoder.Parser.CloneStack()); try { - - while( objectReader.TokenType != JsonToken.None ) + while ( stack.Count > 0 ) { - switch(objectReader.TokenType) + var currentSymbol = stack.Pop(); + if (currentSymbol == Symbol.ArrayStart) { - case JsonToken.PropertyName: - break; - case JsonToken.Integer: - innerDecoder.Advance(Symbol.Int); - break; - case JsonToken.Float: - innerDecoder.Advance(Symbol.Float); - break; - case JsonToken.Boolean: - innerDecoder.Advance(Symbol.Boolean); - break; - case JsonToken.Date: - innerDecoder.Advance(Symbol.JsonDateTime); - break; - case JsonToken.String: - innerDecoder.Advance(Symbol.String); - break; - case JsonToken.Null: - innerDecoder.Advance(Symbol.Null); - break; - case JsonToken.Bytes: - innerDecoder.Advance(Symbol.Bytes); - break; - case JsonToken.StartObject: - innerDecoder.Advance(Symbol.RecordStart); - break; - case JsonToken.EndObject: - break; - case JsonToken.StartArray: - innerDecoder.Advance(Symbol.ArrayStart); - break; - case JsonToken.EndArray: - innerDecoder.Advance(Symbol.ArrayEnd); - break; - default: - break; + innerDecoder.ReadArrayStart(); + } + else if (currentSymbol == Symbol.ItemEnd) + { + if ( innerDecoder.ReadArrayNext() == 0 ) + { + // pop the repeater + stack.Pop(); + } + } + else if ( currentSymbol == Symbol.MapStart) + { + innerDecoder.SkipMap(); + innerDecoder.reader.Read(); + } + else + { + switch (currentSymbol) + { + case Symbol.FieldAdjustAction fa: + break; + case Symbol.ImplicitAction ia: + break; + case Symbol.Repeater r: + foreach(var s in r.Production) + { + stack.Push(s); + } + break; + default: + innerDecoder.Advance(currentSymbol); + if ( currentSymbol == Symbol.String && stack.Peek() is Symbol.ConstCheckAction) + { + var constCheck = (Symbol.ConstCheckAction)stack.Pop(); + if ( innerDecoder.reader.TokenType != JsonToken.String || !constCheck.Check(innerDecoder.reader.Value)) + { + return false; + } + } + else + if ((currentSymbol == Symbol.Boolean && innerDecoder.reader.TokenType != JsonToken.Boolean) || + (currentSymbol == Symbol.Int && innerDecoder.reader.TokenType != JsonToken.Integer) || + (currentSymbol == Symbol.Long && innerDecoder.reader.TokenType != JsonToken.Integer) || + (currentSymbol == Symbol.Float && innerDecoder.reader.TokenType != JsonToken.Float) || + (currentSymbol == Symbol.Double && innerDecoder.reader.TokenType != JsonToken.Float) || + (currentSymbol == Symbol.String && innerDecoder.reader.TokenType != JsonToken.String) || + (currentSymbol == Symbol.Bytes && innerDecoder.reader.TokenType != JsonToken.String) || + (currentSymbol == Symbol.JsonDateTime && innerDecoder.reader.TokenType != JsonToken.Date) || + (currentSymbol == Symbol.Fixed && innerDecoder.reader.TokenType != JsonToken.String) || + (currentSymbol == Symbol.Enum && innerDecoder.reader.TokenType != JsonToken.String)) + { + return false; + } + innerDecoder.reader.Read(); + break; + } } - objectReader.Read(); } + innerDecoder.Parser.ProcessTrailingImplicitActions(); } catch (AvroTypeException) { @@ -998,7 +1056,14 @@ public override Symbol DoAction(Symbol input, Symbol top) .Aggregate((x, y) => x + ", " + y)); } - currentReorderBuffer = reorderBuffers.Pop(); + if (reorderBuffers.Count > 0) + { + currentReorderBuffer = reorderBuffers.Pop(); + } + else + { + currentReorderBuffer = null; + } } // AVRO-2034 advance beyond the end object for the next record. diff --git a/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs b/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs index f8e07f09d9d..4be8cf5cdb6 100644 --- a/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs +++ b/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs @@ -19,6 +19,7 @@ using System; using System.Collections.Generic; using Avro.Util; +using Newtonsoft.Json; namespace Avro.IO.Parsing { @@ -100,7 +101,16 @@ protected override Symbol Generate(Schema sc, IDictionary seen) name = jsonName; } production[--i] = new Symbol.FieldAdjustAction(n, name, f.Aliases); - production[--i] = Generate(f.Schema, seen); + string constValue = f.GetProperty("const"); + if (constValue != null) + { + var constObj = JsonConvert.DeserializeObject(constValue); + production[--i] = Symbol.NewSeq(new Symbol.ConstCheckAction(constObj), Generate(f.Schema, seen)); + } + else + { + production[--i] = Generate(f.Schema, seen); + } production[--i] = Symbol.FieldEnd; n++; } diff --git a/lang/csharp/src/apache/main/IO/Parsing/Parser.cs b/lang/csharp/src/apache/main/IO/Parsing/Parser.cs index ae788ede05d..986ac81aecf 100644 --- a/lang/csharp/src/apache/main/IO/Parsing/Parser.cs +++ b/lang/csharp/src/apache/main/IO/Parsing/Parser.cs @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -225,5 +225,19 @@ public virtual void Reset() { Pos = 1; } + + /// + /// Clones the stack. + /// + /// A copy of the stack + public Symbol[] CloneStack() + { + var newStack = new Symbol[Pos]; + for (int i = 0; i < Pos; i++) + { + newStack[i] = Stack[i]; + } + return newStack; + } } } diff --git a/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs b/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs index b4a79987ee0..03801fcef18 100644 --- a/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs +++ b/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs @@ -653,6 +653,32 @@ public IntCheckAction(int size) : base(Kind.ExplicitAction) } } + /// + /// The const check action. + /// + public class ConstCheckAction : Symbol + { + /// + /// The value. + /// + public object Value { get; private set; } + + /// + /// Constructor + /// + /// + public ConstCheckAction(object value) : base(Kind.ExplicitAction) + { + Value = value; + Production = new Symbol[0]; + } + + internal bool Check(object value) + { + return Value.Equals(value); + } + } + /// /// The writer union action. /// diff --git a/lang/csharp/src/apache/main/IO/Parsing/ValidatingGrammarGenerator.cs b/lang/csharp/src/apache/main/IO/Parsing/ValidatingGrammarGenerator.cs index 7d109660671..2f923d8fcea 100644 --- a/lang/csharp/src/apache/main/IO/Parsing/ValidatingGrammarGenerator.cs +++ b/lang/csharp/src/apache/main/IO/Parsing/ValidatingGrammarGenerator.cs @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/lang/csharp/src/apache/test/IO/JsonCodecTests.cs b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs index b45a8042a1c..ea792f50bf5 100644 --- a/lang/csharp/src/apache/test/IO/JsonCodecTests.cs +++ b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs @@ -362,7 +362,7 @@ public void TestJsonUnionWithRecordPlainJson(String value) Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson)); } - + [TestCase("{\"f1\":123}")] [TestCase("{\"f1\":\"abc\"}")] public void TestJsonRecordUnionPlainJson(String value) @@ -381,6 +381,112 @@ public void TestJsonRecordUnionPlainJson(String value) Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson)); } + [TestCase("{\"f1\":\"abc\",\"type\":\"r1\"}")] + [TestCase("{\"f1\":\"abc\",\"type\":\"r2\"}")] + public void TestJsonRecordUnionConstPlainJson(String value) + { + Schema schema = Schema.Parse( + "[" + + " {\"type\":\"record\",\"name\":\"myrecord1\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f1\",\"type\": \"string\"},{\"name\":\"type\",\"type\": \"string\", \"const\":\"r1\"}]}," + + " {\"type\":\"record\",\"name\":\"myrecord2\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f1\",\"type\": \"string\"},{\"name\":\"type\",\"type\": \"string\", \"const\":\"r2\"}]}" + + "]"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value, JsonMode.PlainJson); + object o = reader.Read(null, decoder); + + Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson)); + } + + [TestCase("{\"f1\":[1,2,3]}")] + [TestCase("{\"f1\":[\"a\",\"b\",\"c\"]}")] + public void TestJsonRecordUnionWithArrayPlainJson(String value) + { + Schema schema = Schema.Parse( + "[" + + " {\"type\":\"record\",\"name\":\"myrecord1\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f1\",\"type\": {\"type\":\"array\", \"items\":\"int\"}}]}," + + " {\"type\":\"record\",\"name\":\"myrecord2\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f1\",\"type\": {\"type\":\"array\", \"items\":\"string\"}}]}" + + "]"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value, JsonMode.PlainJson); + object o = reader.Read(null, decoder); + + Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson)); + } + + [TestCase("{\"f1\":{\"f2\":123},\"f3\":\"abc\"}")] + [TestCase("{\"f1\":{\"f2\":\"abc\"},\"f3\":\"abc\"}")] + public void TestJsonRecordUnionWithNestedRecordPlainJson(String value) + { + Schema schema = Schema.Parse( + "[" + + " {\"type\":\"record\",\"name\":\"myrecord1\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f1\",\"type\": "+ + " {\"type\":\"record\",\"name\":\"myrecord11\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f2\",\"type\":\"int\"}]}},"+ + " {\"name\":\"f3\", \"type\":\"string\"}]}," + + " {\"type\":\"record\",\"name\":\"myrecord2\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f1\",\"type\": "+ + " {\"type\":\"record\",\"name\":\"myrecord12\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f2\",\"type\":\"string\"}]}}," + + " {\"name\":\"f3\", \"type\":\"string\"}]}" + + "]"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value, JsonMode.PlainJson); + object o = reader.Read(null, decoder); + + Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson)); + } + + [TestCase("{\"f1\":[{\"f2\":123}],\"f3\":\"abc\"}")] + [TestCase("{\"f1\":[{\"f2\":\"abc\"}],\"f3\":\"abc\"}")] + public void TestJsonRecordUnionWithNestedRecordArrayPlainJson(String value) + { + Schema schema = Schema.Parse( + "[" + + " {\"type\":\"record\",\"name\":\"myrecord1\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f1\",\"type\": " + + " {\"type\":\"array\", \"items\": " + + " {\"type\":\"record\",\"name\":\"myrecord11\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f2\",\"type\":\"int\"}]}}}," + + " {\"name\":\"f3\", \"type\":\"string\"}]}," + + " {\"type\":\"record\",\"name\":\"myrecord2\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f1\",\"type\": " + + " {\"type\":\"array\", \"items\": " + + " {\"type\":\"record\",\"name\":\"myrecord12\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f2\",\"type\":\"string\"}]}}}," + + " {\"name\":\"f3\", \"type\":\"string\"}]}" + + "]"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value, JsonMode.PlainJson); + object o = reader.Read(null, decoder); + + Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson)); + } + + [TestCase("{\"f1\":123, \"f2\":\"abc\"}")] + [TestCase("{\"f2\":\"abc\", \"f1\": 123}")] + [TestCase("{\"f1\":\"abc\", \"f2\": 123}")] + [TestCase("{\"f2\":123, \"f1\":\"abc\"}")] + public void TestJsonRecordUnionSwappedFieldOrderPlainJson(String value) + { + Schema schema = Schema.Parse( + "[" + + " {\"type\":\"record\",\"name\":\"myrecord1\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f1\",\"type\": \"int\"}, {\"name\":\"f2\",\"type\": \"string\"}]}," + + " {\"type\":\"record\",\"name\":\"myrecord2\", \"namespace\":\"com\"," + + " \"fields\":[{\"name\":\"f1\",\"type\": \"string\"}, {\"name\":\"f2\",\"type\": \"int\"}]}" + + "]"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value, JsonMode.PlainJson); + object o = reader.Read(null, decoder); + + AssertEquivalent(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson)); + } + [TestCase("int", 1)] [TestCase("long", 1L)] [TestCase("float", 1.0F)] @@ -548,6 +654,11 @@ private string fromDatumToJson(object datum, Schema schema, bool includeNamespac return Encoding.UTF8.GetString(output.ToArray()); } + + private void AssertEquivalent(string json1, string json2) + { + Assert.IsTrue(JToken.DeepEquals(JToken.Parse(json1), JToken.Parse(json2))); + } } public partial class Root : global::Avro.Specific.ISpecificRecord From 542a87455cb666b81b3923d20159676dad18f751 Mon Sep 17 00:00:00 2001 From: Clemens Vasters Date: Fri, 3 May 2024 09:26:06 +0200 Subject: [PATCH 3/3] Added JSON root records feature. --- .../main/IO/Parsing/JsonGrammarGenerator.cs | 60 ++++++++++--------- lang/csharp/src/apache/main/Schema/Field.cs | 24 +++++++- .../src/apache/main/Schema/RecordSchema.cs | 20 ++++++- .../src/apache/test/IO/JsonCodecTests.cs | 52 ++++++++++++++++ .../src/apache/test/Schema/SchemaTests.cs | 4 ++ 5 files changed, 129 insertions(+), 31 deletions(-) diff --git a/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs b/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs index 4be8cf5cdb6..20325a7b7ce 100644 --- a/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs +++ b/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs @@ -86,38 +86,44 @@ protected override Symbol Generate(Schema sc, IDictionary seen) LitS wsc = new LitS(sc); if (!seen.TryGetValue(wsc, out Symbol rresult)) { - Symbol[] production = new Symbol[((RecordSchema)sc).Fields.Count * 3 + 2]; - rresult = Symbol.NewSeq(production); - seen[wsc] = rresult; - - int i = production.Length; - int n = 0; - production[--i] = Symbol.RecordStart; - foreach (Field f in ((RecordSchema)sc).Fields) + RecordSchema recordSchema = (RecordSchema)sc; + if (recordSchema.TryGetRootField(out var rootField)) { - var name = f.Name; - if (f.AlternateNames != null && f.AlternateNames.TryGetValue("json", out string jsonName)) - { - name = jsonName; - } - production[--i] = new Symbol.FieldAdjustAction(n, name, f.Aliases); - string constValue = f.GetProperty("const"); - if (constValue != null) - { - var constObj = JsonConvert.DeserializeObject(constValue); - production[--i] = Symbol.NewSeq(new Symbol.ConstCheckAction(constObj), Generate(f.Schema, seen)); - } - else + return Generate(rootField.Schema, seen); + } + else + { + Symbol[] production = new Symbol[recordSchema.Fields.Count * 3 + 2]; + rresult = Symbol.NewSeq(production); + seen[wsc] = rresult; + + int i = production.Length; + int n = 0; + production[--i] = Symbol.RecordStart; + foreach (Field f in recordSchema.Fields) { - production[--i] = Generate(f.Schema, seen); + var name = f.Name; + if (f.AlternateNames != null && f.AlternateNames.TryGetValue("json", out string jsonName)) + { + name = jsonName; + } + production[--i] = new Symbol.FieldAdjustAction(n, name, f.Aliases); + string constValue = f.GetProperty("const"); + if (constValue != null) + { + var constObj = JsonConvert.DeserializeObject(constValue); + production[--i] = Symbol.NewSeq(new Symbol.ConstCheckAction(constObj), Generate(f.Schema, seen)); + } + else + { + production[--i] = Generate(f.Schema, seen); + } + production[--i] = Symbol.FieldEnd; + n++; } - production[--i] = Symbol.FieldEnd; - n++; + production[i - 1] = Symbol.RecordEnd; } - - production[i - 1] = Symbol.RecordEnd; } - return rresult; } case Schema.Type.Logical: diff --git a/lang/csharp/src/apache/main/Schema/Field.cs b/lang/csharp/src/apache/main/Schema/Field.cs index 77b99aa73a8..1b1ff567fc8 100644 --- a/lang/csharp/src/apache/main/Schema/Field.cs +++ b/lang/csharp/src/apache/main/Schema/Field.cs @@ -59,6 +59,11 @@ public enum SortOrder /// public IDictionary AlternateNames { get; private set; } + /// + /// Whether the field is a root field. There must only be one such field in a record. + /// + public bool Root { get; private set; } + /// /// List of aliases for the field name. /// @@ -119,11 +124,12 @@ public Field(Schema schema, int pos, IList aliases = null, IDictionary alternateNames = null, + bool root = false, string doc = null, JToken defaultValue = null, SortOrder sortorder = SortOrder.ignore, PropertyMap customProperties = null) - : this(schema, name, aliases, alternateNames, pos, doc, defaultValue, sortorder, customProperties) + : this(schema, name, aliases, alternateNames, root, pos, doc, defaultValue, sortorder, customProperties) { } @@ -133,7 +139,7 @@ public Field(Schema schema, /// A clone of this field with new position. internal Field ChangePosition(int newPosition) { - return new Field(Schema, Name, newPosition, Aliases, AlternateNames, Documentation, DefaultValue, Ordering ?? SortOrder.ignore, Props); + return new Field(Schema, Name, newPosition, Aliases, AlternateNames, Root, Documentation, DefaultValue, Ordering ?? SortOrder.ignore, Props); } /// @@ -143,6 +149,7 @@ internal Field ChangePosition(int newPosition) /// name of the field /// list of aliases for the name of the field /// dictionary of alternate names for the field + /// whether the field is a root field. There must only be one such field in a record /// position of the field /// documentation for the field /// field's default value if it exists @@ -153,7 +160,7 @@ internal Field ChangePosition(int newPosition) /// or /// type - type cannot be null. /// - internal Field(Schema schema, string name, IList aliases, IDictionary alternateNames, int pos, string doc, + internal Field(Schema schema, string name, IList aliases, IDictionary alternateNames, bool root, int pos, string doc, JToken defaultValue, SortOrder sortorder, PropertyMap props) { if (string.IsNullOrEmpty(name)) @@ -164,11 +171,17 @@ internal Field(Schema schema, string name, IList aliases, IDictionary @@ -219,6 +232,11 @@ protected internal void writeJson(JsonTextWriter writer, SchemaNames names, stri } writer.WriteEndObject(); } + if (Root) + { + writer.WritePropertyName("root"); + writer.WriteValue(Root); + } writer.WriteEndObject(); } diff --git a/lang/csharp/src/apache/main/Schema/RecordSchema.cs b/lang/csharp/src/apache/main/Schema/RecordSchema.cs index 8ace268cbfe..f22d610377f 100644 --- a/lang/csharp/src/apache/main/Schema/RecordSchema.cs +++ b/lang/csharp/src/apache/main/Schema/RecordSchema.cs @@ -55,6 +55,21 @@ public List Fields /// public int Count { get { return Fields.Count; } } + /// + /// Whether the record has a root field + /// + public bool HasRootField => Fields?.Any(f => f.Root)??false; + + /// + /// Gets the root field of the record, if any + /// + /// Root field schema + public bool TryGetRootField(out Field field) + { + field = Fields?.FirstOrDefault(f => f.Root); + return field != null; + } + /// /// Map of field name and Field object for faster field lookups /// @@ -276,18 +291,21 @@ private static Field createField(JToken jfield, int pos, SchemaNames names, stri var alternateNames = Field.GetAlternateNames(jfield); var props = Schema.GetProperties(jfield); var defaultValue = jfield["default"]; + var root = JsonHelper.GetOptionalBoolean(jfield, "root")??false; JToken jtype = jfield["type"]; if (null == jtype) throw new SchemaParseException($"'type' was not found for field: name at '{jfield.Path}'"); var schema = Schema.ParseJson(jtype, names, encspace); - return new Field(schema, name, aliases, alternateNames, pos, doc, defaultValue, sortorder, props); + return new Field(schema, name, aliases, alternateNames, root, pos, doc, defaultValue, sortorder, props); } private static void addToFieldMap(Dictionary map, string name, Field field) { if (map.ContainsKey(name)) throw new AvroException("field or alias " + name + " is a duplicate name"); + if ((field.Root && map.Count > 0) || map.Values.Any(f => f.Root)) + throw new AvroException("When 'root': true is set on a field, no other fields may exist"); map.Add(name, field); } diff --git a/lang/csharp/src/apache/test/IO/JsonCodecTests.cs b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs index ea792f50bf5..9a6a596e1fd 100644 --- a/lang/csharp/src/apache/test/IO/JsonCodecTests.cs +++ b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs @@ -292,6 +292,8 @@ public void TestJsonRecordOrderingWithProjection2() } } + + [TestCase("{\"int\":123}")] [TestCase("{\"string\":\"12345678-1234-5678-1234-123456789012\"}")] [TestCase("null")] @@ -659,6 +661,56 @@ private void AssertEquivalent(string json1, string json2) { Assert.IsTrue(JToken.DeepEquals(JToken.Parse(json1), JToken.Parse(json2))); } + + [Test] + public void TestRootFeatureWithArray() + { + string schemaStr = "{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + + "{ \"name\" : \"rootField\", \"type\": { \"type\": \"array\", \"items\": \"int\" }, \"root\": true } " + + "] }"; + string json = "[1,2,3]"; + + Schema schema = Schema.Parse(schemaStr); + byte[] avroBytes = fromJsonToAvro(json, schema, JsonMode.PlainJson); + Assert.IsNotNull(avroBytes); + string convertedJson = fromAvroToJson(avroBytes, schema, false, JsonMode.PlainJson); + Assert.AreEqual(json, convertedJson); + } + + [Test] + public void TestRootFeatureWithMap() + { + string schemaStr = "{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + + "{ \"name\" : \"rootField\", \"type\": { \"type\": \"map\", \"values\": \"string\" }, \"root\": true } " + + "] }"; + string json = "{\"key1\":\"value1\",\"key2\":\"value2\"}"; + + Schema schema = Schema.Parse(schemaStr); + byte[] avroBytes = fromJsonToAvro(json, schema, JsonMode.PlainJson); + Assert.IsNotNull(avroBytes); + string convertedJson = fromAvroToJson(avroBytes, schema, false, JsonMode.PlainJson); + Assert.AreEqual(json, convertedJson); + } + + [Test] + public void TestRootFeatureWithMultipleRootFields() + { + string schemaStr = "{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + + "{ \"name\" : \"rootField1\", \"type\": { \"type\": \"array\", \"items\": \"int\" }, \"root\": true }, " + + "{ \"name\" : \"rootField2\", \"type\": { \"type\": \"map\", \"values\": \"string\" }, \"root\": true } " + + "] }"; + Assert.Throws(() => Schema.Parse(schemaStr)); + } + + [Test] + public void TestRootFeatureWithAdditionalField() + { + string schemaStr = "{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + + "{ \"name\" : \"rootField\", \"type\": { \"type\": \"array\", \"items\": \"int\" }, \"root\": true }, " + + "{ \"name\" : \"additionalField\", \"type\": \"string\" } " + + "] }"; + Assert.Throws(() => Schema.Parse(schemaStr)); + } } public partial class Root : global::Avro.Specific.ISpecificRecord diff --git a/lang/csharp/src/apache/test/Schema/SchemaTests.cs b/lang/csharp/src/apache/test/Schema/SchemaTests.cs index 6e39ab49a93..5187faaada5 100644 --- a/lang/csharp/src/apache/test/Schema/SchemaTests.cs +++ b/lang/csharp/src/apache/test/Schema/SchemaTests.cs @@ -312,6 +312,7 @@ public void TestRecordCreation(string expectedSchema, string name, string space, fieldName, fieldsAliases[i] == null? null: new List { fieldsAliases[i] }, null, + false, i, fieldsDocs[i], fieldsDefaultValues[i].ToString(), @@ -348,6 +349,7 @@ public void TestRecordCreationWithDuplicateFields() "value", new List { "oldName" }, null, + false, 0, null, "100", @@ -370,6 +372,7 @@ public void TestRecordFieldNames() { "歳以上", null, null, + false, 0, null, null, @@ -395,6 +398,7 @@ public void TestRecordCreationWithRecursiveRecord() "value", null, null, + false, 0, null, null,