From bc44215245f3db621d355a81ffb9bcd68fed9a75 Mon Sep 17 00:00:00 2001
From: Clemens Vasters
Date: Thu, 2 May 2024 14:26:43 +0200
Subject: [PATCH] Record unions and "const"

---
 lang/csharp/src/apache/main/IO/JsonDecoder.cs | 149 +++++++++++++-----
 .../main/IO/Parsing/JsonGrammarGenerator.cs   |  12 +-
 .../src/apache/main/IO/Parsing/Parser.cs      |  16 +-
 .../src/apache/main/IO/Parsing/Symbol.cs      |  31 ++++
 .../IO/Parsing/ValidatingGrammarGenerator.cs  |   2 +-
 .../src/apache/test/IO/JsonCodecTests.cs      | 113 ++++++++++++-
 6 files changed, 277 insertions(+), 46 deletions(-)

diff --git a/lang/csharp/src/apache/main/IO/JsonDecoder.cs b/lang/csharp/src/apache/main/IO/JsonDecoder.cs
index bbb250c3d95..4a715092fbc 100644
--- a/lang/csharp/src/apache/main/IO/JsonDecoder.cs
+++ b/lang/csharp/src/apache/main/IO/JsonDecoder.cs
@@ -19,12 +19,16 @@
 using System;
 using System.CodeDom;
 using System.Collections.Generic;
+using System.ComponentModel;
 using System.IO;
 using System.Linq;
 using System.Text;
+using System.Xml;
+using System.Xml.Linq;
 using Avro.IO.Parsing;
 using Newtonsoft.Json;
 using Newtonsoft.Json.Linq;
+using Newtonsoft.Json.Serialization;
 
 namespace Avro.IO
 {
@@ -244,6 +248,22 @@ public override double ReadDouble()
         public override string ReadString()
         {
             Advance(Symbol.String);
+            if (Parser.TopSymbol() is Symbol.ConstCheckAction)
+            {
+                Symbol.ConstCheckAction top = (Symbol.ConstCheckAction)Parser.PopSymbol();
+                string expected = top.Value as string; // null for a non-string constant, which then never matches
+                if (reader.TokenType != JsonToken.String)
+                {
+                    throw TypeError("string");
+                }
+                string readResult = Convert.ToString(reader.Value);
+                if (!string.Equals(expected, readResult, StringComparison.Ordinal)) // null-safe comparison
+                {
+                    throw new AvroTypeException("Expected constant value: " + top.Value + " but received: " + readResult);
+                }
+                reader.Read();
+                return readResult;
+            }
             if (Parser.TopSymbol() == Symbol.MapKeyMarker)
             {
                 Parser.Advance(Symbol.MapKeyMarker);
@@ -734,55 +754,93 @@ public override int ReadUnionIndex()
             return n;
         }
 
-        private bool IsRecordMatch(Symbol symbol, JsonReader objectReader)
+        private bool IsRecordMatch(Symbol symbol, JTokenReader objectReader)
         {
+            // to determine whether a record matches, we need to read the object and compare it to the schema
+            // this is done by creating a new JsonDecoder on top of the reader and advancing it through the schema
+            // if the schema matches, we return true, otherwise false
             JsonDecoder innerDecoder = new JsonDecoder(symbol, objectReader, JsonMode.PlainJson);
+
+            // the required start condition is that the reader is at the start of the object
+            // and that the symbol is a Sequence
+            if (symbol.SymKind != Symbol.Kind.Sequence || objectReader.CurrentToken.Type != JTokenType.Object)
+            {
+                return false;
+            }
+            // advance the inner decoder to the start of the record
+            innerDecoder.Parser.Advance(Symbol.RecordStart);
+            // read the first token of the object
+            innerDecoder.reader.Read();
+            // we're now at the start of the record, so we can start processing the fields
+            // but we need to do so in the Avro schema field order, so we grab the stack
+            // of the parser and clone it
+            var stack = new Stack<Symbol>(innerDecoder.Parser.CloneStack());
             try
             {
-
-                while( objectReader.TokenType != JsonToken.None )
+                while (stack.Count > 0)
                 {
-                    switch(objectReader.TokenType)
+                    var currentSymbol = stack.Pop();
+                    if (currentSymbol == Symbol.ArrayStart)
                     {
-                        case JsonToken.PropertyName:
-                            break;
-                        case JsonToken.Integer:
-                            innerDecoder.Advance(Symbol.Int);
-                            break;
-                        case JsonToken.Float:
-                            innerDecoder.Advance(Symbol.Float);
-                            break;
-                        case JsonToken.Boolean:
-                            innerDecoder.Advance(Symbol.Boolean);
-                            break;
-                        case JsonToken.Date:
-                            innerDecoder.Advance(Symbol.JsonDateTime);
-                            break;
-                        case JsonToken.String:
-                            innerDecoder.Advance(Symbol.String);
-                            break;
-                        case JsonToken.Null:
-                            innerDecoder.Advance(Symbol.Null);
-                            break;
-                        case JsonToken.Bytes:
-                            innerDecoder.Advance(Symbol.Bytes);
-                            break;
-                        case JsonToken.StartObject:
-                            innerDecoder.Advance(Symbol.RecordStart);
-                            break;
-                        case JsonToken.EndObject:
-                            break;
-                        case JsonToken.StartArray:
-                            innerDecoder.Advance(Symbol.ArrayStart);
-                            break;
-                        case JsonToken.EndArray:
-                            innerDecoder.Advance(Symbol.ArrayEnd);
-                            break;
-                        default:
-                            break;
+                        innerDecoder.ReadArrayStart();
+                    }
+                    else if (currentSymbol == Symbol.ItemEnd)
+                    {
+                        if (innerDecoder.ReadArrayNext() == 0 && stack.Count > 0) // Pop() on an empty stack would throw
+                        {
+                            // pop the repeater
+                            stack.Pop();
+                        }
+                    }
+                    else if (currentSymbol == Symbol.MapStart)
+                    {
+                        innerDecoder.SkipMap();
+                        innerDecoder.reader.Read();
+                    }
+                    else
+                    {
+                        switch (currentSymbol)
+                        {
+                            case Symbol.FieldAdjustAction _:
+                                break;
+                            case Symbol.ImplicitAction _:
+                                break;
+                            case Symbol.Repeater r:
+                                foreach (var s in r.Production)
+                                {
+                                    stack.Push(s);
+                                }
+                                break;
+                            default:
+                                innerDecoder.Advance(currentSymbol);
+                                if (currentSymbol == Symbol.String && stack.Count > 0 && stack.Peek() is Symbol.ConstCheckAction) // Peek() on an empty stack would throw
+                                {
+                                    var constCheck = (Symbol.ConstCheckAction)stack.Pop();
+                                    if (innerDecoder.reader.TokenType != JsonToken.String || !constCheck.Check(innerDecoder.reader.Value))
+                                    {
+                                        return false;
+                                    }
+                                }
+                                else
+                                if ((currentSymbol == Symbol.Boolean && innerDecoder.reader.TokenType != JsonToken.Boolean) ||
+                                    (currentSymbol == Symbol.Int && innerDecoder.reader.TokenType != JsonToken.Integer) ||
+                                    (currentSymbol == Symbol.Long && innerDecoder.reader.TokenType != JsonToken.Integer) ||
+                                    (currentSymbol == Symbol.Float && innerDecoder.reader.TokenType != JsonToken.Float) ||
+                                    (currentSymbol == Symbol.Double && innerDecoder.reader.TokenType != JsonToken.Float) ||
+                                    (currentSymbol == Symbol.String && innerDecoder.reader.TokenType != JsonToken.String) ||
+                                    (currentSymbol == Symbol.Bytes && innerDecoder.reader.TokenType != JsonToken.String) ||
+                                    (currentSymbol == Symbol.JsonDateTime && innerDecoder.reader.TokenType != JsonToken.Date) ||
+                                    (currentSymbol == Symbol.Fixed && innerDecoder.reader.TokenType != JsonToken.String) ||
+                                    (currentSymbol == Symbol.Enum && innerDecoder.reader.TokenType != JsonToken.String))
+                                {
+                                    return false;
+                                }
+                                innerDecoder.reader.Read();
+                                break;
+                        }
                     }
-                    objectReader.Read();
                 }
+                innerDecoder.Parser.ProcessTrailingImplicitActions();
             }
             catch (AvroTypeException)
             {
@@ -998,7 +1056,14 @@ public override Symbol DoAction(Symbol input, Symbol top)
                                 .Aggregate((x, y) => x + ", " + y));
                         }
 
-                        currentReorderBuffer = reorderBuffers.Pop();
+                        if (reorderBuffers.Count > 0)
+                        {
+                            currentReorderBuffer = reorderBuffers.Pop();
+                        }
+                        else
+                        {
+                            currentReorderBuffer = null;
+                        }
                     }
 
                     // AVRO-2034 advance beyond the end object for the next record.
diff --git a/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs b/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs
index f8e07f09d9d..4be8cf5cdb6 100644
--- a/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs
+++ b/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs
@@ -19,6 +19,7 @@
 using System;
 using System.Collections.Generic;
 using Avro.Util;
+using Newtonsoft.Json;
 
 namespace Avro.IO.Parsing
 {
@@ -100,7 +101,16 @@ protected override Symbol Generate(Schema sc, IDictionary<LitS, Symbol> seen)
                                 name = jsonName;
                             }
                             production[--i] = new Symbol.FieldAdjustAction(n, name, f.Aliases);
-                            production[--i] = Generate(f.Schema, seen);
+                            string constValue = f.GetProperty("const");
+                            if (constValue != null)
+                            {
+                                var constObj = JsonConvert.DeserializeObject(constValue);
+                                production[--i] = Symbol.NewSeq(new Symbol.ConstCheckAction(constObj), Generate(f.Schema, seen));
+                            }
+                            else
+                            {
+                                production[--i] = Generate(f.Schema, seen);
+                            }
                             production[--i] = Symbol.FieldEnd;
                             n++;
                         }
diff --git a/lang/csharp/src/apache/main/IO/Parsing/Parser.cs b/lang/csharp/src/apache/main/IO/Parsing/Parser.cs
index ae788ede05d..986ac81aecf 100644
--- a/lang/csharp/src/apache/main/IO/Parsing/Parser.cs
+++ b/lang/csharp/src/apache/main/IO/Parsing/Parser.cs
@@ -1,4 +1,4 @@
-/*
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -225,5 +225,19 @@ public virtual void Reset()
         {
             Pos = 1;
         }
+
+        /// <summary>
+        /// Clones the stack.
+        /// </summary>
+        /// <returns>A copy of the stack</returns>
+        public Symbol[] CloneStack()
+        {
+            var newStack = new Symbol[Pos];
+            for (int i = 0; i < Pos; i++)
+            {
+                newStack[i] = Stack[i];
+            }
+            return newStack;
+        }
     }
 }
diff --git a/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs b/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs
index b4a79987ee0..03801fcef18 100644
--- a/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs
+++ b/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs
@@ -653,6 +653,37 @@ public IntCheckAction(int size) : base(Kind.ExplicitAction)
             }
         }
 
+        /// <summary>
+        /// The const check action.
+        /// </summary>
+        public class ConstCheckAction : Symbol
+        {
+            /// <summary>
+            /// The value.
+            /// </summary>
+            public object Value { get; private set; }
+
+            /// <summary>
+            /// Constructor
+            /// </summary>
+            /// <param name="value">The expected constant value.</param>
+            public ConstCheckAction(object value) : base(Kind.ExplicitAction)
+            {
+                Value = value;
+                Production = new Symbol[0];
+            }
+
+            /// <summary>
+            /// Checks whether the given value equals the constant.
+            /// </summary>
+            /// <param name="value">The value to compare; may be null.</param>
+            /// <returns>true if the value equals the constant; otherwise false.</returns>
+            internal bool Check(object value)
+            {
+                return object.Equals(Value, value); // null-safe: the constant itself may be null
+            }
+        }
+
         /// <summary>
         /// The writer union action.
         /// </summary>
diff --git a/lang/csharp/src/apache/main/IO/Parsing/ValidatingGrammarGenerator.cs b/lang/csharp/src/apache/main/IO/Parsing/ValidatingGrammarGenerator.cs
index 7d109660671..2f923d8fcea 100644
--- a/lang/csharp/src/apache/main/IO/Parsing/ValidatingGrammarGenerator.cs
+++ b/lang/csharp/src/apache/main/IO/Parsing/ValidatingGrammarGenerator.cs
@@ -1,4 +1,4 @@
-/*
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
diff --git a/lang/csharp/src/apache/test/IO/JsonCodecTests.cs b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs
index b45a8042a1c..ea792f50bf5 100644
--- a/lang/csharp/src/apache/test/IO/JsonCodecTests.cs
+++ b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs
@@ -362,7 +362,7 @@ public void TestJsonUnionWithRecordPlainJson(String value)
 
             Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson));
         }
-        
+
         [TestCase("{\"f1\":123}")]
         [TestCase("{\"f1\":\"abc\"}")]
         public void TestJsonRecordUnionPlainJson(String value)
@@ -381,6 +381,112 @@ public void TestJsonRecordUnionPlainJson(String value)
             Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson));
         }
 
+        [TestCase("{\"f1\":\"abc\",\"type\":\"r1\"}")]
+        [TestCase("{\"f1\":\"abc\",\"type\":\"r2\"}")]
+        public void TestJsonRecordUnionConstPlainJson(String value)
+        {
+            Schema schema = Schema.Parse(
+                "[" +
+                " {\"type\":\"record\",\"name\":\"myrecord1\", \"namespace\":\"com\"," +
+                " \"fields\":[{\"name\":\"f1\",\"type\": \"string\"},{\"name\":\"type\",\"type\": \"string\", \"const\":\"r1\"}]}," +
+                " {\"type\":\"record\",\"name\":\"myrecord2\", \"namespace\":\"com\"," +
+                " \"fields\":[{\"name\":\"f1\",\"type\": \"string\"},{\"name\":\"type\",\"type\": \"string\", \"const\":\"r2\"}]}" +
+                "]");
+            GenericDatumReader<object> reader = new GenericDatumReader<object>(schema, schema);
+            Decoder decoder = new JsonDecoder(schema, value, JsonMode.PlainJson);
+            object o = reader.Read(null, decoder);
+
+            Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson));
+        }
+
+        [TestCase("{\"f1\":[1,2,3]}")]
+        [TestCase("{\"f1\":[\"a\",\"b\",\"c\"]}")]
+        public void TestJsonRecordUnionWithArrayPlainJson(String value)
+        {
+            Schema schema = Schema.Parse(
+                "[" +
+                " {\"type\":\"record\",\"name\":\"myrecord1\", \"namespace\":\"com\"," +
+                " \"fields\":[{\"name\":\"f1\",\"type\": {\"type\":\"array\", \"items\":\"int\"}}]}," +
+                " {\"type\":\"record\",\"name\":\"myrecord2\", \"namespace\":\"com\"," +
+                " \"fields\":[{\"name\":\"f1\",\"type\": {\"type\":\"array\", \"items\":\"string\"}}]}" +
+                "]");
+            GenericDatumReader<object> reader = new GenericDatumReader<object>(schema, schema);
+            Decoder decoder = new JsonDecoder(schema, value, JsonMode.PlainJson);
+            object o = reader.Read(null, decoder);
+
+            Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson));
+        }
+
+        [TestCase("{\"f1\":{\"f2\":123},\"f3\":\"abc\"}")]
+        [TestCase("{\"f1\":{\"f2\":\"abc\"},\"f3\":\"abc\"}")]
+        public void TestJsonRecordUnionWithNestedRecordPlainJson(String value)
+        {
+            Schema schema = Schema.Parse(
+                "[" +
+                " {\"type\":\"record\",\"name\":\"myrecord1\", \"namespace\":\"com\"," +
+                " \"fields\":[{\"name\":\"f1\",\"type\": "+
+                " {\"type\":\"record\",\"name\":\"myrecord11\", \"namespace\":\"com\"," +
+                " \"fields\":[{\"name\":\"f2\",\"type\":\"int\"}]}},"+
+                " {\"name\":\"f3\", \"type\":\"string\"}]}," +
+                " {\"type\":\"record\",\"name\":\"myrecord2\", \"namespace\":\"com\"," +
+                " \"fields\":[{\"name\":\"f1\",\"type\": "+
+                " {\"type\":\"record\",\"name\":\"myrecord12\", \"namespace\":\"com\"," +
+                " \"fields\":[{\"name\":\"f2\",\"type\":\"string\"}]}}," +
+                " {\"name\":\"f3\", \"type\":\"string\"}]}" +
+                "]");
+            GenericDatumReader<object> reader = new GenericDatumReader<object>(schema, schema);
+            Decoder decoder = new JsonDecoder(schema, value, JsonMode.PlainJson);
+            object o = reader.Read(null, decoder);
+
+            Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson));
+        }
+
+        [TestCase("{\"f1\":[{\"f2\":123}],\"f3\":\"abc\"}")]
+        [TestCase("{\"f1\":[{\"f2\":\"abc\"}],\"f3\":\"abc\"}")]
+        public void TestJsonRecordUnionWithNestedRecordArrayPlainJson(String value)
+        {
+            Schema schema = Schema.Parse(
+                "[" +
+                " {\"type\":\"record\",\"name\":\"myrecord1\", \"namespace\":\"com\"," +
+                " \"fields\":[{\"name\":\"f1\",\"type\": " +
+                " {\"type\":\"array\", \"items\": " +
+                " {\"type\":\"record\",\"name\":\"myrecord11\", \"namespace\":\"com\"," +
+                " \"fields\":[{\"name\":\"f2\",\"type\":\"int\"}]}}}," +
+                " {\"name\":\"f3\", \"type\":\"string\"}]}," +
+                " {\"type\":\"record\",\"name\":\"myrecord2\", \"namespace\":\"com\"," +
+                " \"fields\":[{\"name\":\"f1\",\"type\": " +
+                " {\"type\":\"array\", \"items\": " +
+                " {\"type\":\"record\",\"name\":\"myrecord12\", \"namespace\":\"com\"," +
+                " \"fields\":[{\"name\":\"f2\",\"type\":\"string\"}]}}}," +
+                " {\"name\":\"f3\", \"type\":\"string\"}]}" +
+                "]");
+            GenericDatumReader<object> reader = new GenericDatumReader<object>(schema, schema);
+            Decoder decoder = new JsonDecoder(schema, value, JsonMode.PlainJson);
+            object o = reader.Read(null, decoder);
+
+            Assert.AreEqual(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson));
+        }
+
+        [TestCase("{\"f1\":123, \"f2\":\"abc\"}")]
+        [TestCase("{\"f2\":\"abc\", \"f1\": 123}")]
+        [TestCase("{\"f1\":\"abc\", \"f2\": 123}")]
+        [TestCase("{\"f2\":123, \"f1\":\"abc\"}")]
+        public void TestJsonRecordUnionSwappedFieldOrderPlainJson(String value)
+        {
+            Schema schema = Schema.Parse(
+                "[" +
+                " {\"type\":\"record\",\"name\":\"myrecord1\", \"namespace\":\"com\"," +
+                " \"fields\":[{\"name\":\"f1\",\"type\": \"int\"}, {\"name\":\"f2\",\"type\": \"string\"}]}," +
+                " {\"type\":\"record\",\"name\":\"myrecord2\", \"namespace\":\"com\"," +
+                " \"fields\":[{\"name\":\"f1\",\"type\": \"string\"}, {\"name\":\"f2\",\"type\": \"int\"}]}" +
+                "]");
+            GenericDatumReader<object> reader = new GenericDatumReader<object>(schema, schema);
+            Decoder decoder = new JsonDecoder(schema, value, JsonMode.PlainJson);
+            object o = reader.Read(null, decoder);
+
+            AssertEquivalent(value, fromDatumToJson(o, schema, true, JsonMode.PlainJson));
+        }
+
         [TestCase("int", 1)]
         [TestCase("long", 1L)]
         [TestCase("float", 1.0F)]
@@ -548,6 +654,11 @@ private string fromDatumToJson(object datum, Schema schema, bool includeNamespac
 
             return Encoding.UTF8.GetString(output.ToArray());
         }
+
+        private void AssertEquivalent(string json1, string json2)
+        {
+            Assert.IsTrue(JToken.DeepEquals(JToken.Parse(json1), JToken.Parse(json2)));
+        }
     }
 
     public partial class Root : global::Avro.Specific.ISpecificRecord