Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AVRO-2825: [csharp] Resolve: C# Logical Types throw exception on unknown logical type #2741

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions lang/csharp/src/apache/main/CodeGen/CodeGen.cs
Original file line number Diff line number Diff line change
Expand Up @@ -893,13 +893,14 @@ protected virtual CodeTypeDeclaration processRecord(Schema schema)
ctd.Members.Add(cmmPut);

string nspace = recordSchema.Namespace;
if (string.IsNullOrEmpty(nspace))
{
throw new CodeGenException("Namespace required for record schema " + recordSchema.Name);
}

CodeNamespace codens = AddNamespace(nspace);

//if (string.IsNullOrEmpty(nspace))
//{
// throw new CodeGenException("Namespace required for record schema " + recordSchema.Name);
//}

// AVRO spec DOES NOT require a Namespace but this code does.
// Workaround is to inject a fixed string that will be obvious if required
CodeNamespace codens = (!string.IsNullOrEmpty(nspace)) ? AddNamespace(nspace) : AddNamespace(@"SchemaHadNoNamespace");
Comment on lines -896 to +903
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not related to logical types. Please move to a separate pull request.

codens.Types.Add(ctd);

return ctd;
Expand Down
11 changes: 8 additions & 3 deletions lang/csharp/src/apache/main/Schema/Schema.cs
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,14 @@
return ArraySchema.NewInstance(jtok, props, names, encspace);
if (type.Equals("map", StringComparison.Ordinal))
return MapSchema.NewInstance(jtok, props, names, encspace);
if (null != jo["logicalType"]) // logical type based on a primitive
return LogicalSchema.NewInstance(jtok, props, names, encspace);

try
{
if (null != jo["logicalType"]) // logical type based on a primitive
return LogicalSchema.NewInstance(jtok, props, names, encspace);
}
// swallow exception from unknown logicalType
catch { }

Check notice

Code scanning / CodeQL

Generic catch clause Note

Generic catch clause.

Check notice

Code scanning / CodeQL

Poor error handling: empty catch block Note

Poor error handling: empty catch block.

Comment on lines +195 to +202
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can LogicalSchema.NewInstance instead be made to work with unrecognized logical types? For these purposes:

  • Avoid swallowing any exceptions thrown for other reasons.
  • Allow applications to parse a schema and read the name of the logical type from LogicalSchema.LogicalTypeName (or even LogicalType.Name) regardless of whether the library recognizes it.
  • Allow applications to parse a schema and serialize it back to JSON without losing the unrecognized logical types here:
    private static readonly HashSet<string> ReservedProps = new HashSet<string>() { "type", "name", "namespace", "fields", "items", "size", "symbols", "values", "aliases", "order", "doc", "default", "logicalType" };
    if (ReservedProps.Contains(prop.Name))
    continue;

LogicalSchema.LogicalType could then be null, or perhaps an instance of a new NotSupportedLogicalType class that would pass everything through without conversion.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will work on that, thanks for the feedback

Schema schema = PrimitiveSchema.NewInstance((string)type, props);
if (null != schema)
return schema;
Expand Down
4 changes: 3 additions & 1 deletion lang/csharp/src/apache/test/AvroGen/AvroGenSchemaTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,9 @@ public void NotSupportedSchema(string schema, Type expectedException)
string schemaFileName = Path.Combine(outputDir, $"{uniqueId}.avsc");
System.IO.File.WriteAllText(schemaFileName, schema);

Assert.That(AvroGenTool.GenSchema(schemaFileName, outputDir, new Dictionary<string, string>(), false), Is.EqualTo(1));
// Note: Unknown logical types are now ignored
//Assert.That(AvroGenTool.GenSchema(schemaFileName, outputDir, new Dictionary<string, string>(), false), Is.EqualTo(1));
Assert.That(AvroGenTool.GenSchema(schemaFileName, outputDir, new Dictionary<string, string>(), false), Is.EqualTo(0));
}
finally
{
Expand Down
12 changes: 10 additions & 2 deletions lang/csharp/src/apache/test/Schema/SchemaTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -548,12 +548,20 @@ public void TestLogicalPrimitive(string s, string baseType, string logicalType)
testToString(sc);
}

// Per AVRO Spec (v1.8.0 - v1.11.1) ... Logical Types Section
// Language implementations must ignore unknown logical types when reading, and should use the underlying Avro type.
[TestCase("{\"type\": \"int\", \"logicalType\": \"unknown\"}", "unknown")]
public void TestUnknownLogical(string s, string unknownType)
{
var err = Assert.Throws<AvroTypeException>(() => Schema.Parse(s));
//var err = Assert.Throws<AvroTypeException>(() => Schema.Parse(s));
Assert.DoesNotThrow(() =>
{
var schema = Schema.Parse(s);
// confirm that the schema defaults to the underlying type
Assert.IsTrue(schema.Name == @"int");
});

Assert.AreEqual("Logical type '" + unknownType + "' is not supported.", err.Message);
//Assert.AreEqual("Logical type '" + unknownType + "' is not supported.", err.Message);
}

[TestCase("{\"type\": \"map\", \"values\": \"long\"}", "long")]
Expand Down
55 changes: 55 additions & 0 deletions lang/csharp/src/apache/test/Util/LogicalTypeTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
*/

using System;
using System.Composition.Convention;
using System.Globalization;
using System.Linq;
using System.Numerics;
using Avro.Util;
using NUnit.Framework;
Expand Down Expand Up @@ -419,5 +421,58 @@
var converted = (Guid) avroUuid.ConvertToLogicalValue(avroUuid.ConvertToBaseValue(guid, schema), schema);
Assert.AreEqual(guid, converted);
}

/*
{
"fields": [
{
"default": 0,
"name": "firstField",
"type": "int"
},
{
"default": null,
"name": "secondField",
"type": [
"null",
{
"logicalType": "varchar",
"maxLength": 65,
"type": "string"
}
]
}
],
"name": "sample_schema",
"type": "record"
}
*/

// Before Change will throw Avro.AvroTypeException: 'Logical type 'varchar' is not supported.'
// Per AVRO Spec (v1.8.0 - v1.11.1) ... Logical Types Section
// Language implementations must ignore unknown logical types when reading, and should use the underlying Avro type.
[TestCase("{\"fields\": [{\"default\": 0,\"name\": \"firstField\",\"type\": \"int\"},{\"default\": null,\"name\": \"secondField\",\"type\": [\"null\",{\"logicalType\": \"varchar\",\"maxLength\": 65,\"type\": \"string\"}]}],\"name\": \"sample_schema\",\"type\": \"record\"}")]
public void TestUnknownLogicalType(string schemaText)
{
var schema = Avro.Schema.Parse(schemaText);
Assert.IsNotNull(schema);

var secondField = ((RecordSchema)schema).Fields.FirstOrDefault(f => f.Name == @"secondField");
Assert.IsNotNull(secondField);

var secondFieldSchema = ((Field)secondField).Schema;

Check warning

Code scanning / CodeQL

Cast to same type Warning test

This cast is redundant because the expression already has type Field.
Assert.IsNotNull(secondFieldSchema);

var secondFieldUnionSchema = (UnionSchema)secondFieldSchema;
Assert.IsNotNull(secondFieldUnionSchema);

var props = secondFieldUnionSchema.Schemas.Where(s => s.Props != null).ToList();
Assert.IsNotNull(props);
Assert.IsTrue(props.Count == 1);

var prop = props[0];
// Confirm that the unknown logical type is ignored and the underlying AVRO type is used
Assert.IsTrue(prop.Name == @"string");
}
}
}
Loading