diff --git a/README.md b/README.md index c44435c..c9e31f5 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ A performant, portable [jq](https://github.com/stedolan/jq/) wrapper thats facil * Compiles to a single binary for easy portability. * Performant, similar performance with JSON data compared to `jq`. Slightly longer execution time when going to/from a non-JSON format. -* Supports XML and YAML as additional output formats. +* Supports XML and YAML as additional input/output formats. ## Installation @@ -32,7 +32,7 @@ brew install oq If building from source, `jq` will need to be installed separately. Installation instructions can be found in the [official documentation](https://stedolan.github.io/jq/). -Requires Crystal to be installed, see the [installation documentation](https://crystal-lang.org/reference/installation/). +Requires Crystal to be installed, see the [installation documentation](https://crystal-lang.org/install). ```bash git clone https://github.com/Blacksmoke16/oq.git @@ -48,6 +48,42 @@ The built binary will be available as `./bin/oq`. This can be relocated elsewhe Use the `oq` binary, with a few optional custom arguments, see `oq --help`. All other arguments get passed to `jq`. See [jq manual](https://stedolan.github.io/jq/manual/) for details. +### Examples + +#### Consume JSON and output XML + +```bash +echo '{"name": "Jim"}' | oq -o xml . + + + Jim + +``` + +#### Consume YAML from a file and output XML + +data.yaml + +```yaml +--- +name: Jim +numbers: + - 1 + - 2 + - 3 +``` + +```bash +oq -i yaml -o xml . 
data.yaml + + + Jim + 1 + 2 + 3 + +``` + ## Roadmap Plans for `1.0.0`: diff --git a/spec/converters/xml_spec.cr b/spec/converters/xml_spec.cr index 20d146e..938d885 100644 --- a/spec/converters/xml_spec.cr +++ b/spec/converters/xml_spec.cr @@ -1,7 +1,309 @@ require "../spec_helper" +WITH_WHITESPACE = <<-XML + + 0 + 0 + 0 + 0 + -1 + 0 + +XML + +XML_SCALAR_ARRAY = <<-XML + + + 1 + 2 + 3 + +XML + +XML_CDATA = <<-XML +Some Description]]> +XML + +XML_OBJECT_ARRAY = <<-XML + + + + 0 + 0 + 0 + 0 + -1 + 0 + + + 0 + 1 + 0 + 0 + -1 + 0 + + +XML + +XML_NESTED_OBJECT_ARRAY = <<-XML + + + + + + cubsfantony + 848 + + Visa/MasterCard, Money Order/Cashiers Checks, Personal Checks, See item description for payment methods accepted + ++ + ct-inc + 403 + + Visa/MasterCard, Discover, Money Order/Cashiers Checks, Personal Checks, See item description for payment methods accepted + + +XML + +XML_INLINE_ARRAY = <<-XML +
+ E. F. Codd + Robert S. Arnold + Jean-Marc Cadiou + Chin-Liang Chang + Nick Roussopoulos + RENDEZVOUS Version 1: An Experimental English Language Query Formulation System for Casual Users of Relational Data Bases. + IBM Research Report + RJ2144 + January + 1978 + db/labs/ibm/RJ2144.html + ibmTR/rj2144.pdf +
+XML + +XML_INLINE_ARRAY_WITHIN_ARRAY = <<-XML + +
+ 1997 + db/labs/dec/SRC1997-018.html + http://www.mcjones.org/System_R/SQL_Reunion_95/ +
+
+ db/labs/gte/TR-0263-08-94-165.html + 1994 +
+
+XML + +XML_DOCTYPE = <<-XML + + + + + Kurt P. Brown + PRPL: A Database Workload Specification Language, v1.3. + 1992 + Univ. of Wisconsin-Madison + + +XML + +XML_ATTRIBUTE_IN_ARRAY = <<-XML + + + 80000 + full-time + + + full-time + + +XML + +XML_ATTRIBUTE_IN_ARRAY_ROOT_ELEMENT = <<-XML + + + + + Kurt P. Brown + + + Tolga Yurek + + +XML + +XML_ALL_EMPTY = <<-XML + + + + + +XML + describe OQ::Converters::Xml do - pending ".deserialize" do + describe ".deserialize" do + describe "should raise if invalid" do + it "should output correctly" do + run_binary(%(Fred), args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"person":"Fred"}\n) + end + end + end + + describe "that has only empty children elements" do + it "should output an object with null values" do + run_binary(XML_ALL_EMPTY, args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"root":{"one":null,"two":null}}\n) + end + end + end + + describe "with whitespace" do + it "should output correctly" do + run_binary(WITH_WHITESPACE, args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"item":{"flagID":"0","itemID":"0","locationID":"0","ownerID":"0","quantity":"-1","typeID":"0"}}\n) + end + end + end + + describe "with the prolog" do + it "should output correctly" do + run_binary(%(0), args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"item":{"typeID":"0"}}\n) + end + end + end + + describe "a simple object" do + it "should output correctly" do + run_binary(%(JaneDoe), args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"person":{"firstname":"Jane","lastname":"Doe"}}\n) + end + end + end + + describe "attributes" do + it "should output correctly" do + run_binary(%(JaneDoe), args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"person":{"@id":"1","@foo":"bar","firstname":"Jane","lastname":"Doe"}}\n) + end + end + end + + describe "nested objects" do + it "should output correctly" do + run_binary(%(JaneDoe15061
123 Foo Street
), args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"person":{"firstname":"Jane","lastname":"Doe","location":{"zip":"15061","address":"123 Foo Street"}}}\n) + end + end + end + + describe "complex object" do + it "should output correctly" do + run_binary(%(24), args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"root":{"x":{"@a":"1","a":"2"},"y":{"@b":"3","#text":"4"}}}\n) + end + end + end + + describe "with mixed content" do + it "should output correctly" do + run_binary(%(xz), args: ["-i", "xml", "-c", ".root"]) do |output| + output.should eq %({"y":"z"}\n) + end + end + end + + describe "with an inline array" do + it "should output correctly" do + run_binary(XML_INLINE_ARRAY, args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"article":{"@key":"tr/ibm/RJ2144","author":["E. F. Codd","Robert S. Arnold","Jean-Marc Cadiou","Chin-Liang Chang","Nick Roussopoulos"],"title":"RENDEZVOUS Version 1: An Experimental English Language Query Formulation System for Casual Users of Relational Data Bases.","journal":"IBM Research Report","volume":"RJ2144","month":"January","year":"1978","ee":"db/labs/ibm/RJ2144.html","cdrom":"ibmTR/rj2144.pdf"}}\n) + end + end + end + + describe "with a doctype" do + it "should output correctly" do + run_binary(XML_DOCTYPE, args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"dblp":{"mastersthesis":{"@key":"ms/Brown92","author":"Kurt P. Brown","title":"PRPL: A Database Workload Specification Language, v1.3.","year":"1992","school":"Univ. 
of Wisconsin-Madison"}}}\n) + end + end + end + + describe "with CDATA" do + it "should output correctly" do + run_binary(XML_CDATA, args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"desc":"Some Description"}\n) + end + end + end + end + + describe Array do + describe "of scalar values" do + it "should output correctly" do + run_binary(XML_SCALAR_ARRAY, args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"items":{"number":["1","2","3"]}}\n) + end + end + end + + describe "of objects" do + describe "with no nested objects" do + it "should output correctly" do + run_binary(XML_OBJECT_ARRAY, args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"items":{"item":[{"flagID":"0","itemID":"0","locationID":"0","ownerID":"0","quantity":"-1","typeID":"0"},{"flagID":"0","itemID":"1","locationID":"0","ownerID":"0","quantity":"-1","typeID":"0"}]}}\n) + end + end + end + + describe "with an inline array" do + it "should output correctly" do + run_binary(XML_INLINE_ARRAY_WITHIN_ARRAY, args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"articles":{"article":[{"@key":"tr/dec/SRC1997-018","year":"1997","ee":["db/labs/dec/SRC1997-018.html","http://www.mcjones.org/System_R/SQL_Reunion_95/"]},{"@key":"tr/gte/TR-0263-08-94-165","ee":"db/labs/gte/TR-0263-08-94-165.html","year":"1994"}]}}\n) + end + end + end + + describe "with nested objects" do + it "should output correctly" do + run_binary(XML_NESTED_OBJECT_ARRAY, args: ["-i", "xml", "-c", ".root.listing"]) do |output| + output.should eq %([{"seller_info":{"seller_name":" cubsfantony","seller_rating":" 848"},"payment_types":"Visa/MasterCard, Money Order/Cashiers Checks, Personal Checks, See item description for payment methods accepted"},{"seller_info":{"seller_name":" ct-inc","seller_rating":" 403"},"payment_types":"Visa/MasterCard, Discover, Money Order/Cashiers Checks, Personal Checks, See item description for payment methods accepted"}]\n) + end + end + end + end + + describe 
"with object that has an attribute" do + it "should output correctly" do + run_binary(XML_ATTRIBUTE_IN_ARRAY, args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"jobs":{"ad":[{"salary":{"@currency":"CAD","#text":"80000"},"working_hours":"full-time"},{"working_hours":"full-time"}]}}\n) + end + end + end + + describe "where array object element has an attribute" do + it "should output correctly" do + run_binary(XML_ATTRIBUTE_IN_ARRAY_ROOT_ELEMENT, args: ["-i", "xml", "-c", "."]) do |output| + output.should eq %({"dblp":{"mastersthesis":[{"@key":"ms/Brown92","author":"Kurt P. Brown"},{"@key":"ms/Yurek97","author":"Tolga Yurek"}]}}\n) + end + end + end + end end describe ".serialize" do diff --git a/spec/oq_spec.cr b/spec/oq_spec.cr index 201cbfb..be3ae06 100644 --- a/spec/oq_spec.cr +++ b/spec/oq_spec.cr @@ -39,6 +39,30 @@ describe OQ do end end + describe "with the -C option" do + it "should colorize the output" do + run_binary(input: SIMPLE_JSON_OBJECT, args: [".", "-c", "-C"]) do |output| + output.should eq %(\e[1;39m{\e[0m\e[34;1m\"name\"\e[0m\e[1;39m:\e[0m\e[0;32m\"Jim\"\e[0m\e[1;39m\e[1;39m}\e[0m\n) + end + end + end + + describe "with a non-JSON output format" do + it "should convert the JSON to that format" do + run_binary(input: SIMPLE_JSON_OBJECT, args: [".", "-o", "yaml"]) do |output| + output.should eq "---\nname: Jim\n" + end + end + + describe "with the -C option" do + it "should remove the -C option" do + run_binary(input: SIMPLE_JSON_OBJECT, args: [".", "-o", "yaml", "-C"]) do |output| + output.should eq "---\nname: Jim\n" + end + end + end + end + describe "with a file input" do it "should return the correct output" do run_binary(input: "", args: [".", "spec/assets/data1.json"]) do |output| @@ -154,9 +178,27 @@ describe OQ do describe "when there is a jq error" do it "should return the error and correct exit code" do - run_binary(input: ARRAY_JSON_OBJECT, args: [".names | .[] | .name"]) do |_, run, error| + run_binary(input: 
ARRAY_JSON_OBJECT, args: [".names | .[] | .name"]) do |_, status, error| error.should eq %(jq: error (at :0): Cannot index number with string "name"\n) - run.exit_code.should eq 1 + status.exit_code.should eq 1 + end + end + end + + describe "with an invalid input format" do + it "should return the error and correct exit code" do + run_binary(input: SIMPLE_JSON_OBJECT, args: ["-i", "foo"]) do |_, status, error| + error.should eq %(Invalid input format: 'foo'\n) + status.exit_code.should eq 1 + end + end + end + + describe "with an invalid output format" do + it "should return the error and correct exit code" do + run_binary(input: SIMPLE_JSON_OBJECT, args: ["-o", "foo"]) do |_, status, error| + error.should eq %(Invalid output format: 'foo'\n) + status.exit_code.should eq 1 end end end diff --git a/src/converters/xml.cr b/src/converters/xml.cr index cd36516..89d1ab1 100644 --- a/src/converters/xml.cr +++ b/src/converters/xml.cr @@ -2,7 +2,94 @@ module OQ::Converters::Xml @@at_root : Bool = true def self.deserialize(input : IO, output : IO, **args) : Nil - raise "Not Implemented" + builder = JSON::Builder.new output + xml = XML::Reader.new input + + # Set reader to first element + xml.read + + # Raise an error if the document is invalid and could not be read + raise XML::Error.new LibXML.xmlGetLastError if xml.node_type.none? + + builder.document do + builder.object do + # Skip non element nodes, i.e. the prolog or DOCTYPE, etc. + until xml.node_type.element? + xml.read + end + + # TODO: clean up after crystal-lang/crystal#8186 is released + if node = xml.expand + process_element_node node, builder + else + raise XML::Error.new LibXML.xmlGetLastError + end + end + end + end + + private def self.process_element_node(node : XML::Node, builder : JSON::Builder) : Nil + # If the node doesn't have nested elements nor attributes; just emit a scalar value + return builder.field node.name, get_node_value node if !has_nested_elements(node) && node.attributes.empty? 
+ + # Otherwise process the node as a key/value pair + builder.field node.name do + builder.object do + process_children node, builder + end + end + end + + private def self.process_array_node(name : String, children : Array(XML::Node), builder : JSON::Builder) : Nil + builder.field name do + builder.array do + children.each do |node| + # If the node doesn't have nested elements nor attributes; just emit a scalar value + if !has_nested_elements(node) && node.attributes.empty? + builder.scalar get_node_value node + else + # Otherwise process the node within an object + builder.object do + process_children node, builder + end + end + end + end + end + end + + private def self.process_children(node : XML::Node, builder : JSON::Builder) : Nil + # Process node attributes + node.attributes.each do |attr| + builder.field "@#{attr.name}", attr.content + end + + # Determine how to process a node's children + node.children.group_by(&.name).each do |name, children| + # Skip non significant whitespace; Skip mixed character input + next if children.first.text? && has_nested_elements(node) + + # Array + if children.size > 1 + process_array_node name, children, builder + else + if children.first.text? + # node content in attribute object + builder.field "#text", children.first.content + else + # Element + process_element_node children.first, builder + end + end + end + end + + private def self.has_nested_elements(node : XML::Node) : Bool + node.children.any? { |child| !child.text? && !child.cdata? } + end + + private def self.get_node_value(node : XML::Node) : String? + node.children.empty? || node.children.first.content.blank? ? nil : node.children.first.content end def self.serialize(input : IO, output : IO, **args) : Nil @@ -78,11 +165,11 @@ module OQ::Converters::Xml private def self.get_value(json : JSON::PullParser) : String case json.kind when .string? then json.read_string - when .int? then json.read_int.to_s - when .float? then json.read_float.to_s - when .bool? 
then json.read_bool.to_s + when .int? then json.read_int + when .float? then json.read_float + when .bool? then json.read_bool else "" - end + end.to_s end end diff --git a/src/oq.cr b/src/oq.cr index 56a764f..38ecc77 100644 --- a/src/oq.cr +++ b/src/oq.cr @@ -64,6 +64,17 @@ module OQ def process : Nil ARGV.replace ARGV - @args + # Add color option if STDOUT is a tty + # and the output format is JSON + # (Since it will go straight to STDOUT and not converted) + @args << "-C" if STDOUT.tty? && output_format.json? + + # If the -C option was explicitly included + # and the output format is not JSON; + # remove it from the args to prevent + # conversion errors + @args.delete("-C") if !output_format.json? + # Shift off the filter from ARGV @args << ARGV.shift unless ARGV.empty? @@ -103,7 +114,7 @@ module OQ error: STDERR ) - exit(1) unless run.success? + exit 1 unless run.success? 2.times do channel.receive @@ -113,8 +124,7 @@ module OQ end private def handle_error(ex : Exception) - puts "oq error: #{ex.message}" - exit(1) + abort "oq error: #{ex.message}" end end end diff --git a/src/oq_cli.cr b/src/oq_cli.cr index d711c29..5cd808e 100644 --- a/src/oq_cli.cr +++ b/src/oq_cli.cr @@ -22,12 +22,11 @@ module OQ Process.run("jq", ["--version"], output: output) - puts "jq: #{output}" - puts "oq: #{OQ::VERSION}" + puts "jq: #{output}", "oq: #{OQ::VERSION}" exit end - parser.on("-i FORMAT", "--input FORMAT", "Format of the input data. Supported formats: #{Format.to_s}") { |format| (f = Format.parse?(format)) && !f.xml? ? processor.input_format = f : (puts "Invalid input format: '#{format}'"; exit(1)) } - parser.on("-o FORMAT", "--output FORMAT", "Format of the output data. Supported formats: #{Format.to_s}") { |format| (f = Format.parse?(format)) ? processor.output_format = f : (puts "Invalid output format: '#{format}'"; exit(1)) } + parser.on("-i FORMAT", "--input FORMAT", "Format of the input data. 
Supported formats: #{Format.to_s}") { |format| (f = Format.parse?(format)) ? processor.input_format = f : abort "Invalid input format: '#{format}'" } + parser.on("-o FORMAT", "--output FORMAT", "Format of the output data. Supported formats: #{Format.to_s}") { |format| (f = Format.parse?(format)) ? processor.output_format = f : abort "Invalid output format: '#{format}'" } parser.on("--indent NUMBER", "Use the given number of spaces for indentation (JSON/XML only).") { |n| processor.indent = n.to_i; processor.args << "--indent"; processor.args << n } parser.on("--xml-root ROOT", "Name of the root XML element if converting to XML.") { |r| processor.xml_root = r } parser.on("--no-prolog", "Whether the XML prolog should be emitted if converting to XML.") { processor.xml_prolog = false }