Skip to content

Commit

Permalink
Merge branch 'master' into Spark3.5.0
Browse files Browse the repository at this point in the history
  • Loading branch information
ghislainfourny authored Feb 27, 2024
2 parents 524667d + 1efe9c7 commit f745e31
Show file tree
Hide file tree
Showing 10 changed files with 3,295 additions and 15 deletions.
3,212 changes: 3,212 additions & 0 deletions FLWOR.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ The documentation also contains an introduction specific to RumbleDB and how you

[The documentation of the current master (for the adventurous and curious) is available here.](http://sparksoniq.readthedocs.io/en/latest/)

RumbleDB is an effort involving many researchers and ETH Zurich students: code and support by Stefan Irimescu, Ghislain Fourny, Gustavo Alonso, Renato Marroquin, Rodrigo Bruno, Falko Noé, Ioana Stefan, Andrea Rinaldi, Stevan Mihajlovic, Mario Arduini, Can Berker Çıkış, Elwin Stephan, David Dao, Zirun Wang, Ingo Müller, Dan-Ovidiu Graur, Thomas Zhou, Olivier Goerens, Alexandru Meterez, Remo Röthlisberger, Dominik Bruggisser, David Loughlin.
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
(:JIQS: ShouldRun; Output="({ "label" : 0, "name" : "a", "age" : 20, "weight" : 50, "topicDistribution" : [ 0.9915007293228202, 0.0084992706771797 ] }, { "label" : 1, "name" : "b", "age" : 21, "weight" : 55.3, "topicDistribution" : [ 0.9922903440390959, 0.007709655960904113 ] }, { "label" : 2, "name" : "c", "age" : 22, "weight" : 60.6, "topicDistribution" : [ 0.9929414141611251, 0.007058585838874805 ] }, { "label" : 3, "name" : "d", "age" : 23, "weight" : 65.9, "topicDistribution" : [ 0.9934942226012154, 0.006505777398784658 ] }, { "label" : 4, "name" : "e", "age" : 24, "weight" : 70.3, "topicDistribution" : [ 0.9938860832566909, 0.006113916743309047 ] }, { "label" : 5, "name" : "f", "age" : 25, "weight" : 75.6, "topicDistribution" : [ 0.9943034888595204, 0.005696511140479687 ] })" :)
(:JIQS: ShouldRun; Output="({ "label" : 0, "name" : "a", "age" : 20, "weight" : 50, "topicDistribution" : [ 0.99150074, 0.00849927 ] }, { "label" : 1, "name" : "b", "age" : 21, "weight" : 55.3, "topicDistribution" : [ 0.9922903, 0.007709656 ] }, { "label" : 2, "name" : "c", "age" : 22, "weight" : 60.6, "topicDistribution" : [ 0.99294144, 0.007058586 ] }, { "label" : 3, "name" : "d", "age" : 23, "weight" : 65.9, "topicDistribution" : [ 0.9934942, 0.0065057776 ] }, { "label" : 4, "name" : "e", "age" : 24, "weight" : 70.3, "topicDistribution" : [ 0.99388605, 0.0061139166 ] }, { "label" : 5, "name" : "f", "age" : 25, "weight" : 75.6, "topicDistribution" : [ 0.99430346, 0.0056965114 ] })" :)


let $data := annotate(
json-file("../../../../queries/rumbleML/sample-ml-data-flat.json"),
{ "label": "integer", "binaryLabel": "integer", "name": "string", "age": "double", "weight": "double", "booleanCol": "boolean", "nullCol": "null", "stringCol": "string", "stringArrayCol": ["string"], "intArrayCol": ["integer"], "doubleArrayCol": ["double"], "doubleArrayArrayCol": [["double"]] }
Expand All @@ -20,5 +22,5 @@ return {
"name": $result.name,
"age": $result.age,
"weight": $result.weight,
"topicDistribution": $result.topicDistribution
"topicDistribution": [ for $v in $result.topicDistribution[] return float($v) ]
}

Large diffs are not rendered by default.

16 changes: 14 additions & 2 deletions src/test/resources/test_files/RumbleML/RumbleML/MLPipeline1.jq
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
(:JIQS: ShouldRun; Output="({ "id" : 0, "label" : 1, "col1" : -1, "col2" : 1.5, "col3" : 1.3, "features" : [ -1, 1.5, 1.3 ], "features2" : [ 0.6811443247067493, -1.6574062675838137 ], "rawPrediction" : [ -37.80676708784381, 37.80676708784381 ], "probability" : [ 3.8082870431051935E-17, 1 ], "ocol1" : 1 }, { "id" : 1, "label" : 0, "col1" : 3, "col2" : 2, "col3" : -0.1, "features" : [ 3, 2, -0.1 ], "features2" : [ -2.9389857055027706, 0.48441694826197507 ], "rawPrediction" : [ 37.567707092586666, -37.567707092586666 ], "probability" : [ 1, 0 ], "ocol1" : 0 }, { "id" : 2, "label" : 1, "col1" : 0, "col2" : 2.2, "col3" : -1.5, "features" : [ 0, 2.2, -1.5 ], "features2" : [ 0.28609394877170796, 1.1822492465709793 ], "rawPrediction" : [ -19.168353441008513, 19.168353441008513 ], "probability" : [ 4.734671708097413E-9, 0.9999999952653283 ], "ocol1" : 1 })" :)
(:JIQS: ShouldRun; Output="({ "id" : 0, "label" : 1, "col1" : -1, "col2" : 1.5, "col3" : 1.3, "features" : [ -1, 1.5, 1.3 ], "ocol1" : 1, "features2" : [ 0.6811443, -1.6574062 ], "rawPrediction" : [ -37.806767, 37.806767 ], "probability" : [ 3.808287E-17, 1 ] }, { "id" : 1, "label" : 0, "col1" : 3, "col2" : 2, "col3" : -0.1, "features" : [ 3, 2, -0.1 ], "ocol1" : 0, "features2" : [ -2.9389858, 0.48441696 ], "rawPrediction" : [ 37.567707, -37.567707 ], "probability" : [ 1, 0 ] }, { "id" : 2, "label" : 1, "col1" : 0, "col2" : 2.2, "col3" : -1.5, "features" : [ 0, 2.2, -1.5 ], "ocol1" : 1, "features2" : [ 0.28609395, 1.1822492 ], "rawPrediction" : [ -19.168354, 19.168354 ], "probability" : [ 4.7346718E-9, 1 ] })" :)

(:
  Rebuilds a prediction object so that the members of its "features2",
  "rawPrediction" and "probability" arrays are converted with float(...).
  Despite the name, no rounding function is called: the shorter numbers in the
  expected output come from the conversion to float.
  NOTE(review): presumably done to keep the expected output stable across
  Spark versions; confirm against the commit history.

  $i       the object to rebuild.
  returns  every other field of $i unchanged, followed by the three
           re-built arrays, in that order.
:)
declare function local:round($i as object) as object {
  (: Fields that are passed through as they are. :)
  let $untouched := remove-keys($i, ("features2", "rawPrediction", "probability"))
  (: The three numeric arrays, rebuilt member by member. :)
  let $as-floats := {
    "features2" : [ for $member in $i.features2[] return float($member) ],
    "rawPrediction" : [ for $member in $i.rawPrediction[] return float($member) ],
    "probability" : [ for $member in $i.probability[] return float($member) ]
  }
  (: The untouched fields are listed first in the merge, so they come first in the result. :)
  return {| $untouched, $as-floats |}
};

let $vector-assembler := get-transformer("VectorAssembler")
let $training-data := (
{"id": 0, "label": 1, "col1": 0.0, "col2": 1.1, "col3": 0.1},
Expand Down Expand Up @@ -26,4 +38,4 @@ for $i in $trained_est2(
$my-new-test-data,
{"featuresCol": "features2", "predictionCol": "ocol1"}
)
return $i
return local:round($i)
17 changes: 15 additions & 2 deletions src/test/resources/test_files/RumbleML/RumbleML/MLPipeline2.jq
Original file line number Diff line number Diff line change
@@ -1,4 +1,17 @@
\1;95;0c(:JIQS: ShouldRun; Output="({ "id" : 0, "label" : 1, "col1" : -1, "col2" : 1.5, "col3" : 1.3, "features" : [ -1, 1.5, 1.3 ], "features2" : [ 0.6811443247067493, -1.6574062675838137 ], "rawPrediction" : [ -37.806767087840996, 37.806767087840996 ], "probability" : [ 3.808287043115909E-17, 1 ], "ocol1" : 1 }, { "id" : 1, "label" : 0, "col1" : 3, "col2" : 2, "col3" : -0.1, "features" : [ 3, 2, -0.1 ], "features2" : [ -2.9389857055027706, 0.48441694826197507 ], "rawPrediction" : [ 37.567707092585735, -37.567707092585735 ], "probability" : [ 1, 0 ], "ocol1" : 0 }, { "id" : 2, "label" : 1, "col1" : 0, "col2" : 2.2, "col3" : -1.5, "features" : [ 0, 2.2, -1.5 ], "features2" : [ 0.28609394877170796, 1.1822492465709793 ], "rawPrediction" : [ -19.168353441006627, 19.168353441006627 ], "probability" : [ 4.734671708106345E-9, 0.9999999952653283 ], "ocol1" : 1 })" :)
(:JIQS: ShouldRun; Output="({ "id" : 0, "label" : 1, "col1" : -1, "col2" : 1.5, "col3" : 1.3, "features" : [ -1, 1.5, 1.3 ], "ocol1" : 1, "features2" : [ 0.6811443, -1.6574062 ], "rawPrediction" : [ -37.806767, 37.806767 ], "probability" : [ 3.808287E-17, 1 ] }, { "id" : 1, "label" : 0, "col1" : 3, "col2" : 2, "col3" : -0.1, "features" : [ 3, 2, -0.1 ], "ocol1" : 0, "features2" : [ -2.9389858, 0.48441696 ], "rawPrediction" : [ 37.567707, -37.567707 ], "probability" : [ 1, 0 ] }, { "id" : 2, "label" : 1, "col1" : 0, "col2" : 2.2, "col3" : -1.5, "features" : [ 0, 2.2, -1.5 ], "ocol1" : 1, "features2" : [ 0.28609395, 1.1822492 ], "rawPrediction" : [ -19.168354, 19.168354 ], "probability" : [ 4.7346718E-9, 1 ] })" :)

(:
  Rebuilds a prediction object so that the members of its "features2",
  "rawPrediction" and "probability" arrays are converted with float(...).
  Despite the name, no rounding function is called: the shorter numbers in the
  expected output come from the conversion to float.
  NOTE(review): presumably done to keep the expected output stable across
  Spark versions; confirm against the commit history.

  $i       the object to rebuild.
  returns  every other field of $i unchanged, followed by the three
           re-built arrays, in that order.
:)
declare function local:round($i as object) as object {
  (: Fields that are passed through as they are. :)
  let $untouched := remove-keys($i, ("features2", "rawPrediction", "probability"))
  (: The three numeric arrays, rebuilt member by member. :)
  let $as-floats := {
    "features2" : [ for $member in $i.features2[] return float($member) ],
    "rawPrediction" : [ for $member in $i.rawPrediction[] return float($member) ],
    "probability" : [ for $member in $i.probability[] return float($member) ]
  }
  (: The untouched fields are listed first in the merge, so they come first in the result. :)
  return {| $untouched, $as-floats |}
};


let $vector-assembler := get-transformer("VectorAssembler")(?, { "inputCols" : [ "col1", "col2", "col3" ], "outputCol" : "features" })
let $training-data := (
{"id": 0, "label": 1, "col1": 0.0, "col2": 1.1, "col3": 0.1},
Expand Down Expand Up @@ -26,4 +39,4 @@ for $i in $trained_est2(
$my-new-test-data,
{"featuresCol": "features2", "predictionCol": "ocol1"}
)
return $i
return local:round($i)
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ for $i in $trained_est2(
$my-new-test-data,
{"featuresCol": "features2", "predictionCol": "ocol1"}
)
return $i
return local:round($i)
16 changes: 14 additions & 2 deletions src/test/resources/test_files/RumbleML/RumbleML/MLPipeline4.jq
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
(:JIQS: ShouldRun; Output="({ "id" : 0, "label" : 1, "col1" : -1, "col2" : 1.5, "col3" : 1.3, "features" : [ -1, 1.5, 1.3 ], "rawPrediction" : [ -38.071372676777095, 38.071372676777095 ], "probability" : [ 2.9228930727481084E-17, 1 ], "prediction" : 1 }, { "id" : 1, "label" : 0, "col1" : 3, "col2" : 2, "col3" : -0.1, "features" : [ 3, 2, -0.1 ], "rawPrediction" : [ 36.2958259818859, -36.2958259818859 ], "probability" : [ 0.9999999999999998, 2.220446049250313E-16 ], "prediction" : 0 }, { "id" : 2, "label" : 1, "col1" : 0, "col2" : 2.2, "col3" : -1.5, "features" : [ 0, 2.2, -1.5 ], "rawPrediction" : [ -21.18841691705933, 21.18841691705933 ], "probability" : [ 6.280402132408788E-10, 0.9999999993719598 ], "prediction" : 1 })" :)
(:JIQS: ShouldRun; Output="({ "id" : 0, "label" : 1, "col1" : -1, "col2" : 1.5, "col3" : 1.3, "features" : [ -1, 1.5, 1.3 ], "prediction" : 1, "features2" : [ ], "rawPrediction" : [ -38.071373, 38.071373 ], "probability" : [ 2.922893E-17, 1 ] }, { "id" : 1, "label" : 0, "col1" : 3, "col2" : 2, "col3" : -0.1, "features" : [ 3, 2, -0.1 ], "prediction" : 0, "features2" : [ ], "rawPrediction" : [ 36.295826, -36.295826 ], "probability" : [ 1, 2.220446E-16 ] }, { "id" : 2, "label" : 1, "col1" : 0, "col2" : 2.2, "col3" : -1.5, "features" : [ 0, 2.2, -1.5 ], "prediction" : 1, "features2" : [ ], "rawPrediction" : [ -21.188417, 21.188417 ], "probability" : [ 6.2804023E-10, 1 ] })" :)

(:
  Rebuilds a prediction object so that the members of its "features2",
  "rawPrediction" and "probability" arrays are converted with float(...).
  Despite the name, no rounding function is called: the shorter numbers in the
  expected output come from the conversion to float.

  A field that is missing from $i still appears in the result, as an empty
  array. The expected output of this test relies on that: it lists
  "features2" : [ ] for every object.

  $i       the object to rebuild.
  returns  every other field of $i unchanged, followed by the three
           re-built arrays, in that order.
:)
declare function local:round($i as object) as object {
  (: Fields that are passed through as they are. :)
  let $untouched := remove-keys($i, ("features2", "rawPrediction", "probability"))
  (: The three numeric arrays, rebuilt member by member. :)
  let $as-floats := {
    "features2" : [ for $member in $i.features2[] return float($member) ],
    "rawPrediction" : [ for $member in $i.rawPrediction[] return float($member) ],
    "probability" : [ for $member in $i.probability[] return float($member) ]
  }
  (: The untouched fields are listed first in the merge, so they come first in the result. :)
  return {| $untouched, $as-floats |}
};

declare type local:mytype as {"id": "integer", "label": "integer", "col1": "decimal", "col2": "decimal", "col3": "decimal"};

Expand Down Expand Up @@ -26,4 +37,5 @@ let $test-data := $vector-assembler(validate type local:mytype* {
})

let $pip := local:pipeline($training-data, ?)
return $pip($test-data)
for $i in $pip($test-data)
return local:round($i)
17 changes: 15 additions & 2 deletions src/test/resources/test_files/RumbleML/RumbleML/MLPipeline5.jq
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
(:JIQS: ShouldRun; Output="({ "id" : 0, "label" : 1, "col1" : -1, "col2" : 1.5, "col3" : 1.3, "features" : [ -1, 1.5, 1.3 ], "rawPrediction" : [ -38.07137267677909, 38.07137267677909 ], "probability" : [ 2.922893072742273E-17, 1 ], "prediction" : 1 }, { "id" : 1, "label" : 0, "col1" : 3, "col2" : 2, "col3" : -0.1, "features" : [ 3, 2, -0.1 ], "rawPrediction" : [ 36.29582598188342, -36.29582598188342 ], "probability" : [ 0.9999999999999998, 2.220446049250313E-16 ], "prediction" : 0 }, { "id" : 2, "label" : 1, "col1" : 0, "col2" : 2.2, "col3" : -1.5, "features" : [ 0, 2.2, -1.5 ], "rawPrediction" : [ -21.188416917062117, 21.188416917062117 ], "probability" : [ 6.280402132391294E-10, 0.9999999993719598 ], "prediction" : 1 })" :)
(:JIQS: ShouldRun; Output="({ "id" : 0, "label" : 1, "col1" : -1, "col2" : 1.5, "col3" : 1.3, "features" : [ -1, 1.5, 1.3 ], "prediction" : 1, "features2" : [ ], "rawPrediction" : [ -38.071373, 38.071373 ], "probability" : [ 2.922893E-17, 1 ] }, { "id" : 1, "label" : 0, "col1" : 3, "col2" : 2, "col3" : -0.1, "features" : [ 3, 2, -0.1 ], "prediction" : 0, "features2" : [ ], "rawPrediction" : [ 36.295826, -36.295826 ], "probability" : [ 1, 2.220446E-16 ] }, { "id" : 2, "label" : 1, "col1" : 0, "col2" : 2.2, "col3" : -1.5, "features" : [ 0, 2.2, -1.5 ], "prediction" : 1, "features2" : [ ], "rawPrediction" : [ -21.188417, 21.188417 ], "probability" : [ 6.2804023E-10, 1 ] })" :)
declare type local:mytype as {"id": "integer", "label": "integer", "col1": "decimal", "col2": "decimal", "col3": "decimal"};

(:
  Rebuilds a prediction object so that the members of its "features2",
  "rawPrediction" and "probability" arrays are converted with float(...).
  Despite the name, no rounding function is called: the shorter numbers in the
  expected output come from the conversion to float.

  A field that is missing from $i still appears in the result, as an empty
  array. The expected output of this test relies on that: it lists
  "features2" : [ ] for every object.

  $i       the object to rebuild.
  returns  every other field of $i unchanged, followed by the three
           re-built arrays, in that order.
:)
declare function local:round($i as object) as object {
  (: Fields that are passed through as they are. :)
  let $untouched := remove-keys($i, ("features2", "rawPrediction", "probability"))
  (: The three numeric arrays, rebuilt member by member. :)
  let $as-floats := {
    "features2" : [ for $member in $i.features2[] return float($member) ],
    "rawPrediction" : [ for $member in $i.rawPrediction[] return float($member) ],
    "probability" : [ for $member in $i.probability[] return float($member) ]
  }
  (: The untouched fields are listed first in the merge, so they come first in the result. :)
  return {| $untouched, $as-floats |}
};

let $vector-assembler := get-transformer("VectorAssembler", {"inputCols" : [ "col1", "col2", "col3" ], "outputCol" : "features"})
let $logisticregression := get-estimator("LogisticRegression", { "featuresCol" : "features" })

Expand All @@ -20,4 +31,6 @@ let $test-data := validate type local:mytype* {
}

let $pip := $pipeline($training-data, {})
return $pip($test-data, {})
for $i in $pip($test-data, {})
return local:round($i)

17 changes: 15 additions & 2 deletions src/test/resources/test_files/RumbleML/RumbleML/MLPipeline6.jq
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
(:JIQS: ShouldRun; Output="({ "id" : 0, "label" : 1, "col1" : -1, "col2" : 1.5, "col3" : 1.3, "features" : [ -1, 1.5, 1.3 ], "rawPrediction" : [ -38.071372676775525, 38.071372676775525 ], "probability" : [ 2.9228930727526986E-17, 1 ], "prediction" : 1 }, { "id" : 1, "label" : 0, "col1" : 3, "col2" : 2, "col3" : -0.1, "features" : [ 3, 2, -0.1 ], "rawPrediction" : [ 36.295825981888, -36.295825981888 ], "probability" : [ 0.9999999999999998, 2.220446049250313E-16 ], "prediction" : 0 }, { "id" : 2, "label" : 1, "col1" : 0, "col2" : 2.2, "col3" : -1.5, "features" : [ 0, 2.2, -1.5 ], "rawPrediction" : [ -21.18841691705713, 21.18841691705713 ], "probability" : [ 6.280402132422621E-10, 0.9999999993719598 ], "prediction" : 1 })" :)
(:JIQS: ShouldRun; Output="({ "id" : 0, "label" : 1, "col1" : -1, "col2" : 1.5, "col3" : 1.3, "features" : [ -1, 1.5, 1.3 ], "prediction" : 1, "features2" : [ ], "rawPrediction" : [ -38.071373, 38.071373 ], "probability" : [ 2.922893E-17, 1 ] }, { "id" : 1, "label" : 0, "col1" : 3, "col2" : 2, "col3" : -0.1, "features" : [ 3, 2, -0.1 ], "prediction" : 0, "features2" : [ ], "rawPrediction" : [ 36.295826, -36.295826 ], "probability" : [ 1, 2.220446E-16 ] }, { "id" : 2, "label" : 1, "col1" : 0, "col2" : 2.2, "col3" : -1.5, "features" : [ 0, 2.2, -1.5 ], "prediction" : 1, "features2" : [ ], "rawPrediction" : [ -21.188417, 21.188417 ], "probability" : [ 6.2804023E-10, 1 ] })" :)
declare type local:mytype as {"id": "integer", "label": "integer", "col1": "decimal", "col2": "decimal", "col3": "decimal"};

(:
  Rebuilds a prediction object so that the members of its "features2",
  "rawPrediction" and "probability" arrays are converted with float(...).
  Despite the name, no rounding function is called: the shorter numbers in the
  expected output come from the conversion to float.

  A field that is missing from $i still appears in the result, as an empty
  array. The expected output of this test relies on that: it lists
  "features2" : [ ] for every object.

  $i       the object to rebuild.
  returns  every other field of $i unchanged, followed by the three
           re-built arrays, in that order.
:)
declare function local:round($i as object) as object {
  (: Fields that are passed through as they are. :)
  let $untouched := remove-keys($i, ("features2", "rawPrediction", "probability"))
  (: The three numeric arrays, rebuilt member by member. :)
  let $as-floats := {
    "features2" : [ for $member in $i.features2[] return float($member) ],
    "rawPrediction" : [ for $member in $i.rawPrediction[] return float($member) ],
    "probability" : [ for $member in $i.probability[] return float($member) ]
  }
  (: The untouched fields are listed first in the merge, so they come first in the result. :)
  return {| $untouched, $as-floats |}
};

let $pipeline := get-estimator("Pipeline", {
"stages" : [
get-transformer("VectorAssembler", {"inputCols" : [ "col1", "col2", "col3" ], "outputCol" : "features"}),
Expand All @@ -21,4 +32,6 @@ let $test-data := validate type local:mytype* {
}

let $pip := $pipeline($training-data, {})
return $pip($test-data, {})
for $i in $pip($test-data, {})
return local:round($i)

0 comments on commit f745e31

Please sign in to comment.