diff --git a/Cargo.lock b/Cargo.lock index c3cd5b6..700810b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -153,50 +153,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" -[[package]] -name = "arrow-format" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07884ea216994cdc32a2d5f8274a8bee979cfe90274b83f86f440866ee3132c7" -dependencies = [ - "planus", - "serde", -] - -[[package]] -name = "arrow2" -version = "0.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "963fef509b757bcbbf9e5ffa23bcb345614d99f4f6f531f97417b27b8604d389" -dependencies = [ - "ahash", - "arrow-format", - "base64 0.21.7", - "bytemuck", - "chrono", - "dyn-clone", - "either", - "ethnum", - "fallible-streaming-iterator", - "foreign_vec", - "futures", - "getrandom", - "hash_hasher", - "hashbrown", - "lexical-core", - "lz4", - "multiversion", - "num-traits", - "parquet2", - "regex", - "regex-syntax 0.7.5", - "rustc_version", - "simdutf8", - "streaming-iterator", - "strength_reduce", - "zstd", -] - [[package]] name = "ascii-canvas" version = "3.0.0" @@ -457,6 +413,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atoi_simd" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae037714f313c1353189ead58ef9eec30a8e8dc101b2622d461418fd59e28a9" + [[package]] name = "atomic-waker" version = "1.1.2" @@ -533,6 +495,9 @@ name = "bitflags" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +dependencies = [ + "serde", +] [[package]] name = "blocking" @@ -640,6 +605,28 @@ dependencies = [ "windows-targets 0.52.5", ] +[[package]] +name = "chrono-tz" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" +dependencies = [ + "parse-zoneinfo", + "phf", + "phf_codegen", +] + [[package]] name = "clap" version = "4.5.4" @@ -1288,12 +1275,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "hash_hasher" -version = "2.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c" - [[package]] name = "hashbrown" version = "0.14.3" @@ -1323,6 +1304,12 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "home" version = "0.5.9" @@ -1596,6 +1583,7 @@ checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", "hashbrown", + "serde", ] [[package]] @@ -1633,12 +1621,27 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "itoap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" + [[package]] name = "jobserver" version = "0.1.30" @@ -1675,12 +1678,12 @@ dependencies = [ "ascii-canvas", "bit-set", "ena", - "itertools", + "itertools 0.11.0", "lalrpop-util", "petgraph", "pico-args", "regex", - "regex-syntax 0.8.3", + "regex-syntax", "string_cache", "term", "tiny-keccak", @@ -1709,79 +1712,6 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db13adb97ab515a3691f56e4dbab09283d0b86cb45abd991d8634a9d6f501760" -[[package]] -name = "lexical" -version = "6.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7aefb36fd43fef7003334742cbf77b243fcd36418a1d1bdd480d613a67968f6" -dependencies = [ - "lexical-core", -] - -[[package]] -name = "lexical-core" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" -dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", -] - -[[package]] -name = "lexical-parse-float" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" -dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-parse-integer" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-util" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" -dependencies = [ - "static_assertions", -] - -[[package]] -name = "lexical-write-float" -version = 
"0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" -dependencies = [ - "lexical-util", - "lexical-write-integer", - "static_assertions", -] - -[[package]] -name = "lexical-write-integer" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" -dependencies = [ - "lexical-util", - "static_assertions", -] - [[package]] name = "libc" version = "0.2.153" @@ -1991,9 +1921,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.7.1" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f930c88a43b1c3f6e776dfe495b4afab89882dbc81530c632db2ed65451ebcb4" +checksum = "b8718f8b65fdf67a45108d1548347d4af7d71fb81ce727bbf9e3b2535e079db3" dependencies = [ "async-trait", "base64 0.21.7", @@ -2002,13 +1932,13 @@ dependencies = [ "futures", "humantime", "hyper 0.14.28", - "itertools", + "itertools 0.12.1", "parking_lot", "percent-encoding", "quick-xml", "rand", "reqwest 0.11.27", - "ring 0.16.20", + "ring", "serde", "serde_json", "snafu", @@ -2108,21 +2038,12 @@ dependencies = [ ] [[package]] -name = "parquet2" -version = "0.17.2" +name = "parse-zoneinfo" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "579fe5745f02cef3d5f236bfed216fd4693e49e4e920a13475c6132233283bce" +checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" dependencies = [ - "async-stream", - "brotli", - "flate2", - "futures", - "lz4", - "parquet-format-safe", - "seq-macro", - "snap", - "streaming-decompression", - "zstd", + "regex", ] [[package]] @@ -2141,6 +2062,35 @@ dependencies = [ "indexmap", ] +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator", + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared 0.11.2", + "rand", +] + [[package]] name = "phf_shared" version = "0.10.0" @@ -2150,6 +2100,15 @@ dependencies = [ "siphasher", ] +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + [[package]] name = "pico-args" version = "0.5.0" @@ -2216,53 +2175,106 @@ dependencies = [ [[package]] name = "polars" -version = "0.33.2" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3030de163b9ff2c9dac9a12dcb9be25cc0f2bc7c8e7cd2e4b2592ebed458ce6a" +checksum = "0ea21b858b16b9c0e17a12db2800d11aa5b4bd182be6b3022eb537bbfc1f2db5" dependencies = [ "getrandom", + "polars-arrow", "polars-core", + "polars-error", "polars-io", "polars-lazy", "polars-ops", + "polars-parquet", "polars-sql", "polars-time", + "polars-utils", "version_check", ] [[package]] name = "polars-arrow" -version = "0.33.2" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35cd38a64fb389fd990e4efd433a36331c995c981d353bfef83b5de4d87f1828" +checksum = "725b09f2b5ef31279b66e27bbab63c58d49d8f6696b66b1f46c7eaab95e80f75" dependencies = [ - "arrow2", + "ahash", + "atoi", + "atoi_simd", + "bytemuck", + "chrono", + "chrono-tz", + "dyn-clone", + "either", 
+ "ethnum", + "fast-float", + "foreign_vec", + "futures", + "getrandom", "hashbrown", + "itoa", + "itoap", + "lz4", "multiversion", "num-traits", + "polars-arrow-format", "polars-error", - "thiserror", + "polars-utils", + "ryu", + "serde", + "simdutf8", + "streaming-iterator", + "strength_reduce", + "version_check", + "zstd", +] + +[[package]] +name = "polars-arrow-format" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b0ef2474af9396b19025b189d96e992311e6a47f90c53cd998b36c4c64b84c" +dependencies = [ + "planus", + "serde", +] + +[[package]] +name = "polars-compute" +version = "0.39.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a796945b14b14fbb79b91ef0406e6fddca2be636e889f81ea5d6ee7d36efb4fe" +dependencies = [ + "bytemuck", + "either", + "num-traits", + "polars-arrow", + "polars-error", + "polars-utils", + "strength_reduce", "version_check", ] [[package]] name = "polars-core" -version = "0.33.2" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08367c014c07fa8f141680e024f926cab3a1fe839605a8fcf2223647eb45ca71" +checksum = "465f70d3e96b6d0b1a43c358ba451286b8c8bd56696feff020d65702aa33e35c" dependencies = [ "ahash", - "arrow2", "bitflags 2.5.0", + "bytemuck", "chrono", + "chrono-tz", "comfy-table", "either", "hashbrown", "indexmap", "num-traits", - "object_store", "once_cell", "polars-arrow", + "polars-compute", "polars-error", "polars-row", "polars-utils", @@ -2270,54 +2282,60 @@ dependencies = [ "rand_distr", "rayon", "regex", + "serde", "smartstring", "thiserror", - "url", "version_check", "xxhash-rust", ] [[package]] name = "polars-error" -version = "0.33.2" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b20a09651a299979354945819dc2ce017964b80b916954e9d2ce39002a5f949" +checksum = "5224d5d05e6b8a6f78b75951ae1b5f82c8ab1979e11ffaf5fd41941e3d5b0757" dependencies = [ - "arrow2", 
"object_store", + "polars-arrow-format", "regex", + "simdutf8", "thiserror", ] [[package]] name = "polars-io" -version = "0.33.2" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cf4a89c18a90ac20dfbcdfd19ab50ad4ac5a76fc7bb775d3c28bb738cf1f34" +checksum = "b2c8589e418cbe4a48228d64b2a8a40284a82ec3c98817c0c2bcc0267701338b" dependencies = [ "ahash", - "arrow2", "async-trait", + "atoi_simd", "bytes", "chrono", "fast-float", "futures", "home", - "lexical", - "lexical-core", + "itoa", "memchr", "memmap2", "num-traits", "object_store", "once_cell", + "percent-encoding", "polars-arrow", "polars-core", "polars-error", + "polars-parquet", "polars-time", "polars-utils", "rayon", "regex", + "ryu", + "serde", + "serde_json", "simdutf8", + "smartstring", "tokio", "tokio-util", "url", @@ -2325,12 +2343,13 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.33.2" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5110eab438848c981cc5f541fbc5b21bb263fd707000b4715233074fb2630fcf" +checksum = "89b2632b1af668e2058d5f8f916d8fbde3cac63d03ae29a705f598e41dcfeb7f" dependencies = [ "ahash", "bitflags 2.5.0", + "futures", "glob", "once_cell", "polars-arrow", @@ -2343,40 +2362,80 @@ dependencies = [ "polars-utils", "rayon", "smartstring", + "tokio", "version_check", ] [[package]] name = "polars-ops" -version = "0.33.2" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7740d7bc4c2ca08044f9ef599638e116fdd7d687e80d1974b698e390c6ce4252" +checksum = "efdbdb4d9a92109bc2e0ce8e17af5ae8ab643bb5b7ee9d1d74f0aeffd1fbc95f" dependencies = [ + "ahash", "argminmax", - "arrow2", + "base64 0.21.7", + "bytemuck", + "chrono", + "chrono-tz", "either", + "hashbrown", + "hex", "indexmap", "memchr", + "num-traits", "polars-arrow", + "polars-compute", "polars-core", + "polars-error", "polars-utils", + "rayon", "regex", "smartstring", + "unicode-reverse", 
"version_check", ] +[[package]] +name = "polars-parquet" +version = "0.39.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b421d2196f786fdfe162db614c8485f8308fe41575d4de634a39bbe460d1eb6a" +dependencies = [ + "ahash", + "async-stream", + "base64 0.21.7", + "brotli", + "ethnum", + "flate2", + "futures", + "lz4", + "num-traits", + "parquet-format-safe", + "polars-arrow", + "polars-error", + "polars-utils", + "seq-macro", + "simdutf8", + "snap", + "streaming-decompression", + "zstd", +] + [[package]] name = "polars-pipe" -version = "0.33.2" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f30c5e77c5594ddc958a46fe2e021da2feba9c94e767e1d798bd82ac5a33c3b" +checksum = "48700f1d5bd56a15451e581f465c09541492750360f18637b196f995470a015c" dependencies = [ "crossbeam-channel", "crossbeam-queue", "enum_dispatch", + "futures", "hashbrown", "num-traits", "polars-arrow", + "polars-compute", "polars-core", "polars-io", "polars-ops", @@ -2385,25 +2444,33 @@ dependencies = [ "polars-utils", "rayon", "smartstring", + "tokio", + "uuid", "version_check", ] [[package]] name = "polars-plan" -version = "0.33.2" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678cbeb730e29e50f0f8d844102d15454fc6113a74c667eab046c0e4a4322a9e" +checksum = "2fb8e2302e20c44defd5be8cad9c96e75face63c3a5f609aced8c4ec3b3ac97d" dependencies = [ "ahash", - "arrow2", + "bytemuck", + "chrono-tz", + "futures", + "hashbrown", "once_cell", + "percent-encoding", "polars-arrow", "polars-core", "polars-io", "polars-ops", + "polars-parquet", "polars-time", "polars-utils", "rayon", + "recursive", "regex", "smartstring", "strum_macros 0.25.3", @@ -2412,25 +2479,29 @@ dependencies = [ [[package]] name = "polars-row" -version = "0.33.2" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c52ef8885b9d13f848839594fbab21ad79fc63f7e11c19cdc2cfe9bb03c313ac" 
+checksum = "a515bdc68c2ae3702e3de70d89601f3b71ca8137e282a226dddb53ee4bacfa2e" dependencies = [ - "arrow2", + "bytemuck", + "polars-arrow", "polars-error", "polars-utils", ] [[package]] name = "polars-sql" -version = "0.33.2" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d716855267e3516f722287f68cf10e650e33f7197df83a79e680602471456fc" +checksum = "7b4bb7cc1c04c3023d1953b2f1dec50515e8fd8169a5a2bf4967b3b082232db7" dependencies = [ + "hex", "polars-arrow", "polars-core", + "polars-error", "polars-lazy", "polars-plan", + "rand", "serde", "serde_json", "sqlparser", @@ -2438,17 +2509,18 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.33.2" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb75a24f11b55a400b52dc19a2a3e949aaaa46a911f99496de4485b1127063" +checksum = "efc18e3ad92eec55db89d88f16c22d436559ba7030cf76f86f6ed7a754b673f1" dependencies = [ - "arrow2", "atoi", "chrono", + "chrono-tz", "now", "once_cell", "polars-arrow", "polars-core", + "polars-error", "polars-ops", "polars-utils", "regex", @@ -2457,18 +2529,21 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.33.2" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a4a5e743509096322cad39104d56e329fe2748483a3354a0f0c354724f3cef6" +checksum = "c760b6c698cfe2fbbbd93d6cfb408db14ececfe1d92445dae2229ce1b5b21ae8" dependencies = [ "ahash", "bytemuck", "hashbrown", + "indexmap", "num-traits", "once_cell", "polars-error", + "raw-cpuid", "rayon", "smartstring", + "stacker", "sysinfo", "version_check", ] @@ -2546,11 +2621,20 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + [[package]] name = "quick-xml" -version = "0.30.0" +version = 
"0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956" +checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" dependencies = [ "memchr", "serde", @@ -2605,6 +2689,15 @@ dependencies = [ "rand", ] +[[package]] +name = "raw-cpuid" +version = "11.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d86a7c4638d42c44551f4791a20e687dbb4c3de1f33c43dd71e355cd429def1" +dependencies = [ + "bitflags 2.5.0", +] + [[package]] name = "rayon" version = "1.10.0" @@ -2634,6 +2727,26 @@ dependencies = [ "log", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.60", +] + [[package]] name = "redox_syscall" version = "0.4.1" @@ -2663,7 +2776,7 @@ dependencies = [ "aho-corasick", "memchr", "regex-automata", - "regex-syntax 0.8.3", + "regex-syntax", ] [[package]] @@ -2674,15 +2787,9 @@ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.3", + "regex-syntax", ] -[[package]] -name = "regex-syntax" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" - [[package]] name = "regex-syntax" version = "0.8.3" @@ -2725,6 +2832,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rustls", + "rustls-native-certs", "rustls-pemfile 1.0.4", "serde", "serde_json", @@ -2741,7 +2849,6 @@ 
dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots", "winreg 0.50.0", ] @@ -2787,21 +2894,6 @@ dependencies = [ "winreg 0.52.0", ] -[[package]] -name = "ring" -version = "0.16.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" -dependencies = [ - "cc", - "libc", - "once_cell", - "spin 0.5.2", - "untrusted 0.7.1", - "web-sys", - "winapi", -] - [[package]] name = "ring" version = "0.17.8" @@ -2812,8 +2904,8 @@ dependencies = [ "cfg-if", "getrandom", "libc", - "spin 0.9.8", - "untrusted 0.9.0", + "spin", + "untrusted", "windows-sys 0.52.0", ] @@ -2861,16 +2953,28 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.10" +version = "0.21.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" +checksum = "7fecbfb7b1444f477b345853b1fce097a2c6fb637b2bfb87e6bc5db0f043fae4" dependencies = [ "log", - "ring 0.17.8", + "ring", "rustls-webpki", "sct", ] +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile 1.0.4", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pemfile" version = "1.0.4" @@ -2902,8 +3006,8 @@ version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring 0.17.8", - "untrusted 0.9.0", + "ring", + "untrusted", ] [[package]] @@ -2972,8 +3076,8 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring 0.17.8", - "untrusted 0.9.0", + "ring", + "untrusted", ] 
[[package]] @@ -3136,6 +3240,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" dependencies = [ "autocfg", + "serde", "static_assertions", "version_check", ] @@ -3188,12 +3293,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - [[package]] name = "spin" version = "0.9.8" @@ -3202,13 +3301,26 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.36.1" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87" +checksum = "743b4dc2cbde11890ccb254a8fc9d537fa41b36da00de2a1c5e9848c9bc42bd7" dependencies = [ "log", ] +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -3245,7 +3357,7 @@ dependencies = [ "new_debug_unreachable", "once_cell", "parking_lot", - "phf_shared", + "phf_shared 0.10.0", "precomputed-hash", ] @@ -3317,16 +3429,16 @@ checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" [[package]] name = "sysinfo" -version = "0.29.11" +version = "0.30.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd727fc423c2060f6c92d9534cef765c65a6ed3f428a03d7def74a8c4348e666" +checksum = "87341a165d73787554941cd5ef55ad728011566fe714e987d1b976c15dbc3a83" dependencies = [ "cfg-if", "core-foundation-sys", "libc", "ntapi", "once_cell", - "winapi", + "windows", ] [[package]] @@ -3618,6 +3730,21 @@ 
dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-reverse" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b6f4888ebc23094adfb574fdca9fdc891826287a6397d2cd28802ffd6f20c76" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "unicode-segmentation" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" + [[package]] name = "unicode-width" version = "0.1.11" @@ -3630,12 +3757,6 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" -[[package]] -name = "untrusted" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" - [[package]] name = "untrusted" version = "0.9.0" @@ -3659,6 +3780,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "uuid" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" +dependencies = [ + "getrandom", +] + [[package]] name = "value-bag" version = "1.8.1" @@ -3797,12 +3927,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "webpki-roots" -version = "0.25.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" - [[package]] name = "winapi" version = "0.3.9" @@ -3834,6 +3958,16 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +dependencies = [ + "windows-core", + "windows-targets 0.52.5", +] + [[package]] name = "windows-core" version = "0.52.0" @@ -4042,20 +4176,19 @@ dependencies = [ [[package]] name = "zstd" -version = "0.12.4" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "6.0.6" +version = "7.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" +checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" dependencies = [ - "libc", "zstd-sys", ] diff --git a/Cargo.toml b/Cargo.toml index 23b85f6..520a5b6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,9 +13,9 @@ serde = { version = "1.0", features = ["derive"] } serde_json = {version="1.0"} tokio = { version = "1.30.0", features = ["full"] } clap = { version = "4.5.0", features = ["derive"] } -polars = {version ="0.33.2", features=["lazy","aws", "parquet"]} +polars = {version ="0.39.2", features=["lazy","is_in","http","streaming", "parquet","polars-io"]} typify = "0.0.16" -chrono = "0.4.37" +chrono = {version="0.4.37", features=['serde']} reqwest = {version = "0.12.3", features = ["json"]} strum = "0.26" strum_macros = "0.26" diff --git a/src/lib.rs b/src/lib.rs index dcbdde4..ee70046 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,6 +3,7 @@ use metadata::{load_metadata, SourceDataRelease}; pub mod data_request_spec; pub mod geo; pub mod metadata; +pub mod parquet; pub struct Popgetter { pub metadata: SourceDataRelease, diff --git a/src/parquet.rs b/src/parquet.rs new file mode 100644 index 0000000..f1a09ac --- /dev/null 
+++ b/src/parquet.rs
@@ -0,0 +1,182 @@
+//! Fetch metric columns from remote parquet files and combine them by
+//! geography id.
+
+use anyhow::{Context, Result};
+use polars::prelude::*;
+use std::collections::HashSet;
+
+/// Name of the geography identifier column. It is selected from every file
+/// and used as the join key when results from several files are combined.
+const GEO_ID_COL_NAME: &str = "GEO_ID";
+
+/// A request for a single metric: one `column` read from one parquet `file`.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct MetricRequest {
+    /// Name of the column to read.
+    pub column: String,
+    /// Location of the parquet file that contains the column.
+    pub file: String,
+}
+
+/// Given a `file_url` and a list of `columns`, return a `Result<DataFrame>`
+/// with the requested columns, filtered by `geo_ids` if necessary.
+///
+/// The `GEO_ID` column is always selected in addition to `columns`. The query
+/// is built lazily with streaming enabled and only runs at the final
+/// `collect`.
+///
+/// # Errors
+///
+/// Returns an error if the parquet scan cannot be created or if collecting
+/// the query fails.
+fn get_metrics_from_file(
+    file_url: &str,
+    columns: &[String],
+    geo_ids: Option<&[&str]>,
+) -> Result<DataFrame> {
+    let mut cols: Vec<Expr> = columns.iter().map(|c| col(c)).collect();
+    cols.push(col(GEO_ID_COL_NAME));
+
+    let args = ScanArgsParquet::default();
+
+    let df = LazyFrame::scan_parquet(file_url, args)?
+        .with_streaming(true)
+        .select(cols);
+
+    // Only add the filter when the caller actually restricted the geographies.
+    let df = match geo_ids {
+        Some(ids) => {
+            let id_series = Series::new("geo_ids", ids);
+            df.filter(col(GEO_ID_COL_NAME).is_in(lit(id_series)))
+        }
+        None => df,
+    };
+
+    Ok(df.collect()?)
+}
+
+/// Given a set of metrics and optional `geo_ids`, this function will
+/// retrieve all the required metrics from the cloud blob storage.
+///
+/// Each distinct file is queried once for all the columns requested from it.
+/// The per-file results are inner-joined on the `GEO_ID` column, so only ids
+/// present in every file appear in the output. Files are processed in the
+/// order in which they first appear in `metrics`.
+///
+/// # Errors
+///
+/// Returns an error if any file query or join fails, or if `metrics` is
+/// empty, in which case there is nothing to combine.
+pub fn get_metrics(metrics: &[MetricRequest], geo_ids: Option<&[&str]>) -> Result<DataFrame> {
+    // Deduplicate the files while keeping first-seen order; iterating a
+    // `HashSet` directly would visit the files in an unspecified order.
+    let mut seen = HashSet::new();
+    let file_list: Vec<&str> = metrics
+        .iter()
+        .map(|m| m.file.as_str())
+        .filter(|file| seen.insert(*file))
+        .collect();
+
+    let dfs: Result<Vec<DataFrame>> = file_list
+        .iter()
+        .map(|&file_url| {
+            let file_cols: Vec<String> = metrics
+                .iter()
+                .filter(|m| m.file == file_url)
+                .map(|m| m.column.clone())
+                .collect();
+            get_metrics_from_file(file_url, &file_cols, geo_ids)
+        })
+        .collect();
+
+    let mut joined_df: Option<DataFrame> = None;
+
+    // Merge the dataframes from each remote file into a single dataframe.
+    for df in dfs? {
+        joined_df = Some(match joined_df {
+            Some(prev_dfs) => prev_dfs.join(
+                &df,
+                vec![GEO_ID_COL_NAME],
+                vec![GEO_ID_COL_NAME],
+                JoinArgs::new(JoinType::Inner),
+            )?,
+            // The first dataframe is moved in as-is; no copy is needed.
+            None => df,
+        });
+    }
+
+    joined_df.context("Failed to combine data queries")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // NOTE: apart from the empty-request test, these tests read a parquet
+    // file over HTTPS and therefore need network access.
+    const TEST_FILE: &str =
+        "https://popgetter.blob.core.windows.net/popgetter-cli-test/tracts_2019_fiveYear.parquet";
+    const TEST_COLUMN: &str = "B17021_E006";
+
+    /// Build the single-metric request shared by the tests below.
+    fn test_metrics() -> [MetricRequest; 1] {
+        [MetricRequest {
+            file: TEST_FILE.into(),
+            column: TEST_COLUMN.into(),
+        }]
+    }
+
+    #[test]
+    fn test_empty_metrics_is_an_error() {
+        let df = get_metrics(&[], None);
+        assert!(df.is_err(), "An empty request has nothing to combine");
+    }
+
+    #[test]
+    fn test_fetching_metrics() {
+        let metrics = test_metrics();
+        let df = get_metrics(&metrics, None);
+        assert!(df.is_ok(), "We should get back a result");
+        let df = df.unwrap();
+        assert_eq!(
+            df.shape().1,
+            2,
+            "The returned dataframe should have the correct number of columns"
+        );
+        assert_eq!(
+            df.shape().0,
+            74001,
+            "The returned dataframe should have the correct number of rows"
+        );
+        assert!(
+            df.column(GEO_ID_COL_NAME).is_ok(),
+            "The returned dataframe should have a GEO_ID column"
+        );
+        assert!(
+            df.column(TEST_COLUMN).is_ok(),
+            "The returned dataframe should have the column we requested"
+        );
+    }
+
+    #[test]
+    fn test_fetching_metrics_with_geo_filter() {
+        let metrics = test_metrics();
+        let df = get_metrics(
+            &metrics,
+            Some(&["1400000US01001020100", "1400000US01001020300"]),
+        );
+
+        assert!(df.is_ok(), "We should get back a result");
+        let df = df.unwrap();
+        assert_eq!(
+            df.shape().1,
+            2,
+            "The returned dataframe should have the correct number of columns"
+        );
+        assert_eq!(
+            df.shape().0,
+            2,
+            "The returned dataframe should have the correct number of rows"
+        );
+    }
+}