diff --git a/Cargo.lock b/Cargo.lock index 784ff38..1d22042 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,21 +2,6 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "addr2line" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - [[package]] name = "ahash" version = "0.7.8" @@ -107,29 +92,12 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "anyhow" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" - [[package]] name = "arrayvec" version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" -[[package]] -name = "async-trait" -version = "0.1.81" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.72", -] - [[package]] name = "autocfg" version = "1.3.0" @@ -147,27 +115,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "backtrace" -version = "0.3.73" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" -dependencies = [ - "addr2line", - "cc", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", -] - -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - [[package]] name = "bincode" version = "2.0.0-rc.3" @@ -211,15 +158,6 @@ dependencies = [ "wyz", ] -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - [[package]] name = "brro-compressor" version = "0.5.0" @@ -228,7 +166,7 @@ dependencies = [ "bincode", "clap", "criterion", - "env_logger 0.11.5", + "env_logger", "hound", "inverse_distance_weight", "log", @@ -375,7 +313,7 @@ version = "4.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "syn 2.0.72", @@ -405,15 +343,6 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" -[[package]] -name = "cpufeatures" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" -dependencies = [ - "libc", -] - [[package]] name = "criterion" version = "0.5.1" @@ -481,16 +410,6 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - [[package]] name = "csv" version = "1.3.0" @@ -510,7 +429,7 @@ dependencies = [ "brro-compressor", "clap", "csv", - "env_logger 0.11.5", + "env_logger", "log", "serde", "tempdir", @@ -527,22 +446,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "data-encoding" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", -] - [[package]] name = "dtw_rs" version = "0.9.5" @@ -583,19 +486,6 @@ dependencies = [ "regex", ] -[[package]] -name = "env_logger" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" -dependencies = [ - "humantime", - "is-terminal", - "log", - "regex", - "termcolor", -] - [[package]] name = "env_logger" version = "0.11.5" @@ -609,12 +499,6 @@ dependencies = [ "log", ] -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - [[package]] name = "errno" version = "0.3.9" @@ -637,33 +521,12 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - [[package]] name = "float-ord" version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ce81f49ae8a0482e4c55ea62ebbd7e5a686af544c00b9d090bba3ff9be97b3d" -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "form_urlencoded" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" -dependencies = [ - "percent-encoding", -] - [[package]] name = "fuchsia-cprng" version = "0.1.1" @@ -676,95 +539,6 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" -[[package]] -name = "futures" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" - -[[package]] -name = "futures-executor" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" - -[[package]] -name = "futures-macro" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.72", -] - -[[package]] -name = "futures-sink" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" - -[[package]] -name = "futures-task" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" - -[[package]] -name = "futures-util" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - [[package]] name = "generic-array" version = "0.14.7" @@ -786,31 +560,6 @@ dependencies = [ "wasi", ] -[[package]] -name = "gimli" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" - -[[package]] -name = "h2" -version = "0.3.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http 0.2.12", - "indexmap", - "slab", - "tokio", - "tokio-util", - "tracing", -] - [[package]] name = "half" version = "2.4.1" @@ -830,42 +579,6 @@ dependencies = [ "ahash", ] -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" - -[[package]] -name = "headers" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06683b93020a07e3dbcf5f8c0f6d40080d725bea7936fc01ad345c01b97dc270" -dependencies = [ - "base64", - "bytes", - "headers-core", - "http 0.2.12", - "httpdate", - "mime", - "sha1", -] - -[[package]] -name = "headers-core" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7f66481bfee273957b1f20485a4ff3362987f85b2c236580d81b4eb7a326429" -dependencies = [ - "http 0.2.12", -] - -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - [[package]] name = "heck" version = "0.5.0" @@ -878,96 +591,18 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" 
-[[package]] -name = "home" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" -dependencies = [ - "windows-sys", -] - [[package]] name = "hound" version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62adaabb884c94955b19907d60019f4e145d091c75345379e70d1ee696f7854f" -[[package]] -name = "http" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - -[[package]] -name = "http" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - -[[package]] -name = "http-body" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" -dependencies = [ - "bytes", - "http 0.2.12", - "pin-project-lite", -] - -[[package]] -name = "httparse" -version = "1.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9" - -[[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" - [[package]] name = "humantime" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" -[[package]] -name = "hyper" -version = "0.14.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a152ddd61dfaec7273fe8419ab357f33aee0d914c5f4efbf0d96fa749eea5ec9" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2", - "http 0.2.12", - "http-body", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2", - "tokio", - "tower-service", - "tracing", - "want", -] - [[package]] name = "iana-time-zone" version = "0.1.60" @@ -991,26 +626,6 @@ dependencies = [ "cc", ] -[[package]] -name = "idna" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - -[[package]] -name = "indexmap" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" -dependencies = [ - "equivalent", - "hashbrown 0.14.5", -] - [[package]] name = "inverse_distance_weight" version = "0.1.1" @@ -1085,16 +700,6 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" -[[package]] -name = "lock_api" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" -dependencies = [ - "autocfg", - "scopeguard", -] - [[package]] name = "log" version = "0.4.22" @@ -1116,67 +721,6 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" -[[package]] -name = "mime" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" - -[[package]] -name = "mime_guess" -version = "2.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" -dependencies = [ - "mime", - "unicase", -] - -[[package]] -name = "miniz_oxide" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" -dependencies = [ - "adler", -] - -[[package]] -name = "mio" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4569e456d394deccd22ce1c1913e6ea0e54519f577285001215d33557431afe4" -dependencies = [ - "hermit-abi", - "libc", - "wasi", - "windows-sys", -] - -[[package]] -name = "multer" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01acbdc23469fd8fe07ab135923371d5f5a422fbf9c522158677c8eb15bc51c2" -dependencies = [ - "bytes", - "encoding_rs", - "futures-util", - "http 0.2.12", - "httparse", - "log", - "memchr", - "mime", - "spin", - "version_check", -] - -[[package]] -name = "multimap" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" - [[package]] name = "num-complex" version = "0.4.6" @@ -1206,17 +750,8 @@ dependencies = [ ] [[package]] -name = "object" -version = "0.36.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e" -dependencies = [ - "memchr", -] - -[[package]] -name = "once_cell" -version = "1.19.0" +name = "once_cell" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" @@ -1226,91 +761,6 @@ version = "11.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" -[[package]] -name = "optimizer" -version = "0.1.0" -dependencies = [ - "chrono", - "clap", - "claxon", - "env_logger 0.11.5", - "hound", - "log", - "median", - "regex", -] - -[[package]] -name = "parking_lot" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets", -] - -[[package]] -name = "percent-encoding" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" - -[[package]] -name = "petgraph" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" -dependencies = [ - "fixedbitset", - "indexmap", -] - -[[package]] -name = "pin-project" -version = "1.1.5" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.72", -] - -[[package]] -name = "pin-project-lite" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - [[package]] name = "plotters" version = "0.3.6" @@ -1348,16 +798,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "prettyplease" -version = "0.1.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86" -dependencies = [ - "proc-macro2", - "syn 1.0.109", -] - [[package]] name = "primal-check" version = "0.3.4" @@ -1376,96 +816,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "prom-remote-api" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a91e313f40839f01a242d526f12aff757b69d9c50f59c10947b58f50fad45e4" -dependencies = [ - "async-trait", - "bytes", - "env_logger 0.10.2", - "futures", - "prost", - "prost-build", - "snap", - "warp", -] - -[[package]] -name = "prometheus-remote" -version = "0.1.0" -dependencies = [ - "async-trait", - "chrono", - "clap", - "claxon", - "dtw_rs", - "env_logger 0.11.5", - "hound", - "log", - "median", - "prom-remote-api", - "regex", - "symphonia", - "tokio", - "warp", -] - -[[package]] -name = "prost" -version = "0.11.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" -dependencies = [ - "bytes", - "prost-derive", -] - -[[package]] -name = "prost-build" -version = "0.11.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" -dependencies = [ - "bytes", - "heck 0.4.1", - "itertools", - "lazy_static", - "log", - "multimap", - "petgraph", - "prettyplease", - "prost", - "prost-types", - "regex", - "syn 1.0.109", - "tempfile", - "which", -] - -[[package]] -name = "prost-derive" -version = "0.11.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" -dependencies = [ - "anyhow", - "itertools", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "prost-types" -version = "0.11.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13" -dependencies = [ - "prost", -] - [[package]] name = "ptr_meta" version = "0.1.4" @@ -1588,15 +938,6 @@ dependencies = [ "rand_core 0.3.1", ] -[[package]] -name = "redox_syscall" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" -dependencies = [ - "bitflags 2.6.0", -] - [[package]] name = "regex" version = "1.10.5" @@ -1653,7 +994,7 
@@ dependencies = [ "bitvec", "bytecheck", "bytes", - "hashbrown 0.12.3", + "hashbrown", "ptr_meta", "rend", "rkyv_derive", @@ -1673,12 +1014,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "rustc-demangle" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" - [[package]] name = "rustfft" version = "6.2.0" @@ -1722,18 +1057,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "scoped-tls" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - [[package]] name = "seahash" version = "4.1.0" @@ -1772,81 +1095,12 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_urlencoded" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" -dependencies = [ - "form_urlencoded", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "sha1" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "signal-hook-registry" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" -dependencies = [ - "libc", -] - [[package]] name = "simdutf8" version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" -[[package]] -name = "slab" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg", -] - -[[package]] -name = "smallvec" -version = "1.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" - -[[package]] -name = "snap" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" - -[[package]] -name = "socket2" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" -dependencies = [ - "libc", - "windows-sys", -] - -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" - [[package]] name = "splines" version = "4.3.1" @@ -2048,35 +1302,6 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "thiserror" -version = "1.0.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" 
-dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.72", -] - [[package]] name = "tinytemplate" version = "1.2.1" @@ -2102,60 +1327,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" -[[package]] -name = "tokio" -version = "1.39.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daa4fb1bc778bd6f04cbfc4bb2d06a7396a8f299dc33ea1900cedaa316f467b1" -dependencies = [ - "backtrace", - "bytes", - "libc", - "mio", - "parking_lot", - "pin-project-lite", - "signal-hook-registry", - "socket2", - "tokio-macros", - "windows-sys", -] - -[[package]] -name = "tokio-macros" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.72", -] - -[[package]] -name = "tokio-tungstenite" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c83b561d025642014097b66e6c1bb422783339e0909e4429cde4749d1990bc38" -dependencies = [ - "futures-util", - "log", - "tokio", - "tungstenite", -] - -[[package]] -name = "tokio-util" -version = "0.7.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", -] - [[package]] name = "tools" version = "0.1.0" @@ -2164,7 +1335,7 @@ dependencies = [ "clap", "claxon", "dtw_rs", - "env_logger 0.11.5", + "env_logger", "hound", "log", "median", @@ -2173,32 +1344,6 @@ dependencies = [ "wavbrro", ] -[[package]] -name = "tower-service" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" - -[[package]] -name = "tracing" -version = "0.1.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" -dependencies = [ - "log", - "pin-project-lite", - "tracing-core", -] - -[[package]] -name = "tracing-core" -version = "0.1.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" -dependencies = [ - "once_cell", -] - [[package]] name = "transpose" version = "0.2.3" @@ -2209,84 +1354,18 @@ dependencies = [ "strength_reduce", ] -[[package]] -name = "try-lock" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" - -[[package]] -name = "tungstenite" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ef1a641ea34f399a848dea702823bbecfb4c486f911735368f1f137cb8257e1" -dependencies = [ - "byteorder", - "bytes", - "data-encoding", - "http 1.1.0", - "httparse", - "log", - "rand 0.8.5", - "sha1", - "thiserror", - "url", - "utf-8", -] - [[package]] name = "typenum" version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" -[[package]] -name = "unicase" -version = "2.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" -dependencies = [ - "version_check", -] - -[[package]] -name = "unicode-bidi" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" - [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" -[[package]] -name = "unicode-normalization" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "url" -version = "2.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", -] - -[[package]] -name = "utf-8" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" - [[package]] name = "utf8parse" version = "0.2.2" @@ -2329,44 +1408,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "want" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" -dependencies = [ - "try-lock", -] - -[[package]] -name = "warp" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4378d202ff965b011c64817db11d5829506d3404edeadb61f190d111da3f231c" -dependencies = [ - "bytes", - "futures-channel", - "futures-util", - "headers", - "http 0.2.12", - "hyper", - "log", - "mime", - "mime_guess", - "multer", - "percent-encoding", - "pin-project", - "scoped-tls", - "serde", - "serde_json", - "serde_urlencoded", - "tokio", - "tokio-tungstenite", - "tokio-util", - "tower-service", - "tracing", -] - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -2431,7 +1472,7 @@ checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" name = "wavbrro" version = "0.1.0" dependencies = [ - "env_logger 0.11.5", + "env_logger", "log", "rkyv", "tempfile", @@ -2447,18 +1488,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix", -] - [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 5f43db9..dd98cea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,6 @@ [workspace] members = [ "brro-compressor", - "optimizer", - "prometheus-remote", "tools", "wavbrro", "vsri", diff --git a/brro-compressor/src/compare.rs b/brro-compressor/src/compare.rs index 9925fe2..d6fbe47 100644 --- a/brro-compressor/src/compare.rs +++ b/brro-compressor/src/compare.rs @@ -14,17 +14,6 @@ See the License for the specific language governing permissions and limitations under the License. 
 */
-use crate::{
-    compressor::{
-        constant::constant_compressor,
-        fft::fft,
-        polynomial::{polynomial, PolynomialType},
-    },
-    optimizer::utils::DataStats,
-};
-use std::thread;
-
-/// Enum to represent the decision between compressors.
 #[derive(PartialEq, Debug)]
 enum CompressionDecision {
     Constant,
@@ -32,66 +21,7 @@ enum CompressionDecision {
     Polynomial,
 }
 
-impl CompressionDecision {
-    /// Function to perform compression and make a decision based on the results.
-    pub fn compress_and_decide() -> Result<(), Box<dyn std::error::Error>> {
-        // Sample data for testing
-        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
-        let stats = DataStats::new(&data);
-
-        // Clone data for each compressor
-        let data_constant = data.clone();
-        let data_fft = data.clone();
-        let data_polynomial = data.clone();
-
-        // Create threads for each compressor
-        let thread_constant = thread::spawn(move || constant_compressor(&data_constant, stats));
-        let thread_fft = thread::spawn(move || fft(&data_fft));
-        let thread_polynomial =
-            thread::spawn(move || polynomial(&data_polynomial, PolynomialType::Polynomial));
-
-        // Wait for threads to finish and collect their results with error handling
-        let result_constant = thread_constant
-            .join()
-            .map_err(|e| format!("Constant thread error: {:?}", e))?;
-        let result_fft = thread_fft
-            .join()
-            .map_err(|e| format!("FFT thread error: {:?}", e))?;
-        let result_polynomial = thread_polynomial
-            .join()
-            .map_err(|e| format!("Polynomial thread error: {:?}", e))?;
-
-        // Use the decision logic to determine the compression decision
-        let decision = match (
-            result_constant.compressed_data.len(),
-            result_fft.len(),
-            result_polynomial.len(),
-        ) {
-            (constant_len, fft_len, poly_len)
-                if constant_len < fft_len && constant_len < poly_len =>
-            {
-                CompressionDecision::Constant
-            }
-            (_, fft_len, poly_len) if fft_len < poly_len => CompressionDecision::Fft,
-            _ => CompressionDecision::Polynomial,
-        };
-
-        // Use the decision to perform further actions
-        match decision {
-            CompressionDecision::Constant => {
-                println!("Selected Constant Compressor");
-            }
-            CompressionDecision::Fft => {
-                println!("Selected FFT Compressor");
-            }
-            CompressionDecision::Polynomial => {
-                println!("Selected Polynomial Compressor");
-            }
-        }
-
-        Ok(())
-    }
-}
+impl CompressionDecision {}
 fn get_compression_decision(
     result_constant: &[f64],
     result_fft: &[f64],
diff --git a/brro-compressor/src/compressor/constant.rs b/brro-compressor/src/compressor/constant.rs
index 99b1dab..cc82e21 100644
--- a/brro-compressor/src/compressor/constant.rs
+++ b/brro-compressor/src/compressor/constant.rs
@@ -69,7 +69,6 @@ impl Decode for Constant {
     ) -> Result<Self, DecodeError> {
         let id = Decode::decode(decoder)?;
         let bitdepth = Decode::decode(decoder)?;
-        // Here is where the pig twists the tail
         let constant: f64 = match bitdepth {
             Bitdepth::U8 => {
                 debug!("Decoding as u8");
@@ -112,11 +111,6 @@ impl Constant {
         }
     }
 
-    /// This compressor is about having a single constant for the whole segment
-    pub fn set_constant(&mut self, constant_value: f64) {
-        self.constant = constant_value;
-    }
-
     /// Receives a data stream and generates a Constant
     pub fn decompress(data: &[u8]) -> Self {
         let config = BinConfig::get();
@@ -126,7 +120,6 @@ impl Constant {
 
     /// This function transforms the structure into a Binary stream
     pub fn to_bytes(&self) -> Vec<u8> {
-        // Use Bincode and flate2-rs? Do this at the Stream Level?
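Reviewer note: the removed `compress_and_decide` raced the three compressors on separate threads and kept whichever produced the fewest bytes; the same size-based rule survives in `CompressorFrame::compress_best` later in this diff. A minimal sketch of that decision rule, with plain byte slices standing in for the real compressor outputs (the function name is illustrative, not part of the crate):

```rust
/// Sketch of the "smallest output wins" rule from the removed code.
/// The arguments stand in for the byte streams each compressor produced.
fn smallest_output(constant: &[u8], fft: &[u8], polynomial: &[u8]) -> &'static str {
    match (constant.len(), fft.len(), polynomial.len()) {
        (c, f, p) if c < f && c < p => "constant",
        (_, f, p) if f < p => "fft",
        _ => "polynomial",
    }
}

fn main() {
    // Constant wins only when strictly smaller than both alternatives.
    assert_eq!(smallest_output(&[0; 4], &[0; 90], &[0; 30]), "constant");
    // On an FFT/polynomial tie the rule falls through to polynomial.
    assert_eq!(smallest_output(&[0; 99], &[0; 30], &[0; 30]), "polynomial");
}
```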
         let config = BinConfig::get();
         bincode::encode_to_vec(self, config).unwrap()
     }
@@ -141,9 +134,7 @@ impl Constant {
 
 pub fn constant_compressor(data: &[f64], stats: DataStats) -> CompressorResult {
     debug!("Initializing Constant Compressor. Error and Stats provided");
-    // Initialize the compressor
     let c = Constant::new(data.len(), stats.min, stats.bitdepth);
-    // Convert to bytes
     CompressorResult::new(c.to_bytes(), 0.0)
 }
diff --git a/brro-compressor/src/compressor/fft.rs b/brro-compressor/src/compressor/fft.rs
index 4e00b15..4c9fa07 100644
--- a/brro-compressor/src/compressor/fft.rs
+++ b/brro-compressor/src/compressor/fft.rs
@@ -39,30 +39,6 @@ pub struct FrequencyPoint {
 }
 
 impl FrequencyPoint {
-    pub fn new(real: f32, img: f32) -> Self {
-        FrequencyPoint {
-            pos: 0,
-            freq_real: real,
-            freq_img: img,
-        }
-    }
-
-    pub fn with_position(real: f32, img: f32, pos: u16) -> Self {
-        FrequencyPoint {
-            pos,
-            freq_real: real,
-            freq_img: img,
-        }
-    }
-
-    pub fn from_complex(complex: Complex<f32>) -> Self {
-        FrequencyPoint {
-            pos: 0,
-            freq_real: complex.re,
-            freq_img: complex.im,
-        }
-    }
-
     pub fn from_complex_with_position(complex: Complex<f32>, pos: u16) -> Self {
         FrequencyPoint {
             pos,
@@ -132,15 +108,10 @@ impl Ord for FrequencyPoint {
 /// FFT Compressor. Applies FFT to a signal, picks the N best frequencies, discards the rest. Always LOSSY
 #[derive(PartialEq, Debug)]
 pub struct FFT {
-    /// Compressor ID
     pub id: u8,
-    /// Stored frequencies
     pub frequencies: Vec<FrequencyPoint>,
-    /// The maximum numeric value of the points in the frame
     pub max_value: f32,
-    /// The minimum numeric value of the points in the frame
     pub min_value: f32,
-    /// Compression error
     pub error: Option<f64>,
 }
 
@@ -391,7 +362,7 @@ impl FFT {
     }
 
     /// Compresses data via FFT
-    /// The set of frequencies to store is 1/100 of the data lenght OR 3, which is bigger.
+    /// The set of frequencies to store is 1/100 of the data length OR 3, whichever is bigger.
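The doc comment above is the FFT compressor's whole frequency budget: keep 1% of the sample count, floored at 3 stored frequencies. Stated as code (the helper name is mine, not the crate's):

```rust
/// Frequency budget described in the doc comment above: 1/100 of the
/// data length OR 3, whichever is bigger.
fn frequency_budget(sample_count: usize) -> usize {
    (sample_count / 100).max(3)
}

fn main() {
    assert_eq!(frequency_budget(250), 3); // short frame: the floor of 3 wins
    assert_eq!(frequency_budget(1024), 10); // longer frame: 1% of the samples
    assert_eq!(frequency_budget(131_072), 1310); // a full 2^17-sample frame
}
```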
     pub fn compress(&mut self, data: &[f64]) {
         if self.max_value == self.min_value {
             debug!("Same max and min, we're done here!");
@@ -415,8 +386,6 @@ impl FFT {
         buffer.truncate(size);
         self.frequencies = FFT::fft_trim(&mut buffer, max_freq);
     }
-
-    /// Decompresses data
     pub fn decompress(data: &[u8]) -> Self {
         let config = BinConfig::get();
         let (fft, _) = bincode::decode_from_slice(data, config).unwrap();
diff --git a/brro-compressor/src/compressor/mod.rs b/brro-compressor/src/compressor/mod.rs
index 75f0370..4b71cd3 100644
--- a/brro-compressor/src/compressor/mod.rs
+++ b/brro-compressor/src/compressor/mod.rs
@@ -118,7 +118,6 @@ pub struct BinConfig {
 impl BinConfig {
     pub fn get() -> Configuration {
-        // Little endian and Variable int encoding
         config::standard()
     }
 }
diff --git a/brro-compressor/src/compressor/noop.rs b/brro-compressor/src/compressor/noop.rs
index 87ee570..e622fd4 100644
--- a/brro-compressor/src/compressor/noop.rs
+++ b/brro-compressor/src/compressor/noop.rs
@@ -34,17 +34,14 @@ impl Noop {
             data: Vec::with_capacity(sample_count),
         }
     }
-    ///Optimize
     pub fn optimize(data: &[f64]) -> Vec<i64> {
         let mut out_vec = Vec::with_capacity(data.len());
         for &element in data {
-            // Round the floating-point number before casting to i64
             out_vec.push(element.round() as i64);
         }
         out_vec
     }
 
-    /// "Compress"
     pub fn compress(&mut self, data: &[f64]) {
         self.data = Noop::optimize(data);
         debug!(
@@ -54,20 +51,17 @@ impl Noop {
         );
     }
 
-    /// Receives a data stream and generates a Noop
     pub fn decompress(data: &[u8]) -> Self {
         let config = BinConfig::get();
         let (noop, _) = bincode::decode_from_slice(data, config).unwrap();
         noop
     }
 
-    /// This function transforms the structure in a Binary stream to be appended to the frame
     pub fn to_bytes(&self) -> Vec<u8> {
         let config = BinConfig::get();
         bincode::encode_to_vec(self, config).unwrap()
     }
 
-    /// Returns an array of data
     pub fn to_data(&self, _frame_size: usize) -> Vec<i64> {
         self.data.clone()
     }
@@ -116,9 +110,8 @@ mod tests {
 
     #[test]
     fn test_optimize() {
-        // Test case with floating-point numbers that have fractional parts
         let input_data = [1.5, 2.7, 3.3, 4.9];
-        let expected_output = [2, 3, 3, 5]; // Rounded to the nearest integer
+        let expected_output = [2, 3, 3, 5];
         let result = Noop::optimize(&input_data);
 
         assert_eq!(result, expected_output);
diff --git a/brro-compressor/src/compressor/polynomial.rs b/brro-compressor/src/compressor/polynomial.rs
index 9d25791..531a82a 100644
--- a/brro-compressor/src/compressor/polynomial.rs
+++ b/brro-compressor/src/compressor/polynomial.rs
@@ -42,17 +42,12 @@ pub enum Method {
 
 #[derive(PartialEq, Debug, Clone)]
 pub struct Polynomial {
-    /// Compressor ID
     pub id: PolynomialType,
-    /// Stored Points
     pub data_points: Vec<f64>,
     pub min: f64,
     pub max: f64,
-    /// What is the base step between points
     pub point_step: u8,
-    /// Compression error
     pub error: Option<f64>,
-    /// Target bitdepth
     pub bitdepth: Bitdepth,
 }
 
@@ -97,7 +92,6 @@ impl Decode for Polynomial {
     ) -> Result<Self, DecodeError> {
         let id = Decode::decode(decoder)?;
         let bitdepth = Decode::decode(decoder)?;
-        // Here is where the pig twists the tail
         let data_points: Vec<f64> = match bitdepth {
             Bitdepth::U8 => {
                 debug!("Decoding as u8");
@@ -205,10 +199,6 @@ impl Polynomial {
         }
     }
 
-    fn locate_in_data_points(&self, point: f64) -> bool {
-        self.data_points.iter().any(|&i| i == point)
-    }
-
     fn get_method(&self) -> Method {
         match self.id {
             PolynomialType::Idw => Method::Idw,
@@ -223,7 +213,7 @@ impl Polynomial {
         }
         // TODO: Big one, read below
        // To reduce error we add more points to the polynomial, but, we also might add residuals
-        // each residual is 1/data_lenght * 100% less compression, each jump is 5% less compression.
+        // each residual is 1/data_length * 100% less compression, each jump is 5% less compression.
         // We can do the math and pick the one which fits better.
         let method = self.get_method();
         let data_len = data.len();
@@ -280,7 +270,7 @@ impl Polynomial {
         }
         self.error = Some(current_err);
         debug!(
-            "Final Stored Data Lenght: {} Iterations: {}",
+            "Final Stored Data Length: {} Iterations: {}",
             self.data_points.len(),
             iterations
         );
@@ -314,7 +304,6 @@ impl Polynomial {
         self.point_step = step as u8;
     }
 
-    // --- MANDATORY METHODS ---
     pub fn compress(&mut self, data: &[f64]) {
         let points = if 3 >= (data.len() / 100) {
             3
@@ -324,7 +313,6 @@ impl Polynomial {
         self.compress_hinted(data, points)
     }
 
-    /// Decompresses data
     pub fn decompress(data: &[u8]) -> Self {
         let config = BinConfig::get();
         let (poly, _) = bincode::decode_from_slice(data, config).unwrap();
@@ -336,7 +324,6 @@ impl Polynomial {
         bincode::encode_to_vec(self, config).unwrap()
     }
 
-    // --- END OF MANDATORY METHODS ---
     /// Since IDW and Polynomial are the same code everywhere, this function prepares the data
     /// to be used by one of the polynomial decompression methods
     fn get_positions(&self, frame_size: usize) -> Vec<usize> {
@@ -393,7 +380,6 @@ impl Polynomial {
             .map(|&f| f as f64)
             .collect();
         let idw = IDW::new(points, self.data_points.clone());
-        // Build the data
         (0..frame_size)
             .map(|f| {
                 round_and_limit_f64(
@@ -421,11 +407,8 @@ impl Polynomial {
 pub fn polynomial(data: &[f64], p_type: PolynomialType) -> Vec<u8> {
     info!("Initializing Polynomial Compressor");
     let stats = DataStats::new(data);
-    // Initialize the compressor
     let mut c = Polynomial::new(data.len(), stats.min, stats.max, p_type, stats.bitdepth);
-    // Convert the data
     c.compress(data);
-    // Convert to bytes
     c.to_bytes()
 }
 
@@ -436,14 +419,11 @@ pub fn polynomial_allowed_error(
 ) -> CompressorResult {
     info!("Initializing Polynomial Compressor");
     let stats = DataStats::new(data);
-    // Initialize the compressor
     let mut c = Polynomial::new(data.len(), stats.min, stats.max, p_type, stats.bitdepth);
-    // Convert the data
     c.compress_bounded(data, allowed_error);
     CompressorResult::new(c.to_bytes(), c.error.unwrap_or(0.0))
 }
 
-/// Uncompress
 pub fn to_data(sample_number: usize, compressed_data: &[u8]) -> Vec<f64> {
     let c = Polynomial::decompress(compressed_data);
     c.to_data(sample_number)
diff --git a/brro-compressor/src/data.rs b/brro-compressor/src/data.rs
index 339cf2d..ea97dd9 100644
--- a/brro-compressor/src/data.rs
+++ b/brro-compressor/src/data.rs
@@ -27,7 +27,6 @@ pub struct CompressedStream {
 }
 
 impl CompressedStream {
-    /// Creates an empty compressor stream
     pub fn new() -> Self {
         CompressedStream {
             header: CompressorHeader::new(),
@@ -87,8 +86,6 @@ impl CompressedStream {
         let (compressed_stream, _) = bincode::decode_from_slice(data, config).unwrap();
         compressed_stream
     }
-
-    /// Decompresses all the frames and returns a vector with the data
     pub fn decompress(&self) -> Vec<f64> {
         self.data_frames
             .iter()
diff --git a/brro-compressor/src/frame/mod.rs b/brro-compressor/src/frame/mod.rs
index caef28d..2a56612 100644
--- a/brro-compressor/src/frame/mod.rs
+++ b/brro-compressor/src/frame/mod.rs
@@ -21,14 +21,11 @@ use std::mem::size_of_val;
 
 const COMPRESSION_SPEED: [i32; 7] = [i32::MAX, 4096, 2048, 1024, 512, 256, 128];
 
-/// This is the structure of a compressor frame
 #[derive(Encode, Decode, Debug, Clone)]
 pub struct CompressorFrame {
     /// The frame size in bytes,
     frame_size: usize,
-    /// The number of samples in this frame,
     sample_count: usize,
-    /// The compressor used in the current frame
     compressor: Compressor,
     /// Output from the compressor
     data: Vec<u8>,
@@ -48,7 +45,7 @@ impl CompressorFrame {
     }
 
     /// Calculates the size of the Frame and "closes it"
-    // TODO this is probably wrong, so we have to use the write stream to dump the bytes writen
+    // TODO this is probably wrong, so we have to use the write stream to dump the bytes written
     pub fn close(&mut self) {
         let size = size_of_val(&self.sample_count)
             + size_of_val(&self.compressor)
@@ -57,13 +54,11 @@ impl CompressorFrame {
         self.frame_size = size;
     }
 
-    /// Compress a data and stores the result in the frame
     pub fn compress(&mut self, data: &[f64]) {
         self.sample_count = data.len();
         self.data = self.compressor.compress(data);
     }
 
-    /// Compress a data and stores the result in the frame
     pub fn compress_bounded(&mut self, data: &[f64], max_error: f32) {
         self.sample_count = data.len();
         self.data = self.compressor.compress_bounded(data, max_error as f64);
@@ -75,15 +70,12 @@ impl CompressorFrame {
         // Speed factor limits the amount of data that is sampled to calculate the best compressor.
         // We need enough samples to do decent compression, minimum is 128 (2^7)
         let data_sample = COMPRESSION_SPEED[compression_speed] as usize;
-        // Eligible compressors for use
         let compressor_list = [Compressor::FFT, Compressor::Polynomial];
-        // Do a statistical analysis of the data, let's see if we can pick a compressor out of this.
         let stats = DataStats::new(data);
         // Checking the statistical analysis and chose, if possible, a compressor
         // If the data is constant, well, constant frame
         if stats.min == stats.max {
             self.compressor = Compressor::Constant;
-            // Now do the full data compression
             self.data = self
                 .compressor
                 .get_compress_bounded_results(data, max_error as f64)
@@ -106,13 +98,11 @@ impl CompressorFrame {
                 .min_by_key(|x| x.0.compressed_data.len())
                 .unwrap();
             self.compressor = *chosen_compressor;
-            // Now do the full data compression
             self.data = self
                 .compressor
                 .get_compress_bounded_results(data, max_error as f64)
                 .compressed_data;
         } else {
-            // Run all the eligible compressors and choose smallest
             let compressor_results: Vec<_> = compressor_list
                 .iter()
                 .map(|compressor| {
@@ -150,7 +140,6 @@ impl CompressorFrame {
         debug!("Auto Compressor Selection: {:?}", self.compressor);
     }
 
-    /// Decompresses a frame and returns the resulting data array
     pub fn decompress(&self) -> Vec<f64> {
         debug!(
             "Decompressing Frame. Size: {}, Samples: {}",
diff --git a/brro-compressor/src/header.rs b/brro-compressor/src/header.rs
index e7f398d..080beb0 100644
--- a/brro-compressor/src/header.rs
+++ b/brro-compressor/src/header.rs
@@ -16,7 +16,6 @@ limitations under the License.
 
 use bincode::{Decode, Encode};
 
-/// This will write the file headers
 #[derive(Encode, Decode, Debug, Clone)]
 pub struct CompressorHeader {
     initial_segment: [u8; 4],
diff --git a/brro-compressor/src/lib.rs b/brro-compressor/src/lib.rs
index 2b1d4ae..1b6492d 100644
--- a/brro-compressor/src/lib.rs
+++ b/brro-compressor/src/lib.rs
@@ -15,7 +15,7 @@ limitations under the License.
 */
 
 #![allow(clippy::new_without_default)]
-// Lucas - Once the project is far enough along I strongly reccomend reenabling dead code checks
+// TODO: re-enable dead code checks
 #![allow(dead_code)]
 
 pub mod compare;
diff --git a/brro-compressor/src/main.rs b/brro-compressor/src/main.rs
index 69e7561..d261382 100644
--- a/brro-compressor/src/main.rs
+++ b/brro-compressor/src/main.rs
@@ -24,31 +24,23 @@ use std::error::Error;
 use std::path::PathBuf;
 use wavbrro::wavbrro::WavBrro;
 
-/// Processes the given input based on the provided arguments.
 fn process_args(arguments: &Args) -> Result<(), Box<dyn Error>> {
     let metadata = std::fs::metadata(&arguments.input)?;
 
-    // If the input path points to a single file
     if metadata.is_file() {
         debug!("Target is a file");
         process_single_file(arguments.input.clone(), arguments)?;
-    }
-    // If the input path points to a directory
-    else if metadata.is_dir() {
+    } else if metadata.is_dir() {
         debug!("Target is a directory");
         process_directory(arguments)?;
-    }
-    // If the input path is neither a file nor a directory
-    else {
+    } else {
         return Err("The provided path is neither a file nor a directory.".into());
     }
 
     Ok(())
 }
 
-/// Processes all files in a given directory.
 fn process_directory(arguments: &Args) -> Result<(), Box<dyn Error>> {
-    // Assuming you want to process each file inside this directory
     for entry in std::fs::read_dir(arguments.input.clone())? {
         let path = entry?.path();
         if path.is_file() {
@@ -67,14 +59,11 @@ fn process_directory(arguments: &Args) -> Result<(), Box<dyn Error>> {
     Ok(())
 }
 
-/// Processes a single file.
 fn process_single_file(mut file_path: PathBuf, arguments: &Args) -> Result<(), Box<dyn Error>> {
     debug!("Processing single file...");
     if arguments.uncompress {
-        //read
         if let Some(vec) = bro_reader::read_file(&file_path)? {
             let arr: &[u8] = &vec;
-            //decompress
             let decompressed_data = decompress_data(arr);
             if arguments.verbose {
                 println!("Output={:?}", decompressed_data);
@@ -83,29 +72,22 @@ fn process_single_file(mut file_path: PathBuf, arguments: &Args) -> Result<(), B
             WavBrro::to_file_with_data(&file_path, &decompressed_data)
         }
     } else {
-        // Read an WavBRRO file and compress it
         let data = WavBrro::from_file(&file_path)?;
         if arguments.verbose {
             println!("Input={:?}", data);
         }
-        //compress
         let compressed_data = compress_data(&data, arguments);
-        //write
         file_path.set_extension("bro");
         std::fs::write(file_path, compressed_data)?;
     }
     Ok(())
 }
 
-/// Compresses the data based on the provided tag and arguments.
 fn compress_data(vec: &[f64], arguments: &Args) -> Vec<u8> {
     debug!("Compressing data!");
-    //let optimizer_results = optimizer::process_data(vec, tag);
-    // Create Optimization Plan and Stream for the data.
     let mut op = OptimizerPlan::plan(vec);
     let mut cs = CompressedStream::new();
-    // Assign the compressor if it was selected
     match arguments.compressor {
         CompressorType::Noop => op.set_compressor(Compressor::Noop),
         CompressorType::Constant => op.set_compressor(Compressor::Constant),
@@ -116,7 +98,6 @@ fn compress_data(vec: &[f64], arguments: &Args) -> Vec<u8> {
     }
     for (cpr, data) in op.get_execution().into_iter() {
         debug!("Chunk size: {}", data.len());
-        // If compressor is a losseless one, compress with the error defined, or default
         match arguments.compressor {
             CompressorType::Fft
             | CompressorType::Polynomial
@@ -133,7 +114,6 @@ fn compress_data(vec: &[f64], arguments: &Args) -> Vec<u8> {
     cs.to_bytes()
 }
 
-/// Compresses the data based on the provided tag and arguments.
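`compress_data` above is the entire write path: plan the chunking, optionally pin a single compressor, then feed each chunk into the stream. A condensed sketch of that flow follows; the `compress_chunk_with` call is an assumption, since the match arms that actually feed the stream are elided from this hunk:

```rust
// Hypothetical condensation of compress_data above. compress_chunk_with
// is assumed from context; the diff does not show the real call.
use brro_compressor::compressor::Compressor;
use brro_compressor::data::CompressedStream;
use brro_compressor::optimizer::OptimizerPlan;

fn compress_all(samples: &[f64]) -> Vec<u8> {
    let mut op = OptimizerPlan::plan(samples); // clean, chunk, pre-assign compressors
    op.set_compressor(Compressor::FFT); // hand-picked, as with --compressor fft
    let mut cs = CompressedStream::new();
    for (cpr, chunk) in op.get_execution().into_iter() {
        cs.compress_chunk_with(chunk, cpr.to_owned()); // assumed API
    }
    cs.to_bytes()
}
```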
 fn decompress_data(compressed_data: &[u8]) -> Vec<f64> {
     debug!("decompressing data!");
     let cs = CompressedStream::from_bytes(compressed_data);
@@ -143,10 +123,8 @@ fn decompress_data(compressed_data: &[u8]) -> Vec<f64> {
 
 #[derive(Parser, Default, Debug)]
 #[command(author, version, about="A Time-Series compressor", long_about = None)]
 struct Args {
-    /// input file
     input: PathBuf,
 
-    /// Select a compressor, default is auto
     #[arg(long, value_enum, default_value = "auto")]
     compressor: CompressorType,
 
@@ -157,7 +135,6 @@ struct Args {
     #[arg(short, long, default_value_t = 5, value_parser = clap::value_parser!(u8).range(0..51))]
     error: u8,
 
-    /// Uncompresses the input file/directory
     #[arg(short, action)]
     uncompress: bool,
 
@@ -169,7 +146,6 @@ struct Args {
     #[arg(short, long, default_value_t = 0, value_parser = clap::value_parser!(u8).range(0..7))]
     compression_selection_sample_level: u8,
 
-    /// Verbose output, dumps everysample in the input file (for compression) and in the ouput file (for decompression)
     #[arg(long, action)]
     verbose: bool,
 }
diff --git a/brro-compressor/src/optimizer/mod.rs b/brro-compressor/src/optimizer/mod.rs
index 2756aaa..2351d01 100644
--- a/brro-compressor/src/optimizer/mod.rs
+++ b/brro-compressor/src/optimizer/mod.rs
@@ -16,18 +16,14 @@ limitations under the License.
 
 use crate::{
     compressor::Compressor,
-    types,
     utils::{f64_to_u64, prev_power_of_two},
 };
-use log::debug;
-use types::metric_tag::MetricTag;
 
 pub mod utils;
 
 /// Max Frame size, this can aprox. 36h of data at 1point/sec rate, a little more than 1 week at 1point/5sec
 /// and 1 month (30 days) at 1 point/20sec.
 /// This would be aprox. 1MB of Raw data (131072 * 64bits).
-/// We wouldn't want to decompressed a ton of uncessary data, but for historical view of the data, looking into 1day/week/month at once is very reasonable
 const MAX_FRAME_SIZE: usize = 131072; // 2^17
 
 /// The Min frame size is one that allows our compressors potentially achieve 100x compression. Currently the most
 /// limited one is the FFT compressor, that needs 3 frequencies at minimum, 3x100 = 300, next power of 2 is 512.
@@ -47,7 +43,6 @@ pub struct OptimizerPlan {
 }
 
 impl OptimizerPlan {
-    /// Creates an optimal data compression plan
     pub fn plan(data: &[f64]) -> Self {
         let c_data = OptimizerPlan::clean_data(data);
         let chunks = OptimizerPlan::get_chunks_sizes(c_data.len());
@@ -59,28 +54,12 @@ impl OptimizerPlan {
         }
     }
 
-    /// Creates an optimal plan for compression for the data set provided bound by a given error
-    pub fn plan_bounded(data: &[f64], max_error: f32) -> Self {
-        // TODO: Check error limits
-        let c_data = OptimizerPlan::clean_data(data);
-        let chunks = OptimizerPlan::get_chunks_sizes(c_data.len());
-        let optimizer = OptimizerPlan::assign_compressor(&c_data, &chunks, Some(max_error));
-        OptimizerPlan {
-            data: c_data,
-            chunk_sizes: chunks,
-            compressors: optimizer,
-        }
-    }
-
-    /// Sets a given compressor for all data chunks
     pub fn set_compressor(&mut self, compressor: Compressor) {
         let new_compressors = vec![compressor; self.compressors.len()];
         self.compressors = new_compressors;
     }
 
-    /// Removes NaN and infinite references from the data
     pub fn clean_data(wav_data: &[f64]) -> Vec<f64> {
-        // Cleaning data, removing NaN, etc. This might reduce sample count
         wav_data
             .iter()
             .filter(|x| !(x.is_nan() || x.is_infinite()))
             .copied()
             .collect()
     }
 
@@ -115,7 +94,6 @@ impl OptimizerPlan {
         chunk_sizes
     }
 
-    /// Returns a vector with the data slice and the compressor associated
     pub fn get_execution(&self) -> Vec<(&Compressor, &[f64])> {
         let mut output = Vec::with_capacity(self.chunk_sizes.len());
         let mut s = 0;
@@ -126,15 +104,11 @@ impl OptimizerPlan {
         output
     }
 
-    /// Walks the data, checks how much variability is in the data, and assigns a compressor based on that
-    /// NOTE: Is this any good?
     fn get_compressor(data: &[f64]) -> Compressor {
         let _ = data.iter().map(|&f| f64_to_u64(f, 0));
-        // For now, let's just return FFT
         Compressor::FFT
     }
 
-    /// Assigns a compressor to a chunk of data
     fn assign_compressor(
         clean_data: &[f64],
         chunks: &[usize],
@@ -155,17 +129,6 @@ impl OptimizerPlan {
     }
 }
 
-/// This should look at the data and return an optimized dataset for a specific compressor,
-/// If a compressor is hand picked, this should be skipped.
-pub fn process_data(wav_data: &[f64], tag: &MetricTag) -> Vec<f64> {
-    debug!("Tag: {:?} Len: {}", tag, wav_data.len());
-    wav_data
-        .iter()
-        .filter(|x| !(x.is_nan() || x.is_infinite()))
-        .copied()
-        .collect()
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/brro-compressor/src/optimizer/utils.rs b/brro-compressor/src/optimizer/utils.rs
index 3486729..4700d2f 100644
--- a/brro-compressor/src/optimizer/utils.rs
+++ b/brro-compressor/src/optimizer/utils.rs
@@ -24,26 +24,19 @@ pub enum Bitdepth {
     I16,
     U8,
 }
-/// Data structure that holds statictical information about the data provided
+
 pub struct DataStats {
-    // Max value
     pub max: f64,
-    // Max value location in the array
     pub max_loc: usize,
-    // Min value
     pub min: f64,
-    // Min value location in the array
     pub min_loc: usize,
-    // Mean of the data
     pub mean: f64,
-    // Bitdepth that this data can be
     pub bitdepth: Bitdepth,
     pub fractional: bool,
 }
 
 impl DataStats {
     pub fn new(data: &[f64]) -> Self {
-        // Statistical data stored
         let mut min: f64 = data[0];
         let mut min_loc = 0;
         let mut max: f64 = data[0];
@@ -52,7 +45,6 @@ impl DataStats {
         let mut mean: f64 = 0.0;
         let mut recommended_bitdepth = Bitdepth::F64;
 
-        // Walk the data and perform the analysis
         for (i, value) in data.iter().enumerate() {
             let t_value = *value;
             mean += value;
@@ -70,7 +62,7 @@ impl DataStats {
         }
         mean /= data.len() as f64;
         // Check max size of values
-        // For very large numbers (i32 and i64), it might be ideal to detect the dc component
+        // TODO: for very large numbers (i32 and i64), it might be ideal to detect the dc component
And then remove it later let max_int = split_n(max).0; // This is the DC component let min_int = split_n(min).0; @@ -118,19 +110,6 @@ impl DataStats { } } } - -fn as_i8(value: f64) -> i8 { - split_n(value).0 as i8 -} - -fn as_i16(value: f64) -> i16 { - split_n(value).0 as i16 -} - -fn as_i32(value: f64) -> i32 { - split_n(value).0 as i32 -} - fn split_n(x: f64) -> (i64, f64) { const FRACT_SCALE: f64 = 1.0 / (65536.0 * 65536.0 * 65536.0 * 65536.0); // 1_f64.exp(-64) const STORED_MANTISSA_DIGITS: u32 = f64::MANTISSA_DIGITS - 1; @@ -177,57 +156,6 @@ fn split_n(x: f64) -> (i64, f64) { (0, 0.0) } } - -fn analyze_data(data: &Vec) -> (i32, i64, bool) { - let mut min: f64 = 0.0; - let mut max: f64 = 0.0; - let mut fractional = false; - for value in data { - let t_value = *value; - if split_n(t_value).1 != 0.0 { - fractional = true; - } - if t_value > max { - max = t_value - }; - if t_value < min { - min = t_value - }; - } - // Check max size of values - // For very large numbers (i32 and i64), it might be ideal to detect the dc component - // of the signal. And then remove it later - let max_int = split_n(max).0; // This is the DC component - let min_int = split_n(min).0; - - // Finding the bitdepth without the DC component - let recommended_bitdepth = find_bitdepth(max_int - min_int, min_int); - debug!( - "Recommended Bitdepth: {}, Fractional: {}", - recommended_bitdepth, fractional - ); - (recommended_bitdepth, min_int, fractional) -} - -fn find_bitdepth(max_int: i64, min_int: i64) -> i32 { - // Check where those ints fall into - let bitdepth = match max_int { - _ if max_int <= u8::MAX.into() => 8, - _ if max_int <= i16::MAX.into() => 16, - _ if max_int <= i32::MAX.into() => 32, - _ => 64, - }; - - let bitdepth_signed = match min_int { - _ if min_int == 0 => 8, - _ if min_int >= i16::MIN.into() => 16, - _ if min_int >= i32::MIN.into() => 32, - _ => 64, - }; - - bitdepth.max(bitdepth_signed) -} - #[cfg(test)] mod tests { use super::*; diff --git a/brro-compressor/src/types/metric_tag.rs b/brro-compressor/src/types/metric_tag.rs index 7748a4b..eb4b7c8 100644 --- a/brro-compressor/src/types/metric_tag.rs +++ b/brro-compressor/src/types/metric_tag.rs @@ -14,21 +14,22 @@ See the License for the specific language governing permissions and limitations under the License. */ -use median::Filter; - #[derive(Debug)] pub enum MetricTag { + // Represents a percentage value. Precision is reduced to 2 significant digits. Percent(i32), - // If it is a percent reduce significant digits to 2 + // Represents a duration value. Precision is reduced to 1 microsecond. Duration(i32), - // if it is a duration reduce precision to 1 microsecond + // Represents a metric with a float representation where precision is not required. NotFloat, - // A metric that has a float representation but shouldn't (Eg. Precision is not needed) + // Represents a quasi-random metric, such as network deltas or heap memory changes, + // which exhibit unpredictable behavior. QuasiRandom, - // A metric that exhibits a quasi random sample behavior. (E.g. Network deltas, heap memory) + // Represents data in bytes. Should be converted to a human-readable format, + // such as KB or MB. Bytes(i32), - // Data that is in bytes... Make it MB, or KB - Other, // Everything else + // Represents any other type of metric that does not fit into the predefined categories. 
+ Other, } impl MetricTag { @@ -47,16 +48,4 @@ impl MetricTag { fn to_multiply_and_truncate(number: f64, mul: i32) -> i64 { (number * mul as f64) as i64 } - - fn to_median_filter(data: &[f64]) -> Vec { - let mut filtered = Vec::with_capacity(data.len()); - // 10minutes of data - let mut filter = Filter::new(50); - for point in data { - let point_int = MetricTag::QuasiRandom.from_float(*point); - let median = filter.consume(point_int); - filtered.push(median) - } - filtered - } } diff --git a/brro-compressor/src/utils/error.rs b/brro-compressor/src/utils/error.rs index 9303ae5..d472b5d 100644 --- a/brro-compressor/src/utils/error.rs +++ b/brro-compressor/src/utils/error.rs @@ -40,17 +40,11 @@ impl ErrorMethod { /// This function calculates the error between 2 arrays of f64. The results are from 0 to .. /// Being 0, no error, 1 - 100% error and so on. -/// This uses the default function to calculte it. +/// This uses the default function to calculate it. pub fn calculate_error(original: &[f64], generated: &[f64]) -> f64 { ErrorMethod::error(ErrorMethod::default(), original, generated) } -/// This function calculates the error between 2 arrays of f64. The results are from 0 to .. -/// Being 0, no error, 1 - 100% error and so on. -/// This uses the provided method to calculte it. -pub fn calculate_error_method(original: &[f64], generated: &[f64], method: ErrorMethod) -> f64 { - ErrorMethod::error(method, original, generated) -} /// Calculates the mean squared error between two vectors. /// /// # Arguments diff --git a/brro-compressor/src/utils/mod.rs b/brro-compressor/src/utils/mod.rs index 263ab0e..c00015d 100644 --- a/brro-compressor/src/utils/mod.rs +++ b/brro-compressor/src/utils/mod.rs @@ -20,7 +20,7 @@ pub mod writers; pub const DECIMAL_PRECISION: u32 = 5; -// Is this the right place? +// TODO: check if it is the right place? pub fn prev_power_of_two(n: usize) -> usize { // n = 0 gives highest_bit_set_idx = 0. let highest_bit_set_idx = 63 - (n | 1).leading_zeros(); @@ -58,11 +58,6 @@ pub fn f64_to_u64(number: f64, precision: usize) -> u64 { (number * mul as f64) as u64 } -pub fn round_f32(x: f32, decimals: u32) -> f64 { - let y = 10i32.pow(decimals) as f64; - (x as f64 * y).round() / y -} - pub fn round_f64(x: f64, decimals: u32) -> f64 { let y = 10i32.pow(decimals) as f64; (x * y).round() / y diff --git a/brro-compressor/src/utils/readers/bro_reader.rs b/brro-compressor/src/utils/readers/bro_reader.rs index 3ee0b36..f2b7e37 100644 --- a/brro-compressor/src/utils/readers/bro_reader.rs +++ b/brro-compressor/src/utils/readers/bro_reader.rs @@ -44,28 +44,3 @@ fn is_bro_file(file_path: &Path) -> io::Result { // Check if the file starts with "BRRO" Ok(header.starts_with(b"BRRO")) } - -/// Read a file by chunks and processes the chunks -pub fn process_by_chunk(file_path: &Path) -> Result<(), std::io::Error> { - let mut file = std::fs::File::open(file_path)?; - - let mut list_of_chunks = Vec::new(); - // 64KB at a time, assuming 64Bit samples, ~1024 samples. 
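// A standalone sketch of the fixed-size chunked-read pattern implemented by
// the `process_by_chunk` helper being removed below, preserved here for
// reference. The name `read_chunks` is illustrative, not part of the crate;
// the `take`/`read_to_end` loop is the same one the removed code uses.
use std::io::{self, Read};

fn read_chunks<R: Read>(mut reader: R, chunk_size: usize) -> io::Result<Vec<Vec<u8>>> {
    let mut chunks = Vec::new();
    loop {
        let mut chunk = Vec::with_capacity(chunk_size);
        // `take` caps the read at `chunk_size` bytes; `read_to_end` then
        // fills the chunk up to that cap.
        let n = reader
            .by_ref()
            .take(chunk_size as u64)
            .read_to_end(&mut chunk)?;
        if n == 0 {
            break; // EOF
        }
        chunks.push(chunk);
        if n < chunk_size {
            break; // short read: this was the file's last chunk
        }
    }
    Ok(chunks)
}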
- let chunk_size = 0x10000; - - loop { - let mut chunk = Vec::with_capacity(chunk_size); - let n = file - .by_ref() - .take(chunk_size as u64) - .read_to_end(&mut chunk)?; - if n == 0 { - break; - } - list_of_chunks.push(chunk); - if n < chunk_size { - break; - } - } - Ok(()) -} diff --git a/csv-compressor/src/csv.rs b/csv-compressor/src/csv.rs index d8372aa..fb38ddb 100644 --- a/csv-compressor/src/csv.rs +++ b/csv-compressor/src/csv.rs @@ -46,7 +46,6 @@ pub fn read_samples_from_csv_file(dest: &Path) -> Result, csv::Error reader.deserialize().collect() } -/// Writes samples to file at dest as csv pub fn write_samples_to_csv_file(dest: &Path, samples: &[Sample]) -> Result<(), csv::Error> { let mut csv_file = File::create(dest)?; let mut writer = csv::Writer::from_writer(&mut csv_file); @@ -89,7 +88,6 @@ mod tests { TempDir::new("test_read_samples").expect("Unable to create temporary directory"); let path = temp_dir.path().join("samples.csv"); - // Writing content to test file let mut file = File::create(&path).expect("Unable to create test file"); file.write_all(csv_content.as_bytes()) .expect("Unable to write data"); diff --git a/csv-compressor/src/main.rs b/csv-compressor/src/main.rs index 17ebb26..04a6299 100644 --- a/csv-compressor/src/main.rs +++ b/csv-compressor/src/main.rs @@ -34,10 +34,8 @@ mod metric; author, version, about = "A Time-Series compressor utilizes Brro Compressor for CSV format", long_about = None )] pub struct Args { - /// Path to input input: PathBuf, - /// Defines where the result will be stored #[arg(short, long, action)] output: Option, @@ -92,14 +90,10 @@ enum CompressorType { Idw, } -/// Compresses the data based on the provided tag and arguments. fn compress_data(vec: &[f64], arguments: &Args) -> Vec { debug!("Compressing data!"); - //let optimizer_results = optimizer::process_data(vec, tag); - // Create Optimization Plan and Stream for the data. let mut op = OptimizerPlan::plan(vec); let mut cs = CompressedStream::new(); - // Assign the compressor if it was selected match arguments.compressor { CompressorType::Noop => op.set_compressor(Compressor::Noop), CompressorType::Constant => op.set_compressor(Compressor::Constant), @@ -110,7 +104,6 @@ fn compress_data(vec: &[f64], arguments: &Args) -> Vec { } for (cpr, data) in op.get_execution().into_iter() { debug!("Chunk size: {}", data.len()); - // If compressor is a losseless one, compress with the error defined, or default match arguments.compressor { CompressorType::Fft | CompressorType::Polynomial @@ -127,14 +120,12 @@ fn compress_data(vec: &[f64], arguments: &Args) -> Vec { cs.to_bytes() } -/// Compresses the data based on the provided tag and arguments. 
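// Usage sketch for the compress/decompress pair around this point: samples
// in, BRRO byte stream out, and back again. The element types (`Vec<u8>` from
// `compress_data`, `Vec<f64>` from `decompress_data`) are assumed from how
// the results are used elsewhere in this file.
fn roundtrip(samples: &[f64], args: &Args) -> Vec<f64> {
    let bytes = compress_data(samples, args); // compressed byte stream
    // Decoded samples; note that the lossy compressors (FFT, IDW, Polynomial)
    // reproduce the input only within the configured error bound.
    decompress_data(&bytes)
}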
fn decompress_data(compressed_data: &[u8]) -> Vec { debug!("decompressing data!"); let cs = CompressedStream::from_bytes(compressed_data); cs.decompress() } -/// process_csv opens and parses the content of file at path pub fn process_csv(path: &Path) -> Metric { let samples = csv::read_samples_from_csv_file(path).expect("failed to read samples from file"); Metric::from_samples(&samples).expect("failed to create metric from samples") @@ -147,18 +138,15 @@ fn process_args(args: Args) { .unwrap_or_else(|| args.input.clone()) .clone(); - // uncompressing input if args.uncompress { debug!("Starting uncompressing of {:?}", &args.input); if let Some(data) = read_file(&args.input).expect("failed to read bro file") { - // decomressing data and creating wavbrro from it let decompressed_data = decompress_data(&data); let mut wbro = WavBrro::new(); for data in decompressed_data.iter() { wbro.add_sample(*data); } - // // reading existing index let mut vsri_file_path = args.input.clone(); vsri_file_path.set_extension("vsri"); debug!("Reading vsri at {:?}", &output_base); @@ -174,7 +162,6 @@ fn process_args(args: Args) { let samples = metric.get_samples(); - // creating csv output file let mut csv_file_path = file_path.clone(); csv_file_path.set_extension("csv"); debug!("Writing samples into csv file"); @@ -199,7 +186,6 @@ fn process_args(args: Args) { .expect("failed to flush vsri to the file"); } - // compressing input if no_compression is not set if !args.no_compression { debug!("Starting compressing"); let data = metric.wbro.get_samples(); diff --git a/csv-compressor/src/metric.rs b/csv-compressor/src/metric.rs index d1c0747..6f0f615 100644 --- a/csv-compressor/src/metric.rs +++ b/csv-compressor/src/metric.rs @@ -20,12 +20,9 @@ use std::path::Path; use vsri::{day_elapsed_seconds, Vsri}; use wavbrro::wavbrro::WavBrro; -/// Metric is responsible for generating WavBrro and VSRI from parsed Samples #[derive(Default)] pub struct Metric { - /// Metric data itself pub wbro: WavBrro, - /// Metric indexes pub vsri: Vsri, } @@ -49,15 +46,12 @@ impl Display for Error { impl std::error::Error for Error {} impl Metric { - /// Creates new WavBrro instance pub fn new(wbro: WavBrro, vsri: Vsri) -> Self { Metric { wbro, vsri } } - /// Appends samples to the metric pub fn append_samples(&mut self, samples: &[Sample]) -> Result<(), Error> { for sample in samples { - // For solution simplification it generates only 1 WavBrro and 1 VSRI let ts = day_elapsed_seconds(sample.timestamp / 1000); self.vsri .update_for_point(ts) @@ -69,19 +63,16 @@ impl Metric { Ok(()) } - /// Creates default metric from the existing samples pub fn from_samples(samples: &[Sample]) -> Result { let mut metric = Metric::default(); metric.append_samples(samples)?; Ok(metric) } - /// Flushes underlying WavBrro formatted metrics to the file at path pub fn flush_wavbrro(&self, path: &Path) { self.wbro.to_file(path) } - /// Flushes underlying VSRI to the file at path pub fn flush_indexes(&self, path: &Path) -> Result<(), std::io::Error> { self.vsri.flush_to(path) } diff --git a/optimizer/Cargo.toml b/optimizer/Cargo.toml deleted file mode 100644 index d3004d1..0000000 --- a/optimizer/Cargo.toml +++ /dev/null @@ -1,18 +0,0 @@ -[package] -name = "optimizer" -version = "0.1.0" -authors = ["Carlos Rolo "] -edition = "2021" -license = "Apache-2.0" -description = "Optimizer stage for the compressor." 
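// `process_args` above derives its sibling artifacts (.vsri, .csv) by
// swapping the input's file extension. In isolation that is just
// `PathBuf::set_extension`:
use std::path::{Path, PathBuf};

fn sibling_with_ext(input: &Path, ext: &str) -> PathBuf {
    let mut p = input.to_path_buf();
    p.set_extension(ext); // "metrics/cpu.bro" -> "metrics/cpu.vsri"
    p
}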
- -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[dependencies] -hound = "3.5" -chrono = "0.4.26" -claxon = "0.4.3" -env_logger = "0.11.0" -log = "0.4.0" -clap = {version = "4.3.14", features = ["derive"] } -regex = "1.9.1" -median = "0.3.2" \ No newline at end of file diff --git a/optimizer/src/main.rs b/optimizer/src/main.rs deleted file mode 100644 index 03f0bab..0000000 --- a/optimizer/src/main.rs +++ /dev/null @@ -1,454 +0,0 @@ -/* -Copyright 2024 NetApp, Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Lucas - Once the project is far enough along I strongly reccomend reenabling dead code checks -#![allow(dead_code)] - -use clap::{arg, command, Parser}; -use hound::{WavSpec, WavWriter}; -use log::{debug, error, info}; -use median::Filter; -use regex::Regex; -use std::fs; -use std::io::Write; -use std::path::PathBuf; -use std::{fs::File, path::Path}; - -#[derive(Debug)] -enum MetricTag { - Percent(i32), // If it is a percent reduce significant digits to 2 - Duration(i32), // if it is a duration reduce precision to 1 microsecond - NotFloat, // A metric that has a float representation but shouldn't (Eg. Precision is not needed) - QuasiRandom, // A metric that exhibits a quasi random sample behavior. (E.g. Network deltas, heap memory) - Bytes(i32), // Data that is in bytes... Make it MB, or KB - Other, // Everything else -} - -impl MetricTag { - #[allow(clippy::wrong_self_convention)] - fn from_float(&self, x: f64) -> i64 { - match self { - MetricTag::Other => 0, - MetricTag::NotFloat | MetricTag::QuasiRandom => x as i64, - MetricTag::Percent(y) => to_multiply_and_truncate(x, *y), - MetricTag::Duration(y) => to_multiply_and_truncate(x, *y), - MetricTag::Bytes(y) => (x as i64) / (*y as i64), - } - } -} - -/* -Reads a WAV file, checks the channels and the information contained there. From that -information takes a decision on the best channel, block size and bitrate for the BRRO -encoders. 
-*/ - -/* Read a WAV file, */ -fn read_metrics_from_wav(filename: &str) -> Vec { - let r_reader = hound::WavReader::open(filename); - let mut reader = match r_reader { - Ok(reader) => reader, - Err(_err) => { - return Vec::new(); - } - }; - let num_channels = reader.spec().channels as usize; - - let mut raw_data: Vec = Vec::new(); - let mut u64_holder: [u16; 4] = [0, 0, 0, 0]; - - // Iterate over the samples and channels and push each sample to the vector - let mut current_channel: usize = 0; - for sample in reader.samples::() { - u64_holder[current_channel] = sample.unwrap() as u16; - current_channel += 1; - if current_channel == num_channels { - raw_data.push(join_u16_into_f64(u64_holder)); - current_channel = 0; - } - } - raw_data -} - -fn generate_wav_header(channels: Option, bitdepth: u16, samplerate: u32) -> WavSpec { - hound::WavSpec { - channels: channels.unwrap_or(4) as u16, - // TODO: Sample rate adaptations - sample_rate: samplerate, - bits_per_sample: bitdepth, - sample_format: hound::SampleFormat::Int, - } -} - -/// Write a WAV file with the outputs of data analysis for float data -fn write_optimal_wav(filename: &str, data: Vec, bitdepth: i32, dc: i64, channels: i32) { - // Make DC a float for operations - let fdc = dc as f64; - let header: WavSpec = generate_wav_header(Some(channels), bitdepth as u16, 8000); - let mut file_path = filename.to_string(); - file_path.truncate(file_path.len() - 4); - file_path = format!("{}_OPT.wav", file_path); - let file = File::create(file_path).unwrap(); - let mut wav_writer = WavWriter::new(file, header).unwrap(); - for sample in data { - let _ = match bitdepth { - 8 => wav_writer.write_sample(as_i8(sample - fdc)), - 16 => wav_writer.write_sample(as_i16(sample - fdc)), - _ => wav_writer.write_sample(as_i32(sample - fdc)), - }; - } - let _ = wav_writer.finalize(); -} - -fn write_optimal_int_wav(filename: &str, data: Vec, bitdepth: i32, dc: i64, channels: i32) { - let header: WavSpec = generate_wav_header(Some(channels), bitdepth as u16, 8000); - let mut file_path = filename.to_string(); - file_path.truncate(file_path.len() - 4); - file_path = format!("{}_OPT.wav", file_path); - let file = File::create(file_path).unwrap(); - let mut wav_writer = WavWriter::new(file, header).unwrap(); - for sample in data { - let _ = match bitdepth { - 8 => wav_writer.write_sample((sample - dc) as i8), - 16 => wav_writer.write_sample((sample - dc) as i16), - _ => wav_writer.write_sample((sample - dc) as i32), - }; - } - let _ = wav_writer.finalize(); -} - -fn as_i8(value: f64) -> i8 { - split_n(value).0 as i8 -} - -fn as_i16(value: f64) -> i16 { - split_n(value).0 as i16 -} - -fn as_i32(value: f64) -> i32 { - split_n(value).0 as i32 -} - -// Split a float into an integer -fn split_n(x: f64) -> (i64, f64) { - const FRACT_SCALE: f64 = 1.0 / (65536.0 * 65536.0 * 65536.0 * 65536.0); // 1_f64.exp(-64) - const STORED_MANTISSA_DIGITS: u32 = f64::MANTISSA_DIGITS - 1; - const STORED_MANTISSA_MASK: u64 = (1 << STORED_MANTISSA_DIGITS) - 1; - const MANTISSA_MSB: u64 = 1 << STORED_MANTISSA_DIGITS; - - const EXPONENT_BITS: u32 = 64 - 1 - STORED_MANTISSA_DIGITS; - const EXPONENT_MASK: u32 = (1 << EXPONENT_BITS) - 1; - const EXPONENT_BIAS: i32 = (1 << (EXPONENT_BITS - 1)) - 1; - - let bits = x.to_bits(); - let is_negative = (bits as i64) < 0; - let exponent = ((bits >> STORED_MANTISSA_DIGITS) as u32 & EXPONENT_MASK) as i32; - - let mantissa = (bits & STORED_MANTISSA_MASK) | MANTISSA_MSB; - let mantissa = if is_negative { - -(mantissa as i64) - } else { - mantissa as i64 - }; - 
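// Layout reminder for the shifts that follow: an f64 is
// [1 sign | 11 exponent | 52 stored mantissa] bits, and with the implicit
// leading 1 restored (MANTISSA_MSB above), shifting the mantissa by the
// unbiased exponent recovers the integer part straight from the bits. For
// ordinary finite, non-negative values within i64 range the result matches
// this much simpler sketch; the bit-level version additionally pins down
// sign handling and out-of-range exponents explicitly:
fn split_simple(x: f64) -> (i64, f64) {
    (x.trunc() as i64, x.fract()) // e.g. 12.25 -> (12, 0.25)
}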
- let shl = exponent + (64 - f64::MANTISSA_DIGITS as i32 - EXPONENT_BIAS + 1); - if shl <= 0 { - let shr = -shl; - if shr < 64 { - // x >> 0..64 - let fraction = ((mantissa as u64) >> shr) as f64 * FRACT_SCALE; - (0, fraction) - } else { - // x >> 64.. - (0, 0.0) - } - } else if shl < 64 { - // x << 1..64 - let int = mantissa >> (64 - shl); - let fraction = ((mantissa as u64) << shl) as f64 * FRACT_SCALE; - (int, fraction) - } else if shl < 128 { - // x << 64..128 - let int = mantissa << (shl - 64); - (int, 0.0) - } else { - // x << 128.. - (0, 0.0) - } -} - -fn join_u16_into_f64(bits: [u16; 4]) -> f64 { - let u64_bits = (bits[0] as u64) - | ((bits[1] as u64) << 16) - | ((bits[2] as u64) << 32) - | ((bits[3] as u64) << 48); - - f64::from_bits(u64_bits) -} - -fn get_max(a: i32, b: i32) -> i32 { - a.max(b) -} - -/// Converts a float via multiplication and truncation -fn to_multiply_and_truncate(number: f64, mul: i32) -> i64 { - (number * mul as f64) as i64 -} - -fn to_median_filter(data: &Vec) -> Vec { - let mut filtered = Vec::with_capacity(data.len()); - // 10minutes of data - let mut filter = Filter::new(50); - for point in data { - let point_int = MetricTag::QuasiRandom.from_float(*point); - let median = filter.consume(point_int); - filtered.push(median) - } - filtered -} - -/// Check the type of metric and tag it -fn tag_metric(filename: &str) -> MetricTag { - // Should sort this by the probability of each tag, so the ones that are more common are dealt first - // If it says percent_ or _utilization - let mut regex = Regex::new(r"(?m)percent_|_utilization").unwrap(); - if regex.captures(filename).is_some() { - // 2 significant digits resolution (Linux resolution) - return MetricTag::Percent(100); - } - // if it says _client_request - regex = Regex::new(r"(?m)_client_request").unwrap(); - if regex.captures(filename).is_some() { - // Fractional requests are nothing but an averaging artifact - return MetricTag::NotFloat; - } - // if it says _seconds - regex = Regex::new(r"(?m)_seconds").unwrap(); - if regex.captures(filename).is_some() { - // 1 micro second resolution - return MetricTag::Duration(1_000_000); - } - // if it says _seconds - regex = Regex::new(r"(?m)_networkindelta|_networkoutdelta|_heapmemoryused_").unwrap(); - if regex.captures(filename).is_some() { - return MetricTag::QuasiRandom; - } - MetricTag::Other -} - -/// Go through the data, check min and max values, DC Component -/// Check if data fits in 8,16,24,32 bits. If so reduce it to a single channel with -/// those bit depths. -fn analyze_data(data: &Vec) -> (i32, i64, bool) { - let mut min: f64 = 0.0; - let mut max: f64 = 0.0; - let mut fractional = false; - for value in data { - let t_value = *value; - if split_n(t_value).1 != 0.0 { - fractional = true; - } - if t_value > max { - max = t_value - }; - if t_value < min { - min = t_value - }; - } - // Check max size of values - // For very large numbers (i32 and i64), it might be ideal to detect the dc component - // of the signal. And then remove it later - let max_int = split_n(max).0; // This is the DC component - let min_int = split_n(min).0; - - // If fractional is it relevant? 
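// The `fractional` flag computed above is simply "does any sample carry a
// nonzero fractional part", and `max_int`/`min_int` are the integer ("DC")
// parts of the extremes. For finite samples the same check over std is:
fn has_fraction(data: &[f64]) -> bool {
    data.iter().any(|v| v.fract() != 0.0)
}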
- let max_frac = split_n(max).1; - - // Finding the bitdepth without the DC component - let recommended_bitdepth = find_bitdepth(max_int - min_int, min_int); - if !fractional { - info!(" Recommended Bitdepth: {} ", recommended_bitdepth); - } else { - info!( - " Fractional, Recommended Bitdepth: {}, Fractions max: {}", - recommended_bitdepth, max_frac - ); - } - (recommended_bitdepth, min_int, fractional) -} - -fn analyze_int_data(data: &Vec) -> (i32, i64) { - let mut min: i64 = 0; - let mut max: i64 = 0; - for value in data { - let t_value = *value; - if t_value > max { - max = t_value - }; - if t_value < min { - min = t_value - }; - } - - let recommended_bitdepth = find_bitdepth(max - min, min); - info!(" Recommended Bitdepth: {} ", recommended_bitdepth); - (recommended_bitdepth, min) -} - -fn find_bitdepth(max_int: i64, min_int: i64) -> i32 { - // Check where those ints fall into - let bitdepth = match max_int { - _ if max_int <= u8::MAX.into() => 8, - _ if max_int <= i16::MAX.into() => 16, - _ if max_int <= i32::MAX.into() => 32, - _ => 64, - }; - - let bitdepth_signed = match min_int { - _ if min_int == 0 => 8, - _ if min_int >= i16::MIN.into() => 16, - _ if min_int >= i32::MIN.into() => 32, - _ => 64, - }; - - get_max(bitdepth, bitdepth_signed) -} - -fn process_args(input_path: &str, arguments: &Args) { - if arguments.directory { - handle_directory(input_path, arguments); - } else { - process_file(input_path.into(), arguments, None); - } -} - -fn handle_directory(input_path: &str, arguments: &Args) { - let new_directory = format!("new_{}", input_path); - - if fs::create_dir_all(&new_directory).is_err() { - error!("Unable to create directory: {}", new_directory); - return; - } - - if let Ok(entries) = fs::read_dir(input_path) { - for entry_result in entries { - match entry_result { - Ok(entry) if entry.path().is_file() => { - process_file(entry.path(), arguments, Some(&new_directory)); - } - Err(e) => error!("Error reading directory entry: {}", e), - _ => {} - } - } - } else { - error!("Error reading directory: {}", input_path); - } -} -fn process_file(full_path: PathBuf, arguments: &Args, new_directory: Option<&str>) { - if let Some(filename) = full_path.file_name().and_then(|s| s.to_str()) { - let output_path = construct_output_path(filename, new_directory); - let mut file = match File::create(&output_path) { - Ok(file) => file, - Err(_) => { - error!("Unable to create file: {}", output_path); - return; - } - }; - - process_data_and_write_output(&full_path, &mut file, arguments); - } -} - -fn construct_output_path(filename: &str, new_directory: Option<&str>) -> String { - match new_directory { - Some(dir) => format!("{}/new_{}.txt", dir, filename), - None => format!("new_{}.txt", filename), - } -} - -fn process_data_and_write_output(full_path: &Path, file: &mut File, arguments: &Args) { - let full_path_str = full_path.to_str().unwrap_or(""); - debug!("File: {} ,", full_path_str); - let mut _bitdepth = 64; - let mut _dc_component: i64 = 0; - let mut _fractional = true; - let wav_data = read_metrics_from_wav(full_path_str); - if arguments.dump_raw { - writeln!(file, "{:?}", wav_data).expect("Unable to write to file"); - } - // Depending on Metric Tag, apply a transformation - let tag = tag_metric(full_path_str); - debug!("Tag: {:?}", tag); - let iwav_data = match tag { - MetricTag::Other => Vec::new(), - MetricTag::QuasiRandom => to_median_filter(&wav_data), - _ => wav_data.iter().map(|x| tag.from_float(*x)).collect(), - }; - // We split the code here - if !iwav_data.is_empty() { - 
_fractional = false; - if arguments.dump_optimized { - writeln!(file, "{:?}", iwav_data).expect("Unable to write to file"); - } - (_bitdepth, _dc_component) = analyze_int_data(&iwav_data); - } else { - (_bitdepth, _dc_component, _fractional) = analyze_data(&wav_data); - } - if _bitdepth == 64 || _fractional { - debug!("No optimization, exiting"); - std::process::exit(0); - } else if arguments.write { - debug!("Writing optimal file!"); - match iwav_data.len() { - 0 => write_optimal_wav(full_path_str, wav_data, _bitdepth, _dc_component, 1), - _ => write_optimal_int_wav(full_path_str, iwav_data, _bitdepth, _dc_component, 1), - } - } -} -#[derive(Parser, Default, Debug)] -#[command(author, version, about, long_about = None)] -struct Args { - /// input file - input: String, - - /// Write a new file with optimized settings, named filename_OPT.wav - #[arg(short)] - write: bool, - - #[arg(short, action)] - directory: bool, - - /// Samplerate to generate the optimized file - #[arg(short, long)] - samplerate: Option, - - /// Write raw (original) samples to a file, named as raw.out - #[arg(long, action)] - dump_raw: bool, - - /// Write optimized samples to a file, named as optimized.out - #[arg(long, action)] - dump_optimized: bool, -} - -fn main() { - // How to break the float part??? --> THERE ARE NO FLOATS! - // https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/deployment_guide/s2-proc-stat - env_logger::init(); - let arguments = Args::parse(); - debug!("{:?}", arguments); - process_args(&arguments.input, &arguments); -} diff --git a/prometheus-remote/Cargo.toml b/prometheus-remote/Cargo.toml deleted file mode 100644 index d2a13a8..0000000 --- a/prometheus-remote/Cargo.toml +++ /dev/null @@ -1,25 +0,0 @@ -[package] -name = "prometheus-remote" -version = "0.1.0" -authors = ["Carlos Rolo "] -edition = "2021" -license = "Apache-2.0" -description = "Remote Read/Write server for prometheus" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[dependencies] -hound = "3.5" -chrono = "0.4.26" -claxon = "0.4.3" -warp = "0.3.5" -tokio = { version= "1", features = ["full"] } -symphonia = { version = "0.5.3", features = ["flac"] } -prom-remote-api = { version = "0.3.0", features = ["warp"] } -async-trait = "0.1.71" -env_logger = "0.11.0" -log = "0.4.0" -clap = {version = "4.3.14", features = ["derive"] } -regex = "1.9.1" -median = "0.3.2" -dtw_rs = "0.9.5" - diff --git a/prometheus-remote/src/flac_reader.rs b/prometheus-remote/src/flac_reader.rs deleted file mode 100644 index c8739fe..0000000 --- a/prometheus-remote/src/flac_reader.rs +++ /dev/null @@ -1,304 +0,0 @@ -/* -Copyright 2024 NetApp, Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
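// A test-style sketch pinning down the behavior of the `find_bitdepth`
// helper deleted above (the same logic was also removed from
// brro-compressor/src/optimizer/utils.rs). These values follow directly
// from its match arms: the range is checked after DC removal, and a
// negative minimum forces a signed width.
#[test]
fn bitdepth_examples() {
    assert_eq!(find_bitdepth(200, 0), 8); // fits u8, no negative side
    assert_eq!(find_bitdepth(200, -5), 16); // negative min forces i16 width
    assert_eq!(find_bitdepth(70_000, -5), 32); // above i16::MAX -> i32 range
    assert_eq!(find_bitdepth(i64::MAX, 0), 64); // falls through to 64
}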
-*/ - -use std::fs::File; - -use symphonia::core::audio::SampleBuffer; -use symphonia::core::codecs::{Decoder, DecoderOptions}; -use symphonia::core::errors::Error as SymphoniaError; -use symphonia::core::formats::{FormatOptions, FormatReader}; -use symphonia::core::io::MediaSourceStream; -use symphonia::core::meta::MetadataOptions; -use symphonia::core::probe::Hint; - -use chrono::{DateTime, Utc}; - -use crate::lib_vsri; - -// --- Flac Reader -// Remote Reader Spec: ? - -/* --- File Structure STRUCTURE -note: t=point in time, chan = channel, samples are the bytes for each channel. - in this example, each sample is made of 2 bytes (16bit) -+---------------------------+---------------------------+----- -| Frame 1 | Frame 2 | etc -+-------------+-------------+-------------+-------------+----- -| chan 1 @ t1 | chan 2 @ t1 | chan 1 @ t2 | chan 2 @ t2 | etc -+------+------+------+------+------+------+------+------+----- -| byte | byte | byte | byte | byte | byte | byte | byte | etc -+------+------+------+------+------+------+------+------+----- - */ -// TODO: Read from WAV file -// Flac metric is giving a ton of issues, trying to get something simpler -pub struct SimpleFlacReader { - file: File, // The File where the metric is -} - -impl SimpleFlacReader { - pub fn new(file: File, _start_ts: i64) -> Self { - SimpleFlacReader { file } - } - - pub fn get_samples( - &self, - start: Option, - end: Option, - ) -> std::result::Result, SymphoniaError> { - let mut sample_vec: Vec = Vec::new(); - let mut reader = claxon::FlacReader::new(&self.file).unwrap(); - let channels = reader.streaminfo().channels; - let mut sample_count = 0; - // TODO: Make this hold up to channel number - let mut sample_channel_data: [u16; 4] = [0, 0, 0, 0]; - let mut frame_reader = reader.blocks(); - let mut block = claxon::Block::empty(); - loop { - // Read a single frame. Recycle the buffer from the previous frame to - // avoid allocations as much as possible. - match frame_reader.read_next_or_eof(block.into_buffer()) { - Ok(Some(next_block)) => block = next_block, - Ok(None) => break, // EOF. - Err(error) => panic!("[DEBUG][READ][FLAC] {}", error), - } - debug!( - "[READ][SimpleFLaC] Processing block... 
Samples processed: {:?}", - sample_count - ); - if sample_count < start.unwrap_or(0) { - continue; - } - if sample_count > end.unwrap_or(lib_vsri::MAX_INDEX_SAMPLES) { - continue; - } - for sample in 0..block.duration() { - for channel in 0..channels { - sample_channel_data[channel as usize] = block.sample(channel, sample) as u16; - } - sample_vec.push(SimpleFlacReader::join_u16_into_f64(sample_channel_data)); - sample_count += 1; - } - } - debug!( - "[READ][SimpleFLaC] Returning samples for interval: {} {} Sample count: {:?}", - start.unwrap_or(0), - end.unwrap_or(0), - sample_count - ); - Ok(sample_vec) - } - - pub fn get_all_samples(&self) -> std::result::Result, SymphoniaError> { - self.get_samples(None, None) - } - - fn join_u16_into_f64(bits: [u16; 4]) -> f64 { - let u64_bits = (bits[0] as u64) - | ((bits[1] as u64) << 16) - | ((bits[2] as u64) << 32) - | ((bits[3] as u64) << 48); - - f64::from_bits(u64_bits) - } -} - -pub struct FlacMetric { - timeseries_data: Vec<(i64, f64)>, // Sample Data - file: File, // The File where the metric is - interval_start: i64, // The start interval in timestamp with miliseconds - decoder: Option>, // Flac decoder - format_reader: Option>, // Flac format reader -} - -impl FlacMetric { - pub fn new(file: File, start_ts: i64) -> Self { - FlacMetric { - timeseries_data: Vec::new(), - file, - interval_start: start_ts, - decoder: None, - format_reader: None, - } - } - - fn datetime_from_ms(real_time: i64) -> String { - // Time is in ms, convert it to seconds - let datetime = DateTime::::from_timestamp(real_time / 1000, 0).unwrap(); - // Transform datetime to string with the format YYYY-MM-DD - let datetime_str = datetime.format("%Y-%m-%d").to_string(); - datetime_str - } - - /// Load sample data into the Flac Object - fn load_samples(self) -> Vec<(i64, f64)> { - Vec::new() - } - - fn get_format_reader(&self) -> Box { - // TODO: One more unwrap to deal with - let owned_file = self.file.try_clone().unwrap(); - debug!("[READ][FLAC] Probing file: {:?}", owned_file); - let file = Box::new(owned_file); - // Create the media source stream using the boxed media source from above. - let mss = MediaSourceStream::new(file, Default::default()); - // Use the default options when reading and decoding. - let format_opts: FormatOptions = Default::default(); - let metadata_opts: MetadataOptions = Default::default(); - // Probe the media source stream for a format. - let probed = symphonia::default::get_probe() - .format( - Hint::new().mime_type("FLaC"), - mss, - &format_opts, - &metadata_opts, - ) - .unwrap(); - // Get the format reader yielded by the probe operation. - probed.format - } - - fn get_decoder(&self) -> Box { - let decoder_opts: DecoderOptions = Default::default(); - let format = self.get_format_reader(); - // Get the default track. - let track = format.default_track().unwrap(); - // Create a decoder for the track. - symphonia::default::get_codecs() - .make(&track.codec_params, &decoder_opts) - .unwrap() - } - - /// Read samples from a file with an optional start and end point. 
- pub fn get_samples( - &self, - start: Option, - end: Option, - ) -> std::result::Result, SymphoniaError> { - let mut sample_vec: Vec = Vec::new(); - let mut format_reader = self.get_format_reader(); - let mut decoder = self.get_decoder(); - let channels = decoder.codec_params().channels.unwrap().count(); - let mut sample_buf = None; - let mut frame_counter: i32 = 0; - let start_frame = start.unwrap_or(0); - let end_frame = end.unwrap_or(lib_vsri::MAX_INDEX_SAMPLES); - // Loop over all the packets, get all the samples and return them - loop { - let packet = match format_reader.next_packet() { - Ok(packet) => packet, - Err(err) => break error!("[READ]Reader error: {}", err), - }; - // How many frames inside the packet - let dur = packet.dur() as i32; - // Check if we need to decode this packet or not - if !(start_frame < frame_counter + dur && end_frame > frame_counter + dur) { - continue; - } - // Decode the packet into samples. - // TODO: This is overly complex, split into its own code - match decoder.decode(&packet) { - Ok(decoded) => { - // Consume the decoded samples (see below). - if sample_buf.is_none() { - // Get the audio buffer specification. - let spec = *decoded.spec(); - // Get the capacity of the decoded buffer. Note: This is capacity, not length! - let duration = decoded.capacity() as u64; - // Create the sample buffer. - sample_buf = Some(SampleBuffer::::new(duration, spec)); - } - // Each frame contains several samples, we need to get the frame not the sample. Since samples = frames * channels - if let Some(buf) = &mut sample_buf { - buf.copy_interleaved_ref(decoded); - let mut i16_samples: [u16; 4] = [0, 0, 0, 0]; - let mut i = 1; // Starting at 1, channel number is not 0 indexed... - for sample in buf.samples() { - if i >= channels { - frame_counter += 1; - if frame_counter >= start_frame && frame_counter <= end_frame { - sample_vec.push(FlacMetric::join_u16_into_f64(i16_samples)); - } - i = 1; - } - i16_samples[i - 1] = *sample as u16; - i += 1; - } - } - } - Err(SymphoniaError::DecodeError(err)) => error!("[READ]Decode error: {}", err), - Err(err) => break error!("[READ]Unexpeted Decode error: {}", err), - } - } - Ok(sample_vec) - } - - /// Read all samples from a file - pub fn get_all_samples(&self) -> std::result::Result, SymphoniaError> { - let mut sample_vec: Vec = Vec::new(); - let mut format_reader = self.get_format_reader(); - let mut decoder = self.get_decoder(); - let channels = decoder.codec_params().channels.unwrap().count(); - let mut sample_buf = None; - // Loop over all the packets, get all the samples and return them - loop { - let packet = match format_reader.next_packet() { - Ok(packet) => packet, - Err(err) => break debug!("[READ]Reader error: {}", err), - }; - // Decode the packet into audio samples. - match decoder.decode(&packet) { - Ok(decoded) => { - // Consume the decoded audio samples (see below). - if sample_buf.is_none() { - // Get the audio buffer specification. - let spec = *decoded.spec(); - // Get the capacity of the decoded buffer. Note: This is capacity, not length! - let duration = decoded.capacity() as u64; - // Create the sample buffer. - sample_buf = Some(SampleBuffer::::new(duration, spec)); - } - if let Some(buf) = &mut sample_buf { - buf.copy_interleaved_ref(decoded); - let mut i16_samples: [u16; 4] = [0, 0, 0, 0]; - let mut i = 1; // Starting at 1, channel number is not 0 indexed... 
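// Each group of `channels` consecutive interleaved samples is one frame, and
// a frame's four 16-bit channel values are the pieces of a single f64 (see
// join_u16_into_f64 later in this file). Once each decoded sample has been
// narrowed to u16, as the loop below does, the manual counter could instead
// be expressed as chunking — a sketch assuming a 4-channel stream:
fn frames_to_f64(interleaved: &[u16]) -> Vec<f64> {
    interleaved
        .chunks_exact(4) // one frame = 4 channel values
        .map(|c| {
            let bits = (c[0] as u64)
                | ((c[1] as u64) << 16)
                | ((c[2] as u64) << 32)
                | ((c[3] as u64) << 48);
            f64::from_bits(bits)
        })
        .collect()
}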
- for sample in buf.samples() { - if i >= channels { - sample_vec.push(FlacMetric::join_u16_into_f64(i16_samples)); - i = 1; - } - i16_samples[i - 1] = *sample as u16; - i += 1; - } - } - } - Err(SymphoniaError::DecodeError(err)) => error!("[READ]Decode error: {}", err), - Err(err) => break error!("[READ]Unexpeted Decode error: {}", err), - } - } - // Just to make it compile - Ok(sample_vec) - } - - /// Recreate a f64 - fn join_u16_into_f64(bits: [u16; 4]) -> f64 { - let u64_bits = (bits[0] as u64) - | ((bits[1] as u64) << 16) - | ((bits[2] as u64) << 32) - | ((bits[3] as u64) << 48); - - f64::from_bits(u64_bits) - } -} diff --git a/prometheus-remote/src/fs_utils.rs b/prometheus-remote/src/fs_utils.rs deleted file mode 100644 index f4aa629..0000000 --- a/prometheus-remote/src/fs_utils.rs +++ /dev/null @@ -1,430 +0,0 @@ -/* -Copyright 2024 NetApp, Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -use chrono::{DateTime, Duration, Utc}; -/// All the utils/code related the to file management -/// -/// ASSUMPTION: EACH DAY HAS 1 FILE!!! If this assumption change, change this file! -/// TODO: (BIG ONE!) Make this time period agnostic (so it would work with days, weeks, etc) -/// For a READ request that needs data for MetricX from Ta to Tb this would do the following: -/// 1. Do we have metricX? -> No, stop. -/// 2. Which file has Ta, and which has Tb? -/// 2.1 Select them to read -/// 3. Read the indexes, and retrieve the available samples -/// -/// Suggested internal Data Structure of the WAV file -/// -/// +--------------------------------------------------------------------------------------------------------+ -/// | HEADER | i16 @Chan1 | i16 @Chan2 | i16 @Chan3 | i16 @Chan4 | tick @Chan5 | i16 @Chan1 | i16 @Chan2 |...| -/// +--------------------------------------------------------------------------------------------------------+ -/// -/// Prometheus Point: f64 split into 4x i16 (channel 1 to 4) Timestamp: Tick into Channel 5 -/// -use std::fs::{self, File}; -use std::mem; - -use crate::flac_reader::SimpleFlacReader; -use crate::lib_vsri::{day_elapsed_seconds, start_day_ts, Vsri, MAX_INDEX_SAMPLES}; - -struct DateRange(DateTime, DateTime); - -// Iterator for Day to Day -// TODO: move this to several impl? So we can return iterators over several time periods? -impl Iterator for DateRange { - type Item = DateTime; - fn next(&mut self) -> Option { - if self.0 <= self.1 { - let next = self.0 + Duration::days(1); - Some(mem::replace(&mut self.0, next)) - } else { - None - } - } -} - -#[derive(Debug, Clone, Copy)] -pub struct PromDataPoint { - pub point: f64, - pub time: i64, -} - -impl PromDataPoint { - /// Creates a new Prometheus Data Point. It assumes a timestamp with seconds since EPOCH, and converts internally to - /// miliseconds since EPOCH. 
- pub fn new(data: f64, timestamp: i64) -> Self { - PromDataPoint { - point: data, - time: timestamp * 1000, - } - } -} -/// Holds a time range for the file and index -#[derive(Debug, Clone, Copy)] -struct FileTimeRange { - start: i32, - end: i32, -} - -impl FileTimeRange { - fn new(start: i32, end: i32) -> Self { - FileTimeRange { start, end } - } -} - -/// A struct that allows the precise location of data inside the file is in it -/// TODO: Make FILE a FlacReader -#[derive(Debug)] -pub struct DataLocator { - file: File, - index: Vsri, - time_range: FileTimeRange, - date: DateTime, -} - -impl DataLocator { - /// Creates a new DataLocator, includes the File, Index and the Time Range for the data it is expected to return. - /// This is a lazy, doesn't check for the intersection between the time range and data owned until the data is - /// requested. - fn new(file: File, index: Vsri, time_range: FileTimeRange, date: DateTime) -> Self { - DataLocator { - file, - index, - time_range, - date, - } - } - - /// Checks if the Locator time_range intersects with the Index data - fn do_intersect(&self) -> bool { - // If the data start after the end of the range or the data ends before the beggining of the range - if self.index.min() > self.time_range.end || self.index.max() < self.time_range.start { - return false; - } - // index function checks for no ownership, this function checks for ownership, invert the result - !self - .index - .is_empty([self.time_range.start, self.time_range.end]) - } - - fn get_samples_from_range(&self) -> Option<[i32; 2]> { - // By default, get all the samples - let mut sample_range: [i32; 2] = [0, MAX_INDEX_SAMPLES]; - if !self.do_intersect() { - return None; - } - match self.time_range.start { - 0 => { - sample_range[0] = 0; - } - _ => { - // There is intersection, it can unwrap safely - sample_range[0] = self.index.get_this_or_next(self.time_range.start).unwrap(); - } - } - match self.time_range.end { - // Match cannot shadow statics and whatever - _ if self.time_range.end == MAX_INDEX_SAMPLES => { - sample_range[1] = self.index.get_sample_count(); - } - _ => { - // There is intersection, it can unwrap safely - sample_range[1] = self - .index - .get_this_or_previous(self.time_range.start) - .unwrap(); - } - } - Some(sample_range) - } - - /// Consumes the DataLocator to return a Vec of PromDataPoints - pub fn into_prom_data_point(self) -> Vec { - let mut prom_data = Vec::new(); - let samples_locations = self.get_samples_from_range(); - let flac_metric = SimpleFlacReader::new(self.file, self.time_range.start as i64); - let tmp_vec = self.index.get_all_timestamps(); - // There goes an empty arry - if samples_locations.is_none() { - return prom_data; - } - let start = samples_locations.unwrap()[0]; - let end = samples_locations.unwrap()[1] - 1; - debug!( - "[READ] Samples located! From {} to {}. TS available: {}", - start, - end, - tmp_vec.len() - ); - let time_for_samples = &tmp_vec[start as usize..=end as usize]; - // The time I learned if..else is an expression! 
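// `do_intersect` above combines the classic closed-interval overlap test
// with a finer gap check against the index segments. The interval part on
// its own:
fn intervals_overlap(a: (i32, i32), b: (i32, i32)) -> bool {
    // overlap iff neither interval ends before the other begins
    a.0 <= b.1 && b.0 <= a.1
}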
- let temp_result = if start == 0 && end == self.index.get_sample_count() { - flac_metric.get_all_samples() - } else { - flac_metric.get_samples(Some(start), Some(end)) - }; - match temp_result { - // Pack this into DataPoints - Ok(samples) => { - for (v, t) in samples.into_iter().zip(time_for_samples.iter()) { - let ts = *t as i64 + start_day_ts(self.date); - prom_data.push(PromDataPoint::new(v, ts * 1000)); - } - } - Err(err) => { - error!("[READ] Error processing FLaC file {:?}", err); - return prom_data; - } - } - prom_data - } - - /// Given a metric name and a time interval, returns all the files handles for the files that *might* contain that data (No data range intersection is done here) - pub fn get_locators_for_range( - metric_name: &str, - start_time: i64, - end_time: i64, - ) -> Option> { - let mut file_index_vec = Vec::new(); - let data_locator_vec: Vec; - let start_date = DateTime::::from_timestamp(start_time / 1000, 0).unwrap(); - let end_date = DateTime::::from_timestamp(end_time / 1000, 0).unwrap(); - let file_time_intervals = time_intervals(start_time, end_time); - debug!( - "[READ] Time intervals for the range {:?} ", - file_time_intervals - ); - let mut range_count = 0; - for date in DateRange(start_date, end_date).enumerate() { - let data_file_name = format!("{}_{}", metric_name, date.1.format("%Y-%m-%d")); - debug!( - "[READ] Time intervals for file {}: {:?} ", - data_file_name, file_time_intervals[range_count] - ); - let vsri = Vsri::load(&data_file_name); - range_count += 1; - let file = match fs::File::open(format!("{}.flac", data_file_name.clone())) { - Ok(file) => file, - Err(err) => { - warn!( - "[READ] Error processing {}.flac. Error: {}. Skipping file.", - data_file_name, err - ); - continue; - } - }; - // If I got here, I should be able to unwrap Vsri safely. 
- file_index_vec.push((file, vsri.unwrap(), date)); - } - // Creating the Time Range array - let start_ts_i32 = day_elapsed_seconds(start_time); - let end_ts_i32 = day_elapsed_seconds(end_time); - let mut time_intervals = Vec::new(); - match range_count { - 1 => { - time_intervals.push(FileTimeRange::new(start_ts_i32, end_ts_i32)); - } - 2 => { - time_intervals.push(FileTimeRange::new(start_ts_i32, MAX_INDEX_SAMPLES)); - time_intervals.push(FileTimeRange::new(0, end_ts_i32)); - } - _ => { - time_intervals.push(FileTimeRange::new(start_ts_i32, MAX_INDEX_SAMPLES)); - for _i in 2..range_count { - time_intervals.push(FileTimeRange::new(0, MAX_INDEX_SAMPLES)); - } - time_intervals.push(FileTimeRange::new(0, end_ts_i32)); - } - } - - // We have at least one file create the Object - if !file_index_vec.is_empty() { - data_locator_vec = file_index_vec - .into_iter() - .map(|item| DataLocator::new(item.0, item.1, time_intervals[item.2 .0], item.2 .1)) - .collect(); - debug!("[READ] Returning Object {:?} ", data_locator_vec); - return Some(data_locator_vec); - } - None - } -} - -/// Returns a Vector of array of time intervals (in seconds) for the interval of time -fn time_intervals(start_time: i64, end_time: i64) -> Vec<[i32; 2]> { - let mut time_intervals = Vec::new(); - let start_date = DateTime::::from_timestamp(start_time / 1000, 0).unwrap(); - let end_date = DateTime::::from_timestamp(end_time / 1000, 0).unwrap(); - let start_ts_i32 = day_elapsed_seconds(start_time); - let end_ts_i32 = day_elapsed_seconds(end_time); - let date_spread_size = DateRange(start_date, end_date).count(); - match date_spread_size { - 1 => { - time_intervals.push([start_ts_i32, end_ts_i32]); - } - 2 => { - time_intervals.push([start_ts_i32, MAX_INDEX_SAMPLES]); - time_intervals.push([0, end_ts_i32]); - } - _ => { - time_intervals.push([start_ts_i32, MAX_INDEX_SAMPLES]); - for _i in 2..date_spread_size { - time_intervals.push([0, MAX_INDEX_SAMPLES]); - } - time_intervals.push([0, end_ts_i32]); - } - } - time_intervals -} - -/// Given a metric name and a time interval, returns all the files handles for the files that contain that data -pub fn get_file_index_time( - metric_name: &str, - start_time: i64, - end_time: i64, -) -> Option> { - DataLocator::get_locators_for_range(metric_name, start_time, end_time) -} - -pub fn data_locator_into_prom_data_point(data: Vec) -> Vec { - debug!("[READ] Locators: {:?}", data); - let mut data_points = Vec::new(); - for dl in data { - let mut proms = dl.into_prom_data_point(); - if !proms.is_empty() { - data_points.append(&mut proms); - } - } - data_points -} - -/// Retrieves all the available data points in a timerange in the provided Vector of files and indexes -pub fn get_data_between_timestamps( - start_time: i64, - end_time: i64, - file_vec: Vec<(File, Vsri)>, -) -> Vec { - let mut data_points = Vec::new(); - /* Processing logic: - Case 1 (2+ files): - The first file, the period if from `start_time` to end of the file (use index), - The second until the last file (exclusive), we need all the data points we can get (read full file). - The last file we need from start until the `end_time` (use index). - Case 2 (Single file): - Read the index to locate the start sample and the end sample. - Read the file and obtain said samples. 
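// The per-file ranges described in this comment are exactly what
// `time_intervals` above produces: the first day runs from the query's start
// offset to end of day, middle days are whole, and the last day runs from
// midnight to the end offset. The same shape over an explicit day count,
// with MAX_INDEX_SAMPLES' value (86400 seconds) inlined:
fn day_windows(start_sec: i32, end_sec: i32, days: usize) -> Vec<[i32; 2]> {
    const DAY: i32 = 86_400;
    match days {
        0 => Vec::new(),
        1 => vec![[start_sec, end_sec]],
        n => {
            let mut v = Vec::with_capacity(n);
            v.push([start_sec, DAY]); // first (partial) day
            v.extend(std::iter::repeat([0, DAY]).take(n - 2)); // whole days
            v.push([0, end_sec]); // last (partial) day
            v
        }
    }
}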
- */ - // How many files to process - let file_count = file_vec.len(); - // Get the baseline timestamps to add to the index timestamps - let start_date = DateTime::::from_timestamp(start_time / 1000, 0).unwrap(); - let end_date = DateTime::::from_timestamp(end_time / 1000, 0).unwrap(); - let ts_bases: Vec = DateRange(start_date, end_date).map(start_day_ts).collect(); - let start_ts_i32 = day_elapsed_seconds(start_time); - let end_ts_i32 = day_elapsed_seconds(end_time); - // Files might not match the intervals of time, a time array of time intervals need to be done. - - // Where the samples land in the indexes - let mut samples_locations: [i32; 2]; - for pack in file_vec.into_iter().enumerate() { - let iter_index = pack.0; - let file = pack.1 .0; - let vsri = pack.1 .1; - debug!( - "[READ] Locating samples. VSRI {:?} TS: {} - {}", - vsri, start_ts_i32, end_ts_i32 - ); - // Check if the timestamps intercept the index space - if file_count == 1 { - debug!("[READ] Processing single file..."); - // Case 2 - // get_sample can return None - if vsri.min() > end_ts_i32 || vsri.max() < start_ts_i32 { - debug!("[READ] No intersection. Returning."); - return data_points; - } - let start_sample = vsri.get_this_or_next(start_ts_i32); - if start_sample.is_none() { - // No sample in the file fits the current requested interval - debug!("[READ] No intersection (Part2). Returning."); - return data_points; - } - // If I can start reading the file, I can get at least one sample, so it is safe to unwrap. - let end_sample = vsri.get_this_or_previous(end_ts_i32).unwrap(); - samples_locations = [start_sample.unwrap(), end_sample]; - } else { - // Case 1 - debug!("[READ] Processing multiple files..."); - match pack.0 { - // First file - 0 => { - let start_sample = vsri.get_this_or_next(start_ts_i32); - if start_sample.is_none() { - continue; - } - samples_locations = [start_sample.unwrap(), vsri.get_sample_count()]; - } - // Last file - _ if iter_index == file_count - 1 => { - let end_sample = vsri.get_this_or_previous(end_ts_i32); - if end_sample.is_none() { - continue; - } - samples_locations = [0, end_sample.unwrap()]; - } - // Other files - _ => { - // Collect the full file - samples_locations = [0, vsri.get_sample_count()]; - } - } - } - // Collect the data points - let flac_metric = SimpleFlacReader::new(file, start_time); - let tmp_vec = vsri.get_all_timestamps(); - let start = samples_locations[0]; - let end = samples_locations[1] - 1; - debug!( - "[READ] Samples located! From {} to {}. TS available: {}", - start, - end, - tmp_vec.len() - ); - // !@)(#*&!@)# usize and ints... - let time_for_samples = &tmp_vec[start as usize..=end as usize]; - // The time I learned if..else is an expression! - let temp_result = if start == 0 && end == vsri.get_sample_count() { - flac_metric.get_all_samples() - } else { - flac_metric.get_samples(Some(start), Some(end)) - }; - - match temp_result { - // Pack this into DataPoints - Ok(samples) => { - for (v, t) in samples.into_iter().zip(time_for_samples.iter()) { - let ts = *t as i64 + ts_bases[iter_index]; - data_points.push(PromDataPoint::new(v, ts)); - } - } - Err(err) => { - error!("[READ] Error processing FLaC file {:?}", err); - continue; - } - } - } - debug!("[READ] Returning datapoints: {:?}", data_points); - data_points -} diff --git a/prometheus-remote/src/lib_vsri.rs b/prometheus-remote/src/lib_vsri.rs deleted file mode 100644 index 7f146b2..0000000 --- a/prometheus-remote/src/lib_vsri.rs +++ /dev/null @@ -1,486 +0,0 @@ -/* -Copyright 2024 NetApp, Inc. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -use chrono::{DateTime, Timelike, Utc}; -/// Very Small Rolo Index -/// This is an index made for detection of gaps in continuous data with the same sampling rate. -/// Each continuous segment of data will be mapped to a line using the formula y = mx + B plus -/// the number of points in the data series. -/// m - Sampling rate -/// b - Series initial point in time in [x,y] -/// x - sample # in the data file, this is ALWAYS sequential. There are no holes in samples -/// y - time -/// -/// This way, discovering the segment number is solving the above equation for X if the -/// time provided is bigger than the initial point. -/// -/// best case for sample retrieval O(1) -/// worst case O(N) (N is the number of segments) -/// Space usage: 5Bytes for 64k samples. -/// Or: 30Bytes for 2^32 Samples -/// -/// Example of content of an index -/// 55745 -/// 59435 -/// 15,0,55745,166 -/// 15,166,58505,63 -use std::fs::File; -use std::io::{BufRead, BufReader, BufWriter, Write}; - -// TODO: This should be configurable. Indexes are build for 1 day worth of samples, at 1 sample per second -pub static MAX_INDEX_SAMPLES: i32 = 86400; - -// Helper functions, this should be moved somewhere -/// Returns the number of seconds elapsed for the day provided in the `timestamp_sec` -pub fn day_elapsed_seconds(timestamp_sec: i64) -> i32 { - let datetime = DateTime::::from_timestamp(timestamp_sec, 0).unwrap(); - // Extract the time components (hour, minute, and second) from the DateTime - let hour = datetime.time().hour(); - let minute = datetime.time().minute(); - let second = datetime.time().second(); - // Calculate the total seconds since the start of the day - (hour * 3600 + minute * 60 + second) as i32 -} - -/// Returns the timestamp for the beginning of the day given a DateTime object. -pub fn start_day_ts(dt: DateTime) -> i64 { - let hour = dt.time().hour(); - let minute = dt.time().minute(); - let second = dt.time().second(); - dt.timestamp() - (hour * 3600 + minute * 60 + second) as i64 -} - -/// In this implementation we are writing sample by sample to the WAV file, so -/// we can't do a proper segment calculation. So there will a special first segment -/// that will hold the first point so we can calculate the segments from there. 
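// The two helpers above are complements: seconds-into-day plus that day's
// midnight timestamp reconstructs the original timestamp (Unix time has no
// leap seconds, so this is exact). A quick property check in the same chrono
// style used here:
use chrono::{DateTime, Utc};

fn check_day_split(ts_sec: i64) -> bool {
    let dt = DateTime::<Utc>::from_timestamp(ts_sec, 0).unwrap();
    start_day_ts(dt) + day_elapsed_seconds(ts_sec) as i64 == ts_sec
}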
-/// -/// # Examples -/// Creating a new index, metric is of expected time 0, but for sure location of X is 0 -/// ```no_run -/// let vsri = Vsri::new("metric_name", 0, 0); -/// vsri.flush(); -/// ``` -/// Updating an index, adding point at time 5sec -/// ```no_run -/// let vsri = Vsri::load("metric_name").unwrap().update_for_point(5); -/// vsri.flush(); -/// ``` -/// Fetch a sample location from the index given a timestamp -/// ```no_run -/// let vsri = Vsri::load("metric_name").unwrap(); -/// vsri.get_sample_location("metric_name", 5); -/// ``` - -/// Index Structure -/// index_name: Name of the index file we are indexing -/// min_ts: the minimum TS available in this file -/// max_ts: the highest TS available in this file -/// vsri_segments: Description of each segment -/// [sample_rate (m), initial_point(x,y), # of samples(length)] -/// Each segments describes a line with the form of mX + B that has a lenght -/// of # of samples. -#[derive(Debug)] -pub struct Vsri { - index_file: String, - min_ts: i32, - max_ts: i32, - // TODO: ENUM here to make it simpler to understand what each point in the array means - vsri_segments: Vec<[i32; 4]>, // [Sample Rate (m), X0, Y0, # of Samples] -} - -impl Vsri { - /// Creates the index, it doesn't create the file in the disk - /// flush needs to be called for that - pub fn new(filename: &str) -> Self { - debug!("[INDEX] Creating new index!"); - let segments: Vec<[i32; 4]> = Vec::new(); - Vsri { - index_file: filename.to_string(), - min_ts: 0, - max_ts: 0, - vsri_segments: segments, - } - } - - /// Given a filename and a time location, returns the sample location in the - /// data file. Or None in case it doesn't exist. - pub fn get_sample_location(filename: String, y: i32) -> Option { - let vsri = match Vsri::load(&filename) { - Ok(vsri) => vsri, - Err(_err) => return None, - }; - if vsri.min() <= y && y <= vsri.max() { - return vsri.get_sample(y); - } - None - } - - /// Get the sample for this timestamp or the next one - pub fn get_this_or_next(&self, y: i32) -> Option { - let r = self.get_sample(y).or(self.get_next_sample(y)); - debug!("[INDEX] This or next location {:?} for TS {}", r, y); - r - } - - /// Get the sample for this timestamp or the previous one - pub fn get_this_or_previous(&self, y: i32) -> Option { - let r = self.get_sample(y).or(self.get_previous_sample(y)); - debug!("[INDEX] This or previous location {:?} for TS {}", r, y); - r - } - - /// Returns the next sample for the provided timestamp. - /// This might be useful to find the next segment timestamp if the timestamp - /// is in between segments. It will return None in case the timestamp is over - /// the maximum timestamp of the index. - pub fn get_next_sample(&self, y: i32) -> Option { - if y < self.min() { - return Some(0); - } else if y >= self.max() { - return None; - } - // It wasn't smaller, so let's see if we have a sample that matches - for segment in self.vsri_segments.clone().into_iter().rev() { - let first_sample = segment[1]; - let y0 = segment[2]; - if y <= y0 { - return Some(first_sample); - } - } - None - } - - /// Returns the previous sample for the provided timestamp. - /// This might be useful to find the previous segment timestamp if the timestamp - /// is in between segments. It will return None in case the timestamp is bellow - /// the minimum timestamp of the index. - pub fn get_previous_sample(&self, y: i32) -> Option { - if y < self.min() { - return None; - } else if y >= self.max() { - // Return the last segment, # of samples. 
That is the total # of samples in a file - return Some(self.get_sample_count()); - } - // Cycle through the segments - for segment in &self.vsri_segments { - let first_sample = segment[1]; - let y0 = segment[2]; - if y < y0 { - // Return the last sample of the previous segment - return Some(first_sample - 1); - } - } - None - } - - /// Checks if the time segment provided falls in an empty space (Between 2 segments) - /// This is useful to check intersections. If this function returns false the provided - /// time segment does overlap with the existing time segments in the file - pub fn is_empty(&self, time_segment: [i32; 2]) -> bool { - // I could simple try to get 2 samples and if one of the returns, it is not empty - // but I would walk segments twice instead of once - match &self.vsri_segments.len() { - 1 => { - // It starts or ends inside the segment (might be a single sample) - if (time_segment[0] >= self.min() && time_segment[0] <= self.max()) - || (time_segment[1] <= self.max() && time_segment[1] >= self.min()) - { - return false; - } - // Or it contains the whole segment - if time_segment[0] < self.min() && time_segment[1] > self.max() { - return false; - } - } - _ => { - // More than 1 segment - let mut previous_seg_end: i32 = 0; - for (segment_count, segment) in self.vsri_segments.iter().enumerate() { - let sample_rate = segment[0]; - let y0 = segment[2]; - let num_samples = segment[3]; - let segment_end_y = y0 + (sample_rate * (num_samples - 1)); - // If we are in the 2+ segment, lets test if the time falls in the middle - if segment_count >= 1 - && (time_segment[0] > previous_seg_end && time_segment[1] < y0) - { - return true; - } - // Could this be simplified with Karnaugh map? I'll dig my books later - // It starts or ends inside the segment - if (time_segment[0] >= y0 && time_segment[0] < segment_end_y) - || (time_segment[1] < segment_end_y && time_segment[1] >= y0) - { - return false; - } - // Or it contains the whole segment - if time_segment[0] < y0 && time_segment[1] > segment_end_y { - return false; - } - // At this point, time segments doesn't touch this segment. - previous_seg_end = segment_end_y; - } - } - } - // Didn't find any intersection, or left in the middle, it is empty - true - } - - /// Update the index for the provided point - /// y - time in seconds - pub fn update_for_point(&mut self, y: i32) -> Result<(), ()> { - // Y needs to be bigger that the current max_ts, otherwise we are appending a point in the past - // TODO: #11 Quantiles sends several metrics for the same time, how to handle it? - if y < self.max_ts { - // Is this always a period (day) change? Assuming so - warn!( - "[INDEX] Trying to index a point in the past: {}, provided point: {}", - self.max_ts, y - ); - return Err(()); - } - self.max_ts = y; - let segment_count = self.vsri_segments.len(); - // Empty segments, create a new one, this is also a new index, update the timestamps - if segment_count == 0 { - self.min_ts = y; - self.vsri_segments.push(self.create_fake_segment(y)); - return Ok(()); - } - if self.is_fake_segment() { - // In the presence of a fake segment (where m is 0), and a new point, we are now - // in a situation we can calculate a decent segment - self.vsri_segments[segment_count - 1] = self.generate_segment(y); - } else { - // Check ownership by the current segment - if self.fits_segment(y) { - // It fits, increase the sample count and it's done - debug!("[INDEX] Same segment, updating. 
TS: {}", y); - self.vsri_segments[segment_count - 1][3] += 1; - return Ok(()); - } - // If it doesn't fit, create a new fake segment - self.vsri_segments.push(self.create_fake_segment(y)); - } - Ok(()) - } - - /// Minimum time stamp - pub fn min(&self) -> i32 { - self.min_ts - } - - /// Maximum time stamp - pub fn max(&self) -> i32 { - self.max_ts - } - - fn calculate_b(&self, segment: &[i32; 4]) -> i32 { - // b = y - mx - - segment[2] - segment[0] * segment[1] - } - - /// Returns the most recent (the last) calculated segment - fn current_segment(&self) -> [i32; 4] { - match self.vsri_segments.len() { - 0 => [0, 0, 0, 0], - _ => self.vsri_segments[self.vsri_segments.len() - 1], - } - } - - /// Get the sample location for a given point in time, or None if there is no sample for that specific TS - pub fn get_sample(&self, y: i32) -> Option { - for segment in &self.vsri_segments { - let sample_rate = segment[0]; - let y0 = segment[2]; - let num_samples = segment[3]; - - let segment_end_y = y0 + (sample_rate * (num_samples - 1)); - - if y >= y0 && y <= segment_end_y { - // x = (y - b)/ m - // TODO: This can return floats! - let x_value = (y - self.calculate_b(segment)) / sample_rate; - return Some(x_value); - } - } - None // No matching segment found for the given Y value - } - - /// For a given sample position, return the timestamp associated - pub fn get_time(&self, x: i32) -> Option { - match x { - 0 => Some(self.min()), - _ if x > self.get_sample_count() => None, - _ if x == self.get_sample_count() => Some(self.max()), - // it is somewhere in the middle - _ => { - // Find the segment where X fits - for segment in &self.vsri_segments { - if x >= segment[1] && x < (segment[1] + segment[3]) { - // Belongs here! Return Segment TS + the TS interval * x - let y = segment[2] + segment[0] * x; - return Some(y); - } - continue; - } - None - } - } - } - - /// Returns a vector will all the timestamps covered by this index - pub fn get_all_timestamps(&self) -> Vec { - let mut time_vec = Vec::new(); - for segment in &self.vsri_segments { - let samples = segment[3]; // Range is EXCLUSIVE above - let time_step = segment[0]; - let initial_ts = segment[2]; - let time_iter = (0..samples).map(|f| (f * time_step) + initial_ts); - time_vec.extend(time_iter); - } - time_vec - } - - pub fn get_sample_count(&self) -> i32 { - let last_segment = self.current_segment(); - last_segment[3] + last_segment[1] - } - - /// Generates a segment from a point. It uses information stored in the segment - /// to regenerate the same segment with the new point information. 
-
-    /// Generates a segment from a point. It uses information stored in the segment
-    /// to regenerate the same segment with the new point information.
-    fn generate_segment(&self, y: i32) -> [i32; 4] {
-        // Retrieve the last segment
-        let last_segment = self.current_segment();
-        // Double check for correctness
-        if last_segment[0] != 0 {
-            return last_segment;
-        }
-        // Calculate the new segment
-        // m = (y1-y0)/(x1-x0) -> (x1-x0) = 1 => m = y1-y0 (X is a sequence)
-        let m = y - last_segment[2];
-        // We got m, the initial points are the same, and now we have 2 samples
-        [m, last_segment[1], last_segment[2], 2]
-    }
-
-    fn update_segment_samples(mut self) {
-        let segment_count = self.vsri_segments.len();
-        self.vsri_segments[segment_count - 1][3] += 1;
-    }
-
-    /// Generate a fake segment; this can't be used for ownership testing.
-    /// x is the previous segment's sample number.
-    /// We only have the first y0 point, nothing else
-    fn create_fake_segment(&self, y: i32) -> [i32; 4] {
-        debug!("[INDEX] New segment, creating for point: {}", y);
-        let segment = self.current_segment();
-        // First point of the new segment: prior starting point + number of samples
-        let x = segment[1] + segment[3];
-        [0, x, y, 1]
-    }
-
-    /// Checks if the most recent segment is a fake segment
-    fn is_fake_segment(&self) -> bool {
-        let last_segment = self.current_segment();
-        last_segment[0] == 0
-    }
-
-    /// Returns true if a point fits the last segment of the index
-    fn fits_segment(&self, y: i32) -> bool {
-        let last_segment = self.current_segment();
-        let b = self.calculate_b(&last_segment);
-        // With the given y, calculate x, then check that x fits the interval
-        // of the current line and is the next sample in it.
-        // x = (y - b) / m
-        // TODO: Can return float, watch out
-        let x_value = (y - b) / last_segment[0];
-        debug!(
-            "[INDEX] Fit Calculation (Segment {:?}). b: {}, x: {}, calculated x: {}",
-            last_segment,
-            b,
-            (last_segment[3] + last_segment[1]),
-            x_value
-        );
-        x_value == last_segment[3] + last_segment[1]
-    }
-
-    /// Writes the index to disk
-    /// File format
-    /// line | content
-    ///  1   | minimum timestamp in this file, e.g.: 10
-    ///  2   | maximum timestamp in this file, e.g.: 34510
-    /// 3..N | segments, 4 fields separated by commas, e.g.: 0,1,2,3
-    pub fn flush(&self) -> Result<(), std::io::Error> {
-        let file = File::create(format!("{}.vsri", &self.index_file))?;
-        let mut writer = BufWriter::new(file);
-
-        // Write min_ts and max_ts on the first two lines
-        writeln!(writer, "{}", self.min_ts)?;
-        writeln!(writer, "{}", self.max_ts)?;
-
-        // Write each vsri_segment on a separate line
-        for segment in &self.vsri_segments {
-            writeln!(
-                writer,
-                "{},{},{},{}",
-                segment[0], segment[1], segment[2], segment[3]
-            )?;
-        }
-
-        writer.flush()?;
-        Ok(())
-    }
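Concretely, the two-segment index built in the earlier `update_for_point` sketch would flush to an `example_metric.vsri` file like this (lines 1 and 2 are the min/max timestamps, the rest are `m,x0,y0,n` segments):

```
100
315
15,0,100,3
15,3,300,2
```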
-
-    /// Reads an index file and loads the content into the structure
-    /// TODO: Add error control (unwrap hell)
-    pub fn load(filename: &str) -> Result<Self, std::io::Error> {
-        debug!("[INDEX] Load existing index");
-        let file = File::open(format!("{}.vsri", &filename))?;
-        let reader = BufReader::new(file);
-        let mut min_ts = 0;
-        let mut max_ts = 0;
-        let mut segments: Vec<[i32; 4]> = Vec::new();
-        let mut i = 1; // Lines 1 and 2 are not segments.
-        for line in reader.lines() {
-            let line = line?;
-            match i {
-                1 => {
-                    min_ts = line.trim().parse::<i32>().unwrap();
-                }
-                2 => {
-                    max_ts = line.trim().parse::<i32>().unwrap();
-                }
-                _ => {
-                    let values = line
-                        .split(',')
-                        .map(|value| value.trim().parse::<i32>())
-                        .collect::<Result<Vec<i32>, _>>()
-                        .unwrap();
-                    segments.push([values[0], values[1], values[2], values[3]]);
-                }
-            }
-            i += 1;
-        }
-        Ok(Vsri {
-            index_file: filename.to_string(),
-            min_ts,
-            max_ts,
-            vsri_segments: segments,
-        })
-    }
-}
diff --git a/prometheus-remote/src/main.rs b/prometheus-remote/src/main.rs
deleted file mode 100644
index 94d6006..0000000
--- a/prometheus-remote/src/main.rs
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
-Copyright 2024 NetApp, Inc.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    https://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-// Lucas - Once the project is far enough along I strongly recommend re-enabling dead code checks
-#![allow(dead_code)]
-
-mod flac_reader;
-mod fs_utils;
-mod lib_vsri;
-mod wav_writer;
-use fs_utils::data_locator_into_prom_data_point;
-use wav_writer::WavMetric;
-
-use async_trait::async_trait;
-use std::{convert::Infallible, sync::Arc};
-
-use prom_remote_api::{
-    types::{
-        Error, Label, MetricMetadata, Query, QueryResult, RemoteStorage, Result, Sample,
-        TimeSeries, WriteRequest,
-    },
-    web,
-};
-use warp::Filter;
-
-use log::{debug, error, info, warn};
-
-#[macro_use]
-extern crate log;
-
-use crate::fs_utils::get_file_index_time;
-
-// Data sampling frequency. How many seconds between each sample.
-static VERSION: &str = "0.1.1";
-
-fn get_flac_samples_to_prom(
-    metric: &str,
-    source: &str,
-    _job: &str,
-    start_ms: i64,
-    end_ms: i64,
-    step_ms: i64,
-) -> Vec<Sample> {
-    // TODO: #6 Count the number of samples for the given metric! -> Can be done with the Index alone \m/ \m/
-    // TODO: #1 Do not ignore Job!
-    // TODO: #2 Do not ignore Step!
-    // Just for today, the step in the files is always 15 sec, 15000 ms.
-    let sample_step = (step_ms / 15000) as usize;
-    if step_ms == 0 {
-        return vec![Sample {
-            value: 1.0,
-            timestamp: start_ms,
-        }];
-    }
-    // Build the metric name
-    let metric_name = format!("{}_{}", metric, source);
-    let files_to_parse = get_file_index_time(&metric_name, start_ms, end_ms);
-    if files_to_parse.is_none() {
-        error!("No data found!");
-        return vec![Sample {
-            value: 1.0,
-            timestamp: start_ms,
-        }];
-    }
-    //let prom_vec = get_data_between_timestamps(start_ms, end_ms, files_to_parse.unwrap());
-    let prom_vec = data_locator_into_prom_data_point(files_to_parse.unwrap());
-    let prom_len = prom_vec.len();
-    //debug!("[MAIN] Prom data points: {:?}", prom_vec);
-    debug!(
-        "[MAIN] Returned samples: {:?} Requested Step: {:?} Proposed Steps: {:?}",
-        prom_len, step_ms, sample_step
-    );
-    // Convert into Samples and apply step_ms
-    //let mut out = Vec::new();
-    //let mut prev_sample_ts: i64 = 0;
-    /*
-    for (i, pdp) in prom_vec.into_iter().enumerate() {
-        if i == 0 {
-            out.push(Sample{value: pdp.point, timestamp: pdp.time});
-            prev_sample_ts = pdp.time;
-            continue;
-        }
-        if pdp.time < prev_sample_ts + step_ms { continue; }
-        out.push(Sample{value: pdp.point, timestamp: pdp.time});
-        prev_sample_ts = pdp.time;
-    }
-    debug!("[MAIN] Requested Step: {:?} Proposed Steps: {:?} Original len {:?} Final len {:?}", step_ms, sample_step, prom_len, out.len());
-
-    out */
-    prom_vec
-        .iter()
-        .map(|pdp| Sample {
-            value: pdp.point,
-            timestamp: pdp.time,
-        })
-        .collect()
-    //prom_vec.iter().step_by(sample_step).map(|pdp| Sample{value: pdp.point, timestamp: pdp.time}).collect()
-    //let flac_content = get_flac_samples(metric, start_ms, end_ms).unwrap();
-    // The FLAC reader ignores step and returns way too many samples, so we have to deal with step here.
-    // Transforming the result into Samples
-    //let step_size: usize = (step_ms/DATA_INTERVAL_MSEC).try_into().unwrap();
-    //debug!(" # of FLaC samples: {} Step size ms: {} Internal step: {}", flac_content.len(), step_ms, step_size);
-    //flac_content.iter().step_by(step_size).enumerate().map(|(i, sample)| Sample{value: *sample as f64, timestamp: (start_ms + (i as i64)*step_ms) as i64}).collect()
-}
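The commented-out blocks above sketch two downsampling strategies. A standalone, hypothetical version of the simpler one (keep roughly one point per `step_ms`, assuming time-sorted input):

```rust
// Hypothetical helper: thin a dense series to about one sample per `step_ms`.
// Mirrors the commented-out loop above, on plain (timestamp_ms, value) pairs.
fn apply_step(points: &[(i64, f64)], step_ms: i64) -> Vec<(i64, f64)> {
    let mut out = Vec::new();
    let mut next_ts = i64::MIN;
    for &(ts, value) in points {
        if ts >= next_ts {
            out.push((ts, value));
            next_ts = ts + step_ms;
        }
    }
    out
}

fn main() {
    let pts: Vec<(i64, f64)> = (0..6i64).map(|i| (i * 15_000, i as f64)).collect();
    // With a 30s step, every other 15s point survives:
    assert_eq!(apply_step(&pts, 30_000).len(), 3);
}
```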
-
-fn parse_remote_write_request(
-    timeseries: &TimeSeries,
-    metadata: Option<&MetricMetadata>,
-) -> Result<()> {
-    debug!("[WRITE] samples: {:?}", timeseries.samples);
-    debug!("[WRITE] labels: {:?}", timeseries.labels);
-    debug!("[WRITE] metadata: {:?}", metadata);
-
-    let mut metric: Option<&str> = None;
-    let mut source: Option<&str> = None;
-    let mut job: Option<&str> = None;
-
-    for label in &timeseries.labels {
-        match label.name.as_str() {
-            "__name__" => metric = Some(&label.value),
-            "instance" => source = Some(&label.value),
-            "job" => job = Some(&label.value),
-            _ => (),
-        }
-    }
-
-    if let (Some(metric), Some(source), Some(job)) = (metric, source, job) {
-        // Not going to share state, flush it once you're done.
-        // TODO: #3 Improve write performance (?)
-        let mut metric_data: Vec<(i64, f64)> = timeseries
-            .samples
-            .iter()
-            .map(|x| (x.timestamp, x.value))
-            .collect();
-        if timeseries.samples.is_empty() {
-            error!("[WRITE][MAIN] Empty samples: {:?}", timeseries.samples);
-            return Ok(());
-        }
-        let mut wav_metric = WavMetric::new(
-            metric.to_string(),
-            source.to_string(),
-            job.to_string(),
-            metric_data[0].0,
-        );
-        let mutable_metric_data = &mut metric_data;
-        wav_metric.add_bulk_timeseries(mutable_metric_data);
-        match wav_metric.flush() {
-            Ok(_) => return Ok(()),
-            Err(_samples) => {
-                // TODO: Improve this situation... (Retry?)
-                return Ok(());
-            }
-        }
-    } else {
-        warn!("[WRITE] Missing metric or source");
-    }
-    Ok(())
-}
-
-#[derive(Clone, Copy)]
-struct FlacStorage;
-
-impl FlacStorage {
-    fn with_context() -> impl Filter<Extract = (u64,), Error = Infallible> + Clone {
-        warp::any().map(|| 1)
-    }
-}
-
-#[async_trait]
-impl RemoteStorage for FlacStorage {
-    type Err = Error;
-    type Context = u64;
-    // TODO: Figure out why the empty Results
-    async fn write(&self, _ctx: Self::Context, req: WriteRequest) -> Result<()> {
-        trace!("[MAIN][WRITE] req:{req:?}");
-        if req.metadata.is_empty() {
-            for timeseries in req.timeseries {
-                let _ = parse_remote_write_request(&timeseries, None);
-                //break;
-            }
-        } else {
-            for (timeseries, metadata) in req.timeseries.iter().zip(req.metadata.iter()) {
-                let _ = parse_remote_write_request(timeseries, Some(metadata));
-            }
-        }
-        Ok(())
-    }
-
-    async fn process_query(&self, _ctx: &Self::Context, query: Query) -> Result<QueryResult> {
-        debug!("[MAIN] flac read, req:{query:?}");
-        let metric = &query.matchers[0].value;
-        // TODO: Get these values from somewhere else
-        let job = "flac-remote";
-        let instance = "localhost:9090";
-        Ok(QueryResult {
-            timeseries: vec![TimeSeries {
-                labels: vec![
-                    Label {
-                        name: "job".to_string(),
-                        value: job.to_string(),
-                    },
-                    Label {
-                        name: "instance".to_string(),
-                        value: instance.to_string(),
-                    },
-                    Label {
-                        name: "__name__".to_string(),
-                        value: metric.to_string(),
-                    },
-                ],
-                samples: get_flac_samples_to_prom(
-                    metric,
-                    instance,
-                    job,
-                    query.start_timestamp_ms,
-                    query.end_timestamp_ms,
-                    query
-                        .hints
-                        .as_ref()
-                        .map(|hint| hint.step_ms)
-                        .unwrap_or(1000),
-                ),
-                ..Default::default()
-            }],
-        })
-    }
-}
-
-#[tokio::main(flavor = "current_thread")]
-// BIG TODO: Make the code configurable (loads of hardcoded stuff)
-async fn main() {
-    env_logger::init();
-    info!("FFT-Storage v. {}", VERSION);
-    let storage = Arc::new(FlacStorage);
-    let write_api = warp::path!("write")
-        .and(web::warp::with_remote_storage(storage.clone()))
-        .and(FlacStorage::with_context())
-        .and(web::warp::protobuf_body())
-        .and_then(web::warp::write);
-    let query_api = warp::path!("read")
-        .and(web::warp::with_remote_storage(storage))
-        .and(FlacStorage::with_context())
-        .and(web::warp::protobuf_body())
-        .and_then(web::warp::read);
-
-    let routes = warp::path("api").and(write_api.or(query_api));
-    let port = 9201;
-    info!("Server up, listening on {} port {}", "127.0.0.1", port);
-    warp::serve(routes).run(([127, 0, 0, 1], port)).await;
-}
diff --git a/prometheus-remote/src/wav_writer.rs b/prometheus-remote/src/wav_writer.rs
deleted file mode 100644
index 86a4b90..0000000
--- a/prometheus-remote/src/wav_writer.rs
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
-Copyright 2024 NetApp, Inc.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    https://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-use chrono::{DateTime, Utc};
-use hound::{WavSpec, WavWriter};
-use std::fs::File;
-use std::fs::{metadata, OpenOptions};
-use std::process::Command;
-
-use crate::lib_vsri::{day_elapsed_seconds, Vsri};
-
-// --- Write layer
-// Remote write spec: https://prometheus.io/docs/concepts/remote_write_spec/
-pub struct WavMetric {
-    pub metric_name: String,               // Metric name provided by prometheus
-    pub instance: String,                  // Instance name provided by prometheus
-    pub job: String,                       // Job name provided by prometheus
-    pub timeseries_data: Vec<(i64, f64)>,  // Sample data
-    pub creation_time: String,             // The timestamp at which this structure was created
-    pub last_file_created: Option<String>, // Name of the last file created, !! might not make sense anymore !!
-}
-// Here is where things get tricky. Either we have a single structure and implement several
-// WavWriters, or we segment at the metric collection level. The advantage of implementing at
-// the writing level is that we can look into the data and make a better guess based on the data.
-// There is also the problem of not understanding the data clearly, or not having the WHOLE data
-// available and making assumptions on an incomplete dataset.
-// Another way we can/should get around this would be "hinting" at the data type.
-// If we are dealing with percentages we can go with i16, etc.
-// Option B, less optimal but more functional, is breaking the f64 into 16-bit parts and storing
-// each part in its own channel. We are choosing option B!
-
-impl WavMetric {
-    /// Create a new WavMetric struct. `start_sample_ts` *must be* a timestamp in milliseconds!
-    pub fn new(name: String, source: String, job: String, start_sample_ts: i64) -> WavMetric {
-        // The sample needs to fall within the file that the TS refers to, not the calendar day
-        let start_date = DateTime::<Utc>::from_timestamp(start_sample_ts / 1000, 0).unwrap();
-        // TODO: Do not ignore JOB!
-        WavMetric {
-            metric_name: name,
-            instance: source,
-            job,
-            timeseries_data: Vec::new(),
-            creation_time: start_date.format("%Y-%m-%d").to_string(),
-            last_file_created: None,
-        }
-    }
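A usage sketch of the write path, assuming the API above (metric name, instance, and timestamp are made up); `flush` consumes the struct and writes both the WAV file and its VSRI index:

```rust
let mut metric = WavMetric::new(
    "cpu_seconds_total".to_string(), // hypothetical metric name
    "localhost:9090".to_string(),
    "node".to_string(),
    1_700_000_000_000, // first sample timestamp, in milliseconds
);
let mut samples: Vec<(i64, f64)> = vec![(1_700_000_000_000, 0.42)];
metric.add_bulk_timeseries(&mut samples);
metric.flush().expect("flush failed");
```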
-    /// Flushes the metric to a WAV file
-    // TODO: Unwrap hell in here. Need better error control.
-    // Too many assumptions on correct behavior of all the code.
-    // Assumption is the mother of all... Needs to be fixed
-    pub fn flush(mut self) -> Result<(), i32> {
-        let mut processed_samples: i32 = 0;
-        let vsri: Option<Vsri>;
-        if self.timeseries_data.is_empty() {
-            // Can't flush empty data
-            error!("[WRITE][WAV] flush called on empty data");
-            return Err(processed_samples);
-        }
-        // Append if the file exists, otherwise create the spec and flush a new file
-        let mut wav_writer = match self.last_file_created.is_none() {
-            true => {
-                let handlers = self.create_file().unwrap();
-                vsri = Some(handlers.1);
-                handlers.0
-            }
-            false => {
-                let file = OpenOptions::new()
-                    .write(true)
-                    .read(true)
-                    .open(self.last_file_created.unwrap())
-                    .unwrap();
-                // Load the index file
-                // TODO: one more unwrap to work on later
-                vsri = Some(Vsri::load(&self.metric_name).unwrap());
-                WavWriter::new_append(file).unwrap()
-            }
-        };
-        // TODO: #12 Check if the timestamp is one day ahead; if so, create another file and pack the previous one as FLAC
-        let vsri_unwrapped = &mut vsri.unwrap();
-        let mut int_r: Result<(), i32> = Ok(());
-        for (ts, sample) in self.timeseries_data {
-            let short_ts = ts / 1000;
-            let r = vsri_unwrapped.update_for_point(day_elapsed_seconds(short_ts));
-            if r.is_err() {
-                // Period changed (default: day)
-                warn!("[WRITE][WAV] Detected a day change while processing samples. Wrote {} before error.", processed_samples);
-                int_r = Err(processed_samples);
-                break;
-            }
-            let channel_data = WavMetric::split_f64_into_i16s(sample);
-            // Write the samples interleaved
-            for sample in channel_data {
-                let ww = wav_writer.write_sample(sample);
-                if ww.is_err() {
-                    error!(
-                        "[WAVWRITER] Unable to write sample {:?} in file {:?}!",
-                        sample, self.metric_name
-                    );
-                    return Err(processed_samples);
-                }
-            }
-            processed_samples += 1;
-        }
-        debug!("[WRITE][WAV] Wrote {} samples", processed_samples);
-        // TODO: Process these errors too, create different errors here
-        let r = vsri_unwrapped.flush();
-        if r.is_err() {
-            error!(
-                "[WAVWRITER] Unable to flush VSRI for {:?}!",
-                self.metric_name
-            );
-            panic!(
-                "[WAVWRITER] Failed flushing index. Lost information. {}",
-                r.unwrap_err()
-            )
-        }
-        let r2 = wav_writer.finalize();
-        if r2.is_err() {
-            error!(
-                "[WAVWRITER] Unable to flush WAV file {:?}!",
-                self.metric_name
-            );
-            panic!(
-                "[WAVWRITER] Failed flushing file. Lost information. {}",
-                r2.unwrap_err()
-            )
-        }
-        int_r
-    }
-
-    /// Create a file according to the day of the year, the metric, and the instance that generated the metric
-    /// TODO: create_file shouldn't open a file for append, it should only create. Fix this (or rename it)
-    fn create_file(&mut self) -> Result<(WavWriter<File>, Vsri), hound::Error> {
-        let spec = WavMetric::generate_wav_header(None);
-        let file_name = format!(
-            "{}_{}_{}",
-            self.metric_name, self.instance, self.creation_time
-        );
-        let file_path = format!("./{}.wav", file_name);
-        // Create a new WAV file, or open the existing one
-        if let Ok(meta) = metadata(&file_path) {
-            if meta.is_file() {
-                let file = OpenOptions::new().write(true).read(true).open(&file_path)?;
-                let wav_writer = WavWriter::new_append(file)?;
-                return Ok((wav_writer, Vsri::load(&file_name).unwrap()));
-            }
-        }
-        let file = File::create(&file_path)?;
-        let wav_writer = WavWriter::new(file, spec)?;
-        self.last_file_created = Some(file_path);
-        // TODO: Y can't be 0. Needs to be TS
-        Ok((wav_writer, Vsri::new(&file_name)))
-    }
-
-    /// Generate the WAV file header
-    fn generate_wav_header(channels: Option<i32>) -> WavSpec {
-        hound::WavSpec {
-            channels: channels.unwrap_or(4) as u16,
-            sample_rate: 8000,
-            bits_per_sample: 16,
-            sample_format: hound::SampleFormat::Int,
-        }
-    }
-
-    /// Add a single metric value to the structure
-    pub fn add_timeseries(&mut self, ts: i64, value: f64) {
-        self.timeseries_data.push((ts, value))
-    }
-
-    /// Add a vector of data to the existing timeseries
-    pub fn add_bulk_timeseries(&mut self, timeseries: &mut Vec<(i64, f64)>) {
-        self.timeseries_data.append(timeseries)
-    }
-
-    /// Read a range from the structure
-    pub fn get_range(self, ts_start: i64, ts_end: i64) -> Vec<(i64, f64)> {
-        let mut i = 0;
-        let mut j = 0;
-        for (count, (ts, _)) in self.timeseries_data.iter().enumerate() {
-            if *ts < ts_start {
-                i = count
-            }
-            if *ts < ts_end {
-                j = count;
-                break;
-            }
-        }
-        if i > 0 {
-            return self.timeseries_data[i - 1..j].to_vec();
-        }
-        self.timeseries_data[..j].to_vec()
-    }
-
-    /// Instead of chasing data types and converting stuff, let's just unpack the f64 and
-    /// put it into different channels. This way we can always guarantee a clean i16 WAV file
-    fn split_f64_into_i16s(value: f64) -> [i16; 4] {
-        let bits: u64 = value.to_bits();
-
-        let i16_1 = (bits & 0xFFFF) as i16;
-        let i16_2 = ((bits >> 16) & 0xFFFF) as i16;
-        let i16_3 = ((bits >> 32) & 0xFFFF) as i16;
-        let i16_4 = ((bits >> 48) & 0xFFFF) as i16;
-
-        [i16_1, i16_2, i16_3, i16_4]
-    }
-
-    /// Recreate an f64 from four 16-bit parts
-    fn create_f64_from_16bits(bits: [u16; 4]) -> f64 {
-        let u64_bits = (bits[0] as u64)
-            | ((bits[1] as u64) << 16)
-            | ((bits[2] as u64) << 32)
-            | ((bits[3] as u64) << 48);
-
-        f64::from_bits(u64_bits)
-    }
-
-    /// Rotate the WAV file after the interval and save it as a FLAC file
-    fn rotate_wav_into_flac(self) {
-        let file_in = format!(
-            "{}_{}_{}.wav",
-            self.metric_name, self.instance, self.creation_time
-        );
-        let file_out = format!(
-            "{}_{}_{}.flac",
-            self.metric_name, self.instance, self.creation_time
-        );
-        // Command: sox input.wav output.flac
-        let output = Command::new("sox")
-            .arg(file_in)
-            .arg(file_out)
-            .output()
-            .expect("Error converting WAV to FLAC");
-        if !output.status.success() {
-            panic!("Could not rotate file!")
-        }
-    }
-
-    /// Check if the current timestamp is within the file period
-    fn is_ts_valid(_ts: i64) -> bool {
-        true
-    }
-}
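Because the f64 is spread little-endian across four 16-bit channels, split and join are an exact bit-level round trip. A standalone sketch with free functions mirroring the private methods above:

```rust
fn split(value: f64) -> [u16; 4] {
    let bits = value.to_bits();
    // Channel i holds bits [16*i .. 16*i + 16) of the f64's bit pattern.
    core::array::from_fn(|i| ((bits >> (16 * i)) & 0xFFFF) as u16)
}

fn join(parts: [u16; 4]) -> f64 {
    let bits = parts
        .iter()
        .enumerate()
        .fold(0u64, |acc, (i, &p)| acc | ((p as u64) << (16 * i)));
    f64::from_bits(bits)
}

fn main() {
    let x = 1234.5678_f64;
    assert_eq!(join(split(x)), x); // lossless round trip
}
```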
diff --git a/tools/src/bin/dwt_finder.rs b/tools/src/bin/dwt_finder.rs
deleted file mode 100644
index 55d239e..0000000
--- a/tools/src/bin/dwt_finder.rs
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
-Copyright 2024 NetApp, Inc.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    https://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-use clap::Parser;
-use dtw_rs::{Algorithm, DynamicTimeWarping, ParameterizedAlgorithm};
-
-fn read_metrics_from_wav(filename: &str) -> Vec<f64> {
-    let r_reader = hound::WavReader::open(filename);
-    let mut reader = match r_reader {
-        Ok(reader) => reader,
-        Err(_err) => {
-            return Vec::new();
-        }
-    };
-    let num_channels = reader.spec().channels as usize;
-
-    let mut raw_data: Vec<f64> = Vec::new();
-    let mut u64_holder: [u16; 4] = [0, 0, 0, 0];
-
-    // Iterate over the samples and channels and push each sample to the vector
-    let mut current_channel: usize = 0;
-    for sample in reader.samples::<i16>() {
-        u64_holder[current_channel] = sample.unwrap() as u16;
-        current_channel += 1;
-        if current_channel == num_channels {
-            raw_data.push(join_u16_into_f64(u64_holder));
-            current_channel = 0;
-        }
-    }
-    raw_data
-}
-
-fn join_u16_into_f64(bits: [u16; 4]) -> f64 {
-    let u64_bits = (bits[0] as u64)
-        | ((bits[1] as u64) << 16)
-        | ((bits[2] as u64) << 32)
-        | ((bits[3] as u64) << 48);
-
-    f64::from_bits(u64_bits)
-}
-
-#[derive(Parser, Default, Debug)]
-struct Arguments {
-    /// First wav file
-    file_one: String,
-    /// Second wav file
-    file_two: String,
-    /// Distance
-    distance: usize,
-    /// Block size
-    block: usize,
-}
-
-fn main() {
-    let args = Arguments::parse();
-    println!("{:?}", args);
-
-    let binding_a: Vec<f64> = read_metrics_from_wav(&args.file_one);
-    let binding_b: Vec<f64> = read_metrics_from_wav(&args.file_two);
-    let vec_slices_a: Vec<&[f64]> = binding_a.chunks(args.block).collect();
-    let vec_slices_b: Vec<&[f64]> = binding_b.chunks(args.block).collect();
-    let data_a = vec_slices_a[0];
-    let data_b = vec_slices_b[0];
-
-    let param = dtw_rs::Restriction::Band(args.distance);
-    let dtw = DynamicTimeWarping::with_param(data_a, data_b, param);
-
-    println!("Path: {:?}, Distance: {}", dtw.path(), dtw.distance());
-}
diff --git a/tools/src/bin/flac_reader_tester.rs b/tools/src/bin/flac_reader_tester.rs
deleted file mode 100644
index 47727fa..0000000
--- a/tools/src/bin/flac_reader_tester.rs
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
-Copyright 2024 NetApp, Inc.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    https://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-/*
-This file compares a FLAC and a WAV file and checks if the content is identical.
-Also good to test if the FLAC and WAV read routines are good.
-*/
-
-/* Read a WAV file */
-fn _read_metrics_from_wav(filename: &str) -> Vec<i16> {
-    let mut reader = hound::WavReader::open(filename).unwrap();
-    let num_samples = reader.len() as usize / reader.spec().channels as usize;
-    let num_channels = reader.spec().channels as usize;
-
-    // Create a vector to hold the audio data
-    let mut samples = Vec::with_capacity(num_samples * num_channels);
-
-    // Iterate over the samples and channels and push each sample to the vector
-    for sample in reader.samples::<i16>() {
-        samples.push(sample.unwrap());
-    }
-    samples
-}
-
-/* Read a FLAC file */
-fn read_metrics_from_flac(filename: &str) -> Vec<u16> {
-    let mut reader = claxon::FlacReader::open(filename).unwrap();
-    // Create a vector to hold the audio data
-    let mut samples = Vec::with_capacity(reader.streaminfo().samples.unwrap() as usize);
-    for sample in reader.samples() {
-        samples.push(sample.unwrap() as u16);
-    }
-    samples
-}
-
-fn read_metrics_from_flac_by_bloc(filename: &str) -> Vec<u16> {
-    let mut sample_vec: Vec<u16> = Vec::new();
-    let mut reader = claxon::FlacReader::open(filename).unwrap();
-    let channels = reader.streaminfo().channels as usize;
-    let mut sample_channel_data = vec![0u16; channels];
-    let mut frame_reader = reader.blocks();
-    let mut block = claxon::Block::empty();
-
-    loop {
-        // Read a single frame. Recycle the buffer from the previous frame to
-        // avoid allocations as much as possible.
-        match frame_reader.read_next_or_eof(block.into_buffer()) {
-            Ok(Some(next_block)) => block = next_block,
-            Ok(None) => break, // EOF.
-            Err(error) => panic!("[DEBUG][READ][FLAC] {}", error),
-        }
-        for sample in 0..block.duration() {
-            #[allow(clippy::needless_range_loop)]
-            for channel in 0..channels {
-                sample_channel_data[channel] = block.sample(channel as u32, sample) as u16;
-            }
-
-            // Process the sample_channel_data as needed
-            for &sample in &sample_channel_data {
-                sample_vec.push(sample);
-            }
-
-            // Optionally, print debug information
-            println!(
-                "Sample {}/{}, Channels: {:?}",
-                sample,
-                block.duration(),
-                &sample_channel_data
-            );
-        }
-    }
-    sample_vec
-}
-
-fn _read_metrics_from_flac_in_interval(filename: &str, start: u32, end: u32) -> Vec<i16> {
-    let mut reader = claxon::FlacReader::open(filename).unwrap();
-    // Create a vector to hold the audio data
-    let start_sample = start * reader.streaminfo().sample_rate;
-    let end_sample = end * reader.streaminfo().sample_rate;
-    //let mut samples = Vec::with_capacity(reader.streaminfo().samples.unwrap() as usize);
-    let mut samples: Vec<i16> = Vec::new();
-    for (i, sample) in reader.samples().enumerate() {
-        let i = i as u32;
-        if start_sample <= i && i <= end_sample {
-            samples.push(sample.unwrap() as i16);
-        } else if i > end_sample {
-            break;
-        }
-    }
-    samples
-}
-
-fn main() {
-    println!("Testing: is reading FLAC the same as reading WAV?");
-    let _filename = "2023-05-11_15-11-19.wav";
-    let filename_flac =
-        "/home/crolo/code/prom_data/go_memstats_frees_total_localhost:9090_2023-07-07.flac";
-    let _filename_flac_single = "3_single_channel.flac";
-    //let samples = read_metrics_from_wav(filename);
-    //println!("{:?}", samples);
-    let samples_flac = read_metrics_from_flac(filename_flac);
-    let samples_flac_b = read_metrics_from_flac_by_bloc(filename_flac);
-    println!("{:?}", samples_flac);
-    println!("{:?}", samples_flac_b);
-    assert_eq!(samples_flac_b, samples_flac);
-    //let samples_flac_in_interval = read_metrics_from_flac_in_interval(filename_flac, 5, 7);
-    println!("Sample Flac {:?}", samples_flac.len());
-    println!("Sample Flac {:?}", samples_flac_b.len());
-}
diff --git a/tools/src/bin/mid_channel_computing.rs b/tools/src/bin/mid_channel_computing.rs
deleted file mode 100644
index c3a0793..0000000
--- a/tools/src/bin/mid_channel_computing.rs
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
-Copyright 2024 NetApp, Inc.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    https://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-use std::fs::File;
-
-use clap::Parser;
-use hound::{WavSpec, WavWriter};
-
-fn read_metrics_from_wav(filename: &str) -> Vec<f64> {
-    let r_reader = hound::WavReader::open(filename);
-    let mut reader = match r_reader {
-        Ok(reader) => reader,
-        Err(_err) => {
-            return Vec::new();
-        }
-    };
-    let num_channels = reader.spec().channels as usize;
-    let bit_depth = reader.spec().bits_per_sample;
-
-    let mut raw_data: Vec<f64> = Vec::new();
-
-    // This is a very special case where the WAV file holds a 64-bit float spread over 4 16-bit channels
-    if num_channels == 4 && bit_depth == 16 {
-        // Iterate over the samples and channels and push each sample to the vector
-        let mut u64_holder: [u16; 4] = [0, 0, 0, 0];
-        let mut current_channel: usize = 0;
-
-        for sample in reader.samples::<i16>() {
-            u64_holder[current_channel] = sample.unwrap() as u16;
-            current_channel += 1;
-            if current_channel == num_channels {
-                raw_data.push(join_u16_into_f64(u64_holder));
-                current_channel = 0;
-            }
-        }
-    } else {
-        for sample in reader.samples::<i16>() {
-            raw_data.push(sample.unwrap() as f64);
-        }
-    }
-
-    raw_data
-}
-
-fn join_u16_into_f64(bits: [u16; 4]) -> f64 {
-    let u64_bits = (bits[0] as u64)
-        | ((bits[1] as u64) << 16)
-        | ((bits[2] as u64) << 32)
-        | ((bits[3] as u64) << 48);
-
-    f64::from_bits(u64_bits)
-}
-
-fn write_optimal_int_wav(filename: &str, data: Vec<i16>, bitdepth: i32, channels: i32) {
-    let header: WavSpec = generate_wav_header(Some(channels), bitdepth as u16, 8000);
-    let file_path = format!("{filename}.wav");
-    let file = File::create(file_path).unwrap();
-    let mut wav_writer = WavWriter::new(file, header).unwrap();
-    for sample in data {
-        let _ = wav_writer.write_sample(sample as i8);
-    }
-    let _ = wav_writer.finalize();
-}
-
-fn generate_wav_header(channels: Option<i32>, bitdepth: u16, samplerate: u32) -> WavSpec {
-    hound::WavSpec {
-        channels: channels.unwrap_or(4) as u16,
-        sample_rate: samplerate,
-        bits_per_sample: bitdepth,
-        sample_format: hound::SampleFormat::Int,
-    }
-}
-
-fn calculate_mid_channel(left: Vec<f64>, right: Vec<f64>) -> (Vec<i16>, Vec<i16>) {
-    // We might have different sizes
-    let min_size = left.len().min(right.len());
-    let mut mid: Vec<i16> = Vec::with_capacity(min_size);
-    let mut sides: Vec<i16> = Vec::with_capacity(min_size);
-    // The formulas are easy: mid = 0.5 * (left + right), sides = 0.5 * (left - right)
-    for i in 0..min_size {
-        mid.push((left[i] as i16 + right[i] as i16) / 2);
-        sides.push((left[i] as i16 - right[i] as i16) / 2);
-    }
-    (mid, sides)
-}
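Mid/side coding is invertible: left = mid + side and right = mid - side, though the integer halving above drops the low bit whenever left + right is odd. The tool has no inverse; a hypothetical one would look like:

```rust
// Hypothetical inverse of calculate_mid_channel, ignoring the rounding
// that integer division introduces for odd (left + right) sums.
fn recombine(mid: &[i16], sides: &[i16]) -> (Vec<i16>, Vec<i16>) {
    let left: Vec<i16> = mid.iter().zip(sides).map(|(m, s)| m + s).collect();
    let right: Vec<i16> = mid.iter().zip(sides).map(|(m, s)| m - s).collect();
    (left, right)
}

fn main() {
    // left = 10, right = 4 -> mid = 7, side = 3 -> recombines exactly
    let (left, right) = recombine(&[7], &[3]);
    assert_eq!((left[0], right[0]), (10, 4));
}
```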
-
-#[derive(Parser, Default, Debug)]
-struct Arguments {
-    /// First wav file
-    file_one: String,
-    /// Second wav file
-    file_two: String,
-    /// Reverse the process: picks the first file, writes into the second
-    #[arg(long, action)]
-    reverse: bool,
-    /// Don't write a file, just dump the calculated contents
-    #[arg(long, action)]
-    dump: bool,
-}
-
-fn main() {
-    let args = Arguments::parse();
-    println!("{:?}", args);
-
-    let binding_a: Vec<f64> = read_metrics_from_wav(&args.file_one);
-    let binding_b: Vec<f64> = read_metrics_from_wav(&args.file_two);
-    let (mid, sides) = calculate_mid_channel(binding_a, binding_b);
-    if args.dump {
-        println!("Mid: {:?}", mid);
-        println!("Sides: {:?}", sides);
-    } else {
-        write_optimal_int_wav("mid", mid, 16, 1);
-        write_optimal_int_wav("side", sides, 8, 1);
-    }
-}
diff --git a/tools/src/bin/wav2wbro.rs b/tools/src/bin/wav2wbro.rs
index 2ecf975..be7d8f3 100644
--- a/tools/src/bin/wav2wbro.rs
+++ b/tools/src/bin/wav2wbro.rs
@@ -23,12 +23,10 @@ use wavbrro::wavbrro::WavBrro;
 
 // --- Legacy stuff to read brro "wav" files ---
 fn is_wav_file(file_path: &Path) -> bool {
-    // Open the file for reading and read the first 12 bytes (header) of the file
    let mut file = File::open(file_path).expect("Can't open file!");
    let mut header = [0u8; 12];
    file.read_exact(&mut header).expect("File too small!");
-    // Check if the file starts with "RIFF" and ends with "WAVE" in the header
    header.starts_with(b"RIFF") && &header[8..12] == b"WAVE"
 }
 
@@ -45,7 +43,6 @@ fn read_metrics_from_wav(filename: &str) -> Vec<f64> {
    let mut raw_data: Vec<f64> = Vec::new();
    let mut u64_holder: [u16; 4] = [0, 0, 0, 0];
 
-    // Iterate over the samples and channels and push each sample to the vector
    let mut current_channel: usize = 0;
    for sample in reader.samples::<i16>() {
        u64_holder[current_channel] = sample.unwrap() as u16;
@@ -70,15 +67,13 @@ fn join_u16_into_f64(bits: [u16; 4]) -> f64 {
    }
    out
 }
-// --- Legacy ends (I need to stop lying to myself...) ---
+// --- Legacy ends ---
 
 #[derive(Parser, Default, Debug)]
 #[command(author, version, about="WAV to WAVBRRO converter", long_about = None)]
 struct Args {
-    /// input file
    input: PathBuf,
 
-    /// Verbose output, dumps everysample in the input file (for compression) and in the ouput file (for decompression)
    #[arg(long, action)]
    validate: bool,
 }
@@ -91,16 +86,13 @@ fn main() {
    assert!(is_wav_file(&arguments.input));
    let wav_data = read_metrics_from_wav(filename);
    let mut wb = WavBrro::new();
-    // Clean NaN
    wav_data.iter().for_each(|x| {
        if !x.is_nan() {
            wb.add_sample(*x)
        }
    });
-    // Write the file
    let wavbrro_file = format!("{}wbro", filename.strip_suffix("wav").unwrap());
    wb.to_file(Path::new(&wavbrro_file));
-    // Checking the results
    if arguments.validate {
        let brro_data = wb.get_samples();
        assert_eq!(wav_data, brro_data);
diff --git a/vsri/src/lib.rs b/vsri/src/lib.rs
index a9a4f00..0487e53 100644
--- a/vsri/src/lib.rs
+++ b/vsri/src/lib.rs
@@ -96,7 +96,7 @@ pub fn start_day_ts(dt: DateTime<Utc>) -> i64 {
 /// max_ts: the highest TS available in this file
 /// vsri_segments: Description of each segment
 ///                [sample_rate (m), initial_point(x,y), # of samples(length)]
-/// Each segments describes a line with the form of mX + B that has a lenght
+/// Each segment describes a line with the form of mX + B that has a length
 /// of # of samples.
 #[derive(Debug, Default)]
 pub struct Vsri {
diff --git a/wavbrro/src/read.rs b/wavbrro/src/read.rs
index 000379e..709e430 100644
--- a/wavbrro/src/read.rs
+++ b/wavbrro/src/read.rs
@@ -19,9 +19,7 @@ use std::fs::File;
 use std::io::{self, Read, Seek, SeekFrom};
 use std::path::Path;
 
-// Function to check if a file is a WAV file
 pub fn is_wavbrro_file(file_path: &Path) -> io::Result<bool> {
-    // Open the file for reading and read the first 12 bytes (header) of the file
    let mut file = fs::File::open(file_path)?;
    let mut header = [0u8; 12];
    file.read_exact(&mut header)?;
diff --git a/wavbrro/src/wavbrro.rs b/wavbrro/src/wavbrro.rs
index 1839d11..74de5e6 100644
--- a/wavbrro/src/wavbrro.rs
+++ b/wavbrro/src/wavbrro.rs
@@ -32,7 +32,6 @@ const MAX_CHUNK_SIZE: usize = 2048;
 // API, you have to derive CheckBytes for the archived type:
     check_bytes,
 )]
-// Derives can be passed through to the generated type:
 #[archive_attr(derive(Debug))]
 pub struct WavBrro {
     // We can infer chunk count from here -> chunk count = ceil(sample_count/MAX_CHUNK_SIZE)
@@ -74,7 +73,6 @@ impl WavBrro {
        self.chunks.push(Vec::with_capacity(MAX_CHUNK_SIZE));
    }
 
-    // Receives a slice of f64 and writes in it's internal structure
    fn from_slice(data: &[f64]) -> Self {
        let sample_count = data.len();
        WavBrro {
@@ -92,7 +90,7 @@ impl WavBrro {
        self.sample_count += 1;
    }
 
-    // This should be generic, but first implementation is going to be Vec f64
+    // TODO: This should be generic, but the first implementation is going to be Vec<f64>
     // This consumes self!
     pub fn get_samples(self) -> Vec<f64> {
         self.chunks.into_iter().flatten().collect::<Vec<f64>>()
diff --git a/wavbrro/src/write.rs b/wavbrro/src/write.rs
index c73a861..e21cd25 100644
--- a/wavbrro/src/write.rs
+++ b/wavbrro/src/write.rs
@@ -19,9 +19,7 @@ use std::os::unix::prelude::FileExt;
 use std::path::Path;
 
 pub fn write_wavbrro_file(file_path: &Path, content: &[u8]) {
-    // The content of the header
    let header: [u8; 12] = *b"WBRO0000WBRO";
-    // We need to put the header in front
    let file = File::create(file_path).expect("Can't create file!");
    file.write_at(&header, 0).expect("Fail to write header");
    file.write_at(content, header.len() as u64)