diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..9f3df2d8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +/data/ +/target/ +Cargo.lock +*.sublime-workspace +**/*.rs.bk diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..c4364948 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,78 @@ +[package] + name = "multisql" + version = "0.2.0" + authors = ["Kyran Gostelow ", "Taehoon Moon "] + edition = "2021" + description = "MultiSQL" + license = "Apache-2.0" + repository = "https://github.com/MultiSQL/multisql" + readme = "README.md" + keywords = ["database", "database-engine", "sql", "modular", "multi-threaded"] + metadata.docs.rs.all-features = true + +[features] + default = [ + # Storages + "sled-database", + "csv-database", + "sheet-database", + "memory-database", + # Functionality + "alter-table", + "auto-increment", + # Data + "implicit_float_conversion", + ] + + # Storages + sled-database = ["sled", "bincode"] + csv-database = ["csv", "linecount"] + sheet-database = ["umya-spreadsheet"] + memory-database = [] + + # Functionality + alter-table = [] + auto-increment = [] + + # Data + implicit_float_conversion = [] + + +[dependencies] + async-trait = "0.1.41" + async-recursion = "0.3.1" + boolinator = "2.4.0" + futures = "0.3" + serde = { version = "1.0.117", features = ["derive"] } + sqlparser = { version = "0.16.0", features = ["serde"] } + thiserror = "1.0.21" + fstrings = "0.2.3" + chrono = { version = "0.4.19", features = ["serde", "unstable-locales"] } + concat-idents = "1.1.2" + thousands = "0.2.0" + fast-float = "0.2.0" # Unclear if these make any real difference + lexical = "6.1.0" + fastrand = "1.7.0" + rayon = "1.5.1" + serde_json = "1.0.0" + uuid = { version = "0.8", features = ["serde", "v4"] } + serde_yaml = "0.8.23" + + # OPTIONAL DEPENDENCIES + # Storages + # Sled + bincode = { version = "1.3.1", optional = true } + sled = { version = "0.34.7", optional = true } + # CSV + csv = { version = "1.1.6", optional = true } + linecount = { version = "0.1.0", optional = true } + # Sheet + umya-spreadsheet = { version = "0.7.0", optional = true } + +[dev-dependencies] + tokio = { version = "0.3.3", features = ["macros", "rt"] } + criterion = {version = "0.3.5", features = ["html_reports"] } + +[[bench]] + name = "bench" + harness = false diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..9d711f64 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright 2020 Taehoon Moon +Copyright 2021 Kyran Gostelow + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..db7db206
--- /dev/null
+++ b/README.md
@@ -0,0 +1,19 @@
+# MultiSQL
+[![docs.rs](https://docs.rs/multisql/badge.svg)](https://docs.rs/multisql)
+[![crates.io](https://img.shields.io/crates/v/multisql.svg)](https://crates.io/crates/multisql)
+[![codecov](https://codecov.io/gh/KyGost/multisql/branch/main/graph/badge.svg?token=RX0OCX7AJ6)](https://codecov.io/gh/KyGost/multisql)
+[![Chat](https://img.shields.io/discord/780298017940176946)](https://discord.gg/C6TDEgzDzY)
+[![LICENSE](https://img.shields.io/crates/l/gluesql.svg)](https://github.com/KyGost/multisql/blob/main/LICENSE)
+[![Rust](https://github.com/KyGost/multisql/actions/workflows/rust.yml/badge.svg)](https://github.com/KyGost/multisql/actions/workflows/rust.yml)
+
+Diverged from [GlueSQL](https://github.com/gluesql/gluesql) as of [GlueSQL v0.5.0](https://github.com/gluesql/gluesql/releases/tag/v0.5.0).
+
+See origin differences at [#8](https://github.com/SyRis-Consulting/gluesql/pull/8).
+
+See [benchmarks](./benches)
+
+
+## Documentation
+For SQL documentation, go to [multisql.org](https://multisql.org)
+
+For Rust documentation, go to [docs.rs/multisql](https://docs.rs/multisql/latest/multisql/)
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 00000000..97944e42
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,2 @@
+# Security Policy
+None currently
diff --git a/benches/README.md b/benches/README.md
new file mode 100644
index 00000000..cc1059f5
--- /dev/null
+++ b/benches/README.md
@@ -0,0 +1,29 @@
+# Benchmarks
+
+## Results
+[See results](https://htmlpreview.github.io/?https://github.com/KyGost/multisql/blob/main/benches/criterion/report/index.html)
+
+## Tests
+[See tests](./bench.rs)
+
+## Hardware
+2950X (AMD Ryzen 16-core (32-thread) CPU)
+32GB 3000MHz DDR4 RAM
+Running:
+- Linux kernel 5.13.0-30-generic (64-bit)
+- FerenOS 2021.10
+
+
+## Simple overview
+- Filtering 100,000 rows down to 100
+  - 500 μs indexed
+  - 80,000 μs unindexed
+- Filtering 100,000 rows down to 1
+  - 82,000 μs indexed (index optimisations not yet implemented)
+  - 86,000 μs unindexed
+- Grouping and summing 100,000 rows into 10,000 groups
+  - 1,389,000 μs indexed (index optimisations not yet implemented)
+  - 1,421,000 μs unindexed
+- Joining 100,000 rows to 10,000 rows, grouping them into 10,000 groups and summing them
+  - 588,000 μs indexed (index optimisations not yet implemented)
+  - 598,000 μs unindexed
diff --git a/benches/bench.rs b/benches/bench.rs
new file mode 100644
index 00000000..30d05124
--- /dev/null
+++ b/benches/bench.rs
@@ -0,0 +1,216 @@
+use {
+	criterion::*,
+	multisql::{Glue, SledStorage, Storage, Value},
+	std::time::Duration,
+};
+
+fn setup_glue() -> Glue {
+	let path = "data/sled_bench";
+
+	match std::fs::remove_dir_all(&path) {
+		Ok(()) => (),
+		Err(e) => {
+			println!("fs::remove_dir_all {:?}", e);
+		}
+	}
+
+	let storage = SledStorage::new(path)
+		.map(Storage::new_sled)
+		.expect("Create Storage");
+
+	Glue::new(String::from("main"), storage)
+}
+
+fn setup_a(glue: &mut Glue) {
+	let rows: Vec<Vec<Value>> = (0..10_000).into_iter().map(|pk| vec![pk.into()]).collect();
+	glue.execute(
+		"
+		CREATE TABLE A (
+			pk INTEGER PRIMARY KEY
+		)
+	",
+	)
+	.unwrap();
+	glue.execute(
+		"
+		CREATE INDEX primkey ON A (pk)
+	",
+	)
+	.unwrap();
+	glue.insert_vec(String::from("A"), vec![String::from("pk")], rows)
+		.unwrap();
+}
+
+fn setup_b(glue: &mut Glue) {
+	let rows: Vec<Vec<Value>> = (0..100_000)
+		.into_iter()
+		.map(|_row| vec![
+			fastrand::i64(0..10_000).into(),
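+			// fk: a random key into A's 10,000 pks; val: a random float in [0, 1)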
+			fastrand::f64().into(),
+		])
+		.collect();
+	glue.execute(
+		"
+		CREATE TABLE B (
+			pk INTEGER AUTO_INCREMENT PRIMARY KEY,
+			fk INTEGER,
+			val FLOAT
+		)
+	",
+	)
+	.unwrap();
+	glue.execute(
+		"
+		CREATE INDEX primkey ON B (pk)
+	",
+	)
+	.unwrap();
+	glue.insert_vec(
+		String::from("B"),
+		vec![String::from("fk"), String::from("val")],
+		rows,
+	)
+	.unwrap();
+}
+
+fn setup_c(glue: &mut Glue) {
+	let rows: Vec<Vec<Value>> = (0..100_000)
+		.into_iter()
+		.map(|_row| vec![fastrand::i64(0..10_000).into(), fastrand::f64().into()])
+		.collect();
+	glue.execute(
+		"
+		CREATE TABLE C (
+			pk INTEGER AUTO_INCREMENT PRIMARY KEY,
+			fk INTEGER,
+			val FLOAT
+		)
+	",
+	)
+	.unwrap();
+	glue.insert_vec(
+		String::from("C"),
+		vec![String::from("fk"), String::from("val")],
+		rows,
+	)
+	.unwrap();
+}
+
+fn setup() -> Glue {
+	let mut glue = setup_glue();
+	setup_a(&mut glue);
+	setup_b(&mut glue);
+	setup_c(&mut glue);
+	glue
+}
+
+fn filter(table: &str) -> String {
+	format!(
+		"
+		SELECT
+			*
+		FROM
+			{}
+		WHERE
+			pk < 100
+	",
+		table
+	)
+}
+fn find(table: &str) -> String {
+	format!(
+		"
+		SELECT
+			*
+		FROM
+			{}
+		WHERE
+			pk = 100
+	",
+		table
+	)
+}
+fn sum_group(table: &str) -> String {
+	format!(
+		"
+		SELECT
+			SUM(val)
+		FROM
+			{}
+		GROUP BY
+			fk
+	",
+		table
+	)
+}
+fn join(table: &str) -> String {
+	format!(
+		"
+		SELECT
+			SUM(val)
+		FROM
+			A
+			INNER JOIN {table}
+				ON {table}.fk = A.pk
+		GROUP BY
+			A.pk
+	",
+		table = table
+	)
+}
+
+fn bench(criterion: &mut Criterion) {
+	let mut glue = setup();
+
+	let mut group = criterion.benchmark_group("filter");
+	group.bench_function("a", |benchmarker| {
+		benchmarker.iter(|| glue.execute(&filter("A")).unwrap());
+	});
+	group.bench_function("b", |benchmarker| {
+		benchmarker.iter(|| glue.execute(&filter("B")).unwrap());
+	});
+	group.bench_function("c", |benchmarker| {
+		benchmarker.iter(|| glue.execute(&filter("C")).unwrap());
+	});
+	group.finish();
+
+	let mut group = criterion.benchmark_group("find");
+	group.bench_function("a", |benchmarker| {
+		benchmarker.iter(|| glue.execute(&find("A")).unwrap());
+	});
+	group.bench_function("b", |benchmarker| {
+		benchmarker.iter(|| glue.execute(&find("B")).unwrap());
+	});
+	group.bench_function("c", |benchmarker| {
+		benchmarker.iter(|| glue.execute(&find("C")).unwrap());
+	});
+	group.finish();
+
+	let mut group = criterion.benchmark_group("sum_group");
+	group
+		.sampling_mode(SamplingMode::Flat)
+		.measurement_time(Duration::from_secs(20));
+	group.bench_function("b", |benchmarker| {
+		benchmarker.iter(|| glue.execute(&sum_group("B")).unwrap());
+	});
+	group.bench_function("c", |benchmarker| {
+		benchmarker.iter(|| glue.execute(&sum_group("C")).unwrap());
+	});
+	group.finish();
+
+	let mut group = criterion.benchmark_group("join");
+	group
+		.sampling_mode(SamplingMode::Flat)
+		.measurement_time(Duration::from_secs(30));
+	group.bench_function("b", |benchmarker| {
+		benchmarker.iter(|| glue.execute(&join("B")).unwrap());
+	});
+	group.bench_function("c", |benchmarker| {
+		benchmarker.iter(|| glue.execute(&join("C")).unwrap());
+	});
+	group.finish();
+}
+
+criterion_group! {
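+	// Shared config: 10 samples each, 5 s warm-up, 10 s measurement, 5% noise threshold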
+	name = benches;
+	config = Criterion::default().noise_threshold(0.05).sample_size(10).warm_up_time(Duration::from_secs(5)).measurement_time(Duration::from_secs(10));
+	targets = bench
+}
+criterion_main!(benches);
diff --git a/multisql.sublime-project b/multisql.sublime-project
new file mode 100644
index 00000000..e48dc256
--- /dev/null
+++ b/multisql.sublime-project
@@ -0,0 +1,14 @@
+{
+	"settings": {
+		"detect_indentation": true,
+		"ensure_newline_at_eof_on_save": true,
+		"tab_size": 2,
+		"translate_tabs_to_spaces": false
+	},
+	"folders": [
+		{
+			"path": "./",
+			"folder_exclude_patterns": ["target", "//data", ".github"]
+		}
+	]
+}
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 00000000..0b3c3440
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1,4 @@
+hard_tabs = true
+#version = "Two"
+#imports_layout = "vertical"
+#imports_granularity = "One"
diff --git a/src/data/column.rs b/src/data/column.rs
new file mode 100644
index 00000000..6d566d6e
--- /dev/null
+++ b/src/data/column.rs
@@ -0,0 +1,78 @@
+use {
+	crate::ValueType,
+	serde::{Deserialize, Serialize},
+	sqlparser::{
+		ast::{ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident},
+		dialect::keywords::Keyword,
+		tokenizer::{Token, Word},
+	},
+};
+
+#[derive(Default, Clone, Serialize, Deserialize)]
+pub struct Column {
+	pub name: String,
+	pub data_type: ValueType,
+	pub default: Option<ValueDefault>,
+
+	pub is_nullable: bool,
+	pub is_unique: bool,
+}
+
+impl From<&ColumnDef> for Column {
+	fn from(column_def: &ColumnDef) -> Self {
+		column_def.clone().into()
+	}
+}
+impl From<ColumnDef> for Column {
+	fn from(column_def: ColumnDef) -> Self {
+		let ColumnDef {
+			name: Ident { value: name, .. },
+			data_type,
+			options,
+			..
+		} = column_def;
+
+		let is_nullable = options
+			.iter()
+			.any(|ColumnOptionDef { option, .. }| matches!(option, ColumnOption::Null));
+
+		let is_unique = options
+			.iter()
+			.any(|ColumnOptionDef { option, .. }| matches!(option, ColumnOption::Unique { .. }));
+
+		let default = options
+			.iter()
+			.find_map(|ColumnOptionDef { option, .. }| match option {
+				ColumnOption::Default(expr) => Some(ValueDefault::Recipe(expr.clone())),
+				ColumnOption::DialectSpecific(tokens)
+					if matches!(
+						tokens[..],
+						[
+							Token::Word(Word {
+								keyword: Keyword::AUTO_INCREMENT,
+								..
+							}),
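+							// AUTO_INCREMENT must be the leading token; trailing tokens are ignored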
+							..
+						]
+					) =>
+				{
+					Some(ValueDefault::AutoIncrement(1))
+				}
+				_ => None,
+			});
+
+		Self {
+			name,
+			data_type: data_type.into(),
+			default,
+			is_nullable,
+			is_unique,
+		}
+	}
+}
+
+#[derive(Clone, Serialize, Deserialize)]
+pub enum ValueDefault {
+	Recipe(Expr), // TODO: Recipe serialisation
+	AutoIncrement(u64),
+}
diff --git a/src/data/index.rs b/src/data/index.rs
new file mode 100644
index 00000000..cf1a03fd
--- /dev/null
+++ b/src/data/index.rs
@@ -0,0 +1,170 @@
+use {
+	crate::{result::Result, Column, DatabaseInner, Ingredient, Method, Recipe, Value},
+	rayon::prelude::*,
+	serde::{Deserialize, Serialize},
+	std::{cmp::Ordering, collections::HashMap},
+};
+
+#[derive(Clone, Serialize, Deserialize, Debug)]
+pub struct Index {
+	pub name: String,
+	pub column: String,
+	pub is_unique: bool,
+}
+
+#[derive(Clone, Debug)]
+pub enum IndexFilter {
+	LessThan(String, Value), // Index, Max (exclusive)
+	MoreThan(String, Value), // Index, Min (inclusive)
+	Inner(Box<IndexFilter>, Box<IndexFilter>),
+	Outer(Box<IndexFilter>, Box<IndexFilter>),
+}
+
+impl Index {
+	pub fn new(name: String, column: String, is_unique: bool) -> Self {
+		Self {
+			name,
+			column,
+			is_unique,
+		}
+	}
+	pub async fn reset(
+		&self,
+		storage: &mut DatabaseInner,
+		table: &str,
+		columns: &[Column],
+	) -> Result<()> {
+		let rows = storage.scan_data(table).await?;
+		let column_index: usize = columns
+			.iter()
+			.enumerate()
+			.find_map(|(index, def)| (def.name == self.column).then(|| index))
+			.unwrap(); // TODO: Handle
+
+		let mut rows: Vec<(Value, Vec<Value>)> =
+			rows.into_iter().map(|(key, row)| (key, row.0)).collect();
+		rows.par_sort_unstable_by(|(_, a_values), (_, b_values)| {
+			a_values[column_index]
+				.partial_cmp(&b_values[column_index])
+				.unwrap_or(Ordering::Equal)
+		});
+		let keys = rows
+			.into_iter()
+			.map(|(key, mut values)| (values.swap_remove(column_index), key))
+			.collect();
+
+		storage.update_index(table, &self.name, keys).await
+	}
+}
+
+impl Recipe {
+	pub fn reduce_by_index_filter(
+		self,
+		indexed_columns: HashMap<String, (String, String)>,
+	) -> (Self, Option<HashMap<String, IndexFilter>>) {
+		// TODO: OR & others
+		use IndexFilter::*;
+		match self {
+			Recipe::Ingredient(_) => (),
+			Recipe::Method(ref method) => match *method.clone() {
+				Method::BinaryOperation(operator, left, right)
+					if operator as usize == Value::and as usize =>
+				{
+					let (left, left_filters) = left.reduce_by_index_filter(indexed_columns.clone());
+					let (right, right_filters) = right.reduce_by_index_filter(indexed_columns);
+					return (
+						Recipe::Method(Box::new(Method::BinaryOperation(operator, left, right))),
+						match (left_filters, right_filters) {
+							(Some(filters), None) | (None, Some(filters)) => Some(filters),
+							(Some(left_filters), Some(mut right_filters)) => Some(
+								left_filters
+									.into_iter()
+									.map(|(table, filter)| {
+										(
+											table.clone(),
+											match right_filters.remove(&table) {
+												Some(right) => {
+													Inner(Box::new(filter), Box::new(right))
+												}
+												None => filter,
+											},
+										)
+									})
+									.collect::<HashMap<String, IndexFilter>>()
+									.into_iter()
+									.chain(right_filters.into_iter())
+									.collect::<HashMap<String, IndexFilter>>(),
+							),
+							(None, None) => None, // TODO: Don't unnecessarily rebuild
+						},
+					);
+				}
+				Method::BinaryOperation(
+					operator,
+					Recipe::Ingredient(Ingredient::Column(column)),
+					Recipe::Ingredient(Ingredient::Value(value)),
+				) if operator as usize == Value::eq as usize => {
+					if let Some((table, index)) = indexed_columns.get(&column) {
+						let mut filters = HashMap::new();
+						filters.insert(
+							table.clone(),
+							Inner(
+								Box::new(LessThan(index.clone(), value.inc())),
+								Box::new(MoreThan(index.clone(), value)),
+							),
+						); // Eh; TODO: Improve
+						return (Recipe::TRUE, Some(filters));
+					}
+				}
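+				// The remaining arms translate >=, >, < and <= on an indexed
+				// column into MoreThan/LessThan filters, using Value::inc to
+				// adjust the bound where the comparison's inclusivity differs.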
Method::BinaryOperation( + operator, + Recipe::Ingredient(Ingredient::Column(column)), + Recipe::Ingredient(Ingredient::Value(value)), + ) if operator as usize == Value::gt_eq as usize => { + if let Some((table, index)) = indexed_columns.get(&column) { + let mut filters = HashMap::new(); + filters.insert(table.clone(), MoreThan(index.clone(), value)); + return (Recipe::TRUE, Some(filters)); + } + } + Method::BinaryOperation( + operator, + Recipe::Ingredient(Ingredient::Column(column)), + Recipe::Ingredient(Ingredient::Value(value)), + ) if operator as usize == Value::gt as usize => { + if let Some((table, index)) = indexed_columns.get(&column) { + let mut filters = HashMap::new(); + filters.insert(table.clone(), MoreThan(index.clone(), value.inc())); + return (Recipe::TRUE, Some(filters)); + } + } + Method::BinaryOperation( + operator, + Recipe::Ingredient(Ingredient::Column(column)), + Recipe::Ingredient(Ingredient::Value(value)), + ) if operator as usize == Value::lt as usize => { + if let Some((table, index)) = indexed_columns.get(&column) { + let mut filters = HashMap::new(); + filters.insert(table.clone(), LessThan(index.clone(), value)); + return (Recipe::TRUE, Some(filters)); + } + } + Method::BinaryOperation( + operator, + Recipe::Ingredient(Ingredient::Column(column)), + Recipe::Ingredient(Ingredient::Value(value)), + ) if operator as usize == Value::lt_eq as usize => { + if let Some((table, index)) = indexed_columns.get(&column) { + let mut filters = HashMap::new(); + filters.insert(table.clone(), LessThan(index.clone(), value.inc())); + return (Recipe::TRUE, Some(filters)); + } + } + _ => (), + }, + } + (self, None) + } +} diff --git a/src/data/join.rs b/src/data/join.rs new file mode 100644 index 00000000..8ac75a49 --- /dev/null +++ b/src/data/join.rs @@ -0,0 +1,110 @@ +use { + crate::{NullOrd, Value}, + std::cmp::Ordering, +}; + +pub enum JoinType { + Inner, + Outer, + Left, + Right, +} + +macro_rules! 
unwrap_or_break { + ($unwrap: expr) => { + match $unwrap { + Some(value) => value, + None => { + break; + } + } + }; +} + +pub fn join_iters(join_type: JoinType, a: Vec, b: Vec) -> Vec { + let mut a = a.into_iter().peekable(); + let mut b = b.into_iter().peekable(); + let mut results = vec![]; + // TODO: There's probably a better way to do this + match join_type { + JoinType::Inner => loop { + match unwrap_or_break!(a.peek()) + .null_cmp(unwrap_or_break!(&b.peek())) + .unwrap_or(Ordering::Equal) + { + Ordering::Equal => { + results.push(a.next().unwrap()); + b.next(); + } + Ordering::Less => { + a.next(); + } + Ordering::Greater => { + b.next(); + } + } + }, + JoinType::Outer => { + loop { + match unwrap_or_break!(a.peek()) + .null_cmp(unwrap_or_break!(&b.peek())) + .unwrap_or(Ordering::Equal) + { + Ordering::Less => { + results.push(a.next().unwrap()); + } + Ordering::Greater => { + results.push(b.next().unwrap()); + } + Ordering::Equal => { + results.push(a.next().unwrap()); + b.next(); + } + } + } + results.extend(a); + results.extend(b); + } + JoinType::Left => { + loop { + match unwrap_or_break!(a.peek()) + .null_cmp(unwrap_or_break!(b.peek())) + .unwrap_or(Ordering::Equal) + { + Ordering::Less => { + results.push(a.next().unwrap()); + } + Ordering::Equal => { + results.push(a.next().unwrap()); + b.next(); + } + Ordering::Greater => { + b.next(); + } + } + } + results.extend(a); + } + JoinType::Right => { + loop { + match unwrap_or_break!(a.peek()) + .null_cmp(unwrap_or_break!(&b.peek())) + .unwrap_or(Ordering::Equal) + { + Ordering::Greater => { + results.push(b.next().unwrap()); + } + Ordering::Equal => { + results.push(a.next().unwrap()); + b.next(); + } + Ordering::Less => { + a.next(); + } + } + } + results.extend(b); + } + } + results +} diff --git a/src/data/mod.rs b/src/data/mod.rs new file mode 100644 index 00000000..2d68d486 --- /dev/null +++ b/src/data/mod.rs @@ -0,0 +1,17 @@ +mod column; +mod index; +mod join; +mod row; +pub(crate) mod schema; +mod table; +pub(crate) mod value; + +pub use { + column::*, + index::{Index, IndexFilter}, + join::{join_iters, JoinType}, + row::{Row, RowError}, + schema::*, + table::{get_name, Table, TableError}, + value::*, +}; diff --git a/src/data/row.rs b/src/data/row.rs new file mode 100644 index 00000000..36fe13bf --- /dev/null +++ b/src/data/row.rs @@ -0,0 +1,28 @@ +use { + crate::{data::Value, result::Result}, + serde::{Deserialize, Serialize}, + std::fmt::Debug, + thiserror::Error, +}; + +#[derive(Error, Serialize, Debug, PartialEq)] +pub enum RowError { + #[error("conflict! 
+	ConflictOnEmptyRow,
+}
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct Row(pub Vec<Value>);
+
+impl Row {
+	pub fn get_value(&self, index: usize) -> Option<&Value> {
+		self.0.get(index)
+	}
+
+	pub fn take_first_value(self) -> Result<Value> {
+		self.0
+			.into_iter()
+			.next()
+			.ok_or_else(|| RowError::ConflictOnEmptyRow.into())
+	}
+}
diff --git a/src/data/schema.rs b/src/data/schema.rs
new file mode 100644
index 00000000..0865c262
--- /dev/null
+++ b/src/data/schema.rs
@@ -0,0 +1,184 @@
+use {
+	crate::{Column, Index},
+	serde::{Deserialize, Serialize},
+	std::collections::HashMap,
+};
+
+#[derive(Clone, Serialize, Deserialize)]
+pub struct Schema {
+	pub table_name: String,
+	pub column_defs: Vec<Column>,
+	pub indexes: Vec<Index>,
+}
+
+#[derive(Clone, Default)]
+pub struct SchemaDiff {
+	pub table_name: Option<String>,
+	pub column_defs: Option<HashMap<Option<usize>, Option<Column>>>,
+	pub indexes: Option<HashMap<Option<usize>, Option<Index>>>,
+}
+impl SchemaDiff {
+	pub fn new_rename(new_name: String) -> Self {
+		Self {
+			table_name: Some(new_name),
+			column_defs: None,
+			indexes: None,
+		}
+	}
+	pub fn new_add_column(new_column: Column) -> Self {
+		Self {
+			table_name: None,
+			column_defs: Some([(None, Some(new_column))].into()),
+			indexes: None,
+		}
+	}
+	pub fn new_remove_column(column_index: usize) -> Self {
+		Self {
+			table_name: None,
+			column_defs: Some([(Some(column_index), None)].into()),
+			indexes: None,
+		}
+	}
+	pub fn new_rename_column(
+		column_index: usize,
+		mut column: Column,
+		new_column_name: String,
+	) -> Self {
+		column.name = new_column_name;
+		Self {
+			table_name: None,
+			column_defs: Some([(Some(column_index), Some(column))].into()),
+			indexes: None,
+		}
+	}
+	pub fn new_add_index(new_index: Index) -> Self {
+		Self {
+			table_name: None,
+			column_defs: None,
+			indexes: Some([(None, Some(new_index))].into()),
+		}
+	}
+}
+
+impl SchemaDiff {
+	pub fn merge(self, mut schema: Schema) -> Schema {
+		if let Some(table_name) = self.table_name {
+			schema.table_name = table_name
+		}
+		if let Some(column_defs) = self.column_defs {
+			for (index, column_def) in column_defs.into_iter() {
+				match (index, column_def) {
+					(None, None) => (),
+					(Some(index), None) => {
+						schema.column_defs.remove(index);
+					} // TODO: WARN: Will be an issue if multiple change
+					(Some(index), Some(column_def)) => {
+						schema
+							.column_defs
+							.get_mut(index)
+							.map(|old_column_def| *old_column_def = column_def);
+					}
+					(None, Some(column_def)) => {
+						schema.column_defs.push(column_def);
+					}
+				}
+			}
+		}
+		if let Some(indexes) = self.indexes {
+			for (index, index_def) in indexes.into_iter() {
+				match (index, index_def) {
+					(None, None) => (),
+					(Some(index), None) => {
+						schema.indexes.remove(index);
+					} // TODO: WARN: Will be an issue if multiple change
+					(Some(index), Some(index_def)) => {
+						schema
+							.indexes
+							.get_mut(index)
+							.map(|old_index_def| *old_index_def = index_def);
+					}
+					(None, Some(index_def)) => {
+						schema.indexes.push(index_def);
+					}
+				}
+			}
+		}
+		schema
+	}
+}
+
+impl From<Schema> for SchemaDiff {
+	fn from(from: Schema) -> Self {
+		let column_defs = from
+			.column_defs
+			.into_iter()
+			.enumerate()
+			.map(|(key, col)| (Some(key), Some(col)))
+			.collect::<HashMap<Option<usize>, Option<Column>>>();
+		let indexes = from
+			.indexes
+			.into_iter()
+			.enumerate()
+			.map(|(key, idx)| (Some(key), Some(idx)))
+			.collect::<HashMap<Option<usize>, Option<Index>>>();
+		Self {
+			table_name: Some(from.table_name),
+			column_defs: Some(column_defs),
+			indexes: Some(indexes),
+		}
+	}
+}
+
+pub enum SchemaChange {
+	RenameTable(String),
+
+	ColumnUpdate(usize, Column),
+	ColumnAdd(Column),
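+	// Update/Remove indices refer to positions in the pre-change schema (see the merge TODOs above)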
+	ColumnRemove(usize),
+
+	IndexUpdate(usize, Index),
+	IndexAdd(Index),
+	IndexRemove(usize),
+}
+impl SchemaDiff {
+	pub fn get_changes(&self) -> Vec<SchemaChange> {
+		use SchemaChange::*;
+		let mut changes = Vec::new();
+		if let Some(table_name) = &self.table_name {
+			changes.push(RenameTable(table_name.clone()))
+		}
+		if let Some(column_defs) = &self.column_defs {
+			for (index, column_def) in column_defs.iter() {
+				match (index, column_def) {
+					(None, None) => (),
+					(Some(index), Some(column_def)) => {
+						changes.push(ColumnUpdate(*index, column_def.clone()));
+					}
+					(None, Some(column_def)) => {
+						changes.push(ColumnAdd(column_def.clone()));
+					}
+					(Some(index), None) => {
+						changes.push(ColumnRemove(*index));
+					}
+				}
+			}
+		}
+		if let Some(indexes) = &self.indexes {
+			for (index, index_def) in indexes.iter() {
+				match (index, index_def) {
+					(None, None) => (),
+					(Some(index), Some(index_def)) => {
+						changes.push(IndexUpdate(*index, index_def.clone()));
+					}
+					(None, Some(index_def)) => {
+						changes.push(IndexAdd(index_def.clone()));
+					}
+					(Some(index), None) => {
+						changes.push(IndexRemove(*index));
+					}
+				}
+			}
+		}
+		changes
+	}
+}
diff --git a/src/data/table.rs b/src/data/table.rs
new file mode 100644
index 00000000..a1d7b7bf
--- /dev/null
+++ b/src/data/table.rs
@@ -0,0 +1,55 @@
+use {
+	crate::result::Result,
+	serde::Serialize,
+	sqlparser::ast::{ObjectName, TableAlias, TableFactor},
+	std::fmt::Debug,
+	thiserror::Error,
+};
+
+#[derive(Error, Serialize, Debug, PartialEq)]
+pub enum TableError {
+	#[error("unreachable")]
+	Unreachable,
+
+	#[error("TableFactorNotSupported")]
+	TableFactorNotSupported,
+}
+
+pub struct Table<'a> {
+	name: &'a String,
+	alias: Option<&'a String>,
+}
+
+impl<'a> Table<'a> {
+	pub fn new(table_factor: &'a TableFactor) -> Result<Self> {
+		match table_factor {
+			TableFactor::Table { name, alias, .. } => {
+				let name = get_name(name)?;
+				let alias = alias.as_ref().map(|TableAlias { name, .. }| &name.value);
+
+				Ok(Self { name, alias })
+			}
+			_ => Err(TableError::TableFactorNotSupported.into()),
+		}
+	}
+
+	pub fn get_name(&self) -> &'a String {
+		self.name
+	}
+
+	pub fn get_alias(&self) -> &'a String {
+		match self.alias {
+			Some(alias) => alias,
+			None => self.name,
+		}
+	}
+}
+
+pub fn get_name(table_name: &ObjectName) -> Result<&String> {
+	let ObjectName(idents) = table_name;
+
+	idents
+		.last()
+		.map(|ident| &ident.value)
+		.ok_or_else(|| TableError::Unreachable.into())
+}
diff --git a/src/data/value/big_endian.rs b/src/data/value/big_endian.rs
new file mode 100644
index 00000000..bbb44280
--- /dev/null
+++ b/src/data/value/big_endian.rs
@@ -0,0 +1,27 @@
+use crate::Value;
+
+pub trait BigEndian {
+	fn to_be_bytes(&self) -> Vec<u8>;
+}
+
+const SEP: [u8; 1] = [0x00];
+const NULL: [u8; 1] = [0x01];
+
+impl BigEndian for Value {
+	fn to_be_bytes(&self) -> Vec<u8> {
+		use Value::*;
+		match self {
+			Null => [SEP, NULL].concat(),
+			Bool(v) => [SEP, [if *v { 0x02 } else { 0x01 }]].concat(),
+			I64(v) => [
+				SEP.as_slice(),
+				&[if v.is_positive() { 0x02 } else { 0x01 }],
+				&v.to_be_bytes(),
+			]
+			.concat(),
+			U64(v) => [SEP.as_slice(), &v.to_be_bytes()].concat(),
+			Str(v) => [SEP.as_slice(), v.as_bytes()].concat(),
+			_ => unimplemented!(),
+		}
+	}
+}
diff --git a/src/data/value/cast.rs b/src/data/value/cast.rs
new file mode 100644
index 00000000..87b38ef6
--- /dev/null
+++ b/src/data/value/cast.rs
@@ -0,0 +1,293 @@
+use {
+	super::{Convert, Value, ValueError},
+	crate::{Error, Result},
+	chrono::{NaiveDate, NaiveDateTime, NaiveTime, ParseError},
+	std::convert::TryInto,
+	thousands::Separable,
+};
+
+pub trait Cast<Output> {
+	fn cast(self) -> Result<Output>;
+}
+pub trait CastWithRules<Output> {
+	fn cast_with_rule(self, rule: Self) -> Result<Output>;
+}
+
+// Cores
+impl Cast<bool> for Value {
+	fn cast(self) -> Result<bool> {
+		Ok(match self {
+			Value::Bool(value) => value,
+			Value::I64(value) => match value {
+				1 => true,
+				0 => false,
+				_ => return Err(ValueError::ImpossibleCast.into()),
+			},
+			Value::F64(value) => {
+				if value.eq(&1.0) {
+					true
+				} else if value.eq(&0.0) {
+					false
+				} else {
+					return Err(ValueError::ImpossibleCast.into());
+				}
+			}
+			Value::Str(value) => match value.to_uppercase().as_str() {
+				"TRUE" => true,
+				"FALSE" => false,
+				_ => return Err(ValueError::ImpossibleCast.into()),
+			},
+			Value::Null => return Err(ValueError::ImpossibleCast.into()),
+			_ => unimplemented!(),
+		})
+	}
+}
+
+impl Cast<u64> for Value {
+	fn cast(self) -> Result<u64> {
+		Ok(match self {
+			Value::Bool(value) => {
+				if value {
+					1
+				} else {
+					0
+				}
+			}
+			Value::U64(value) => value,
+			Value::I64(value) => value.try_into().map_err(|_| ValueError::ImpossibleCast)?,
+			Value::F64(value) => (value.trunc() as i64)
+				.try_into()
+				.map_err(|_| ValueError::ImpossibleCast)?,
+			Value::Str(value) => lexical::parse(value).map_err(|_| ValueError::ImpossibleCast)?,
+			Value::Null => return Err(ValueError::ImpossibleCast.into()),
+			_ => unimplemented!(),
+		})
+	}
+}
+
+impl Cast<i64> for Value {
+	fn cast(self) -> Result<i64> {
+		Ok(match self {
+			Value::Bool(value) => {
+				if value {
+					1
+				} else {
+					0
+				}
+			}
+			Value::U64(value) => value.try_into().map_err(|_| ValueError::ImpossibleCast)?,
+			Value::I64(value) => value,
+			Value::F64(value) => value.trunc() as i64,
+			Value::Str(value) => lexical::parse(value).map_err(|_| ValueError::ImpossibleCast)?,
+			Value::Null => return Err(ValueError::ImpossibleCast.into()),
+			_ => unimplemented!(),
+		})
+	}
+}
+
+impl Cast<f64> for Value {
+	fn cast(self) -> Result<f64> {
+		Ok(match self {
+			Value::Bool(value) => {
+				if value {
+					1.0
+				} else {
+					0.0
+				}
+			}
+			Value::U64(value) => (value as f64).trunc(),
+			Value::I64(value) => (value as f64).trunc(),
+			Value::F64(value) => value,
+			Value::Str(value) => {
+				fast_float::parse(value).map_err(|_| ValueError::ImpossibleCast)?
+			}
+			Value::Null => return Err(ValueError::ImpossibleCast.into()),
+			_ => unimplemented!(),
+		})
+	}
+}
+impl Cast<String> for Value {
+	fn cast(self) -> Result<String> {
+		Ok(match self {
+			Value::Bool(value) => (if value { "TRUE" } else { "FALSE" }).to_string(),
+			Value::U64(value) => lexical::to_string(value),
+			Value::I64(value) => lexical::to_string(value),
+			Value::F64(value) => lexical::to_string(value),
+			Value::Str(value) => value,
+			Value::Null => String::from("NULL"),
+			_ => unimplemented!(),
+		})
+	}
+}
+
+// Utilities
+impl Cast<usize> for Value {
+	fn cast(self) -> Result<usize> {
+		let int: u64 = self.cast()?;
+		int.try_into()
+			.map_err(|_| ValueError::ImpossibleCast.into())
+	}
+}
+
+// Non-Core
+impl CastWithRules<bool> for Value {
+	fn cast_with_rule(self, rule: Self) -> Result<bool> {
+		match rule {
+			Value::I64(000) | Value::Bool(true) => self.cast(),
+			_ => Err(ValueError::InvalidConversionRule.into()),
+		}
+	}
+}
+impl CastWithRules<i64> for Value {
+	fn cast_with_rule(self, rule: Self) -> Result<i64> {
+		match rule {
+			Value::I64(000) | Value::Bool(true) => self.cast(),
+			_ => Err(ValueError::InvalidConversionRule.into()),
+		}
+	}
+}
+impl CastWithRules<f64> for Value {
+	fn cast_with_rule(self, rule: Self) -> Result<f64> {
+		match rule {
+			Value::I64(000) | Value::Bool(true) => self.cast(),
+			_ => Err(ValueError::InvalidConversionRule.into()),
+		}
+	}
+}
+impl CastWithRules<String> for Value {
+	fn cast_with_rule(self, rule: Self) -> Result<String> {
+		match rule {
+			Value::I64(000) | Value::Bool(true) => self.cast(),
+			Value::Str(specified) if specified == *"DATETIME" => {
+				Ok(NaiveDateTime::from_timestamp(self.convert()?, 0)
+					.format("%F %T")
+					.to_string())
+			}
+			Value::Str(specified) if specified == *"MONEY" => {
+				let value: f64 = self.convert()?;
+				let value = (value * 100.0).round() / 100.0;
+				let value = value.separate_with_commas();
+				Ok(format!("${}", value))
+			}
+			Value::Str(specified) if specified == *"SEPARATED" => {
+				let value: f64 = self.convert()?;
+				let value = (value * 100.0).round() / 100.0;
+				let value = value.separate_with_commas();
+				Ok(value)
+			}
+			Value::Str(format) if matches!(self, Value::I64(..)) => {
+				// TODO: TIMESTAMP type
+				Ok(NaiveDateTime::from_timestamp(self.convert()?, 0)
+					.format(&format)
+					.to_string())
+			}
+			_ => Err(ValueError::InvalidConversionRule.into()),
+		}
+	}
+}
+
+// Non-SQL
+// - DateTime
+fn parse_error_into(error: ParseError) -> Error {
+	ValueError::DateTimeParseError(format!("{:?}", error)).into()
+}
+impl Cast<NaiveDateTime> for Value {
+	// Default (from Timestamp)
+	fn cast(self) -> Result<NaiveDateTime> {
+		let timestamp: i64 = self.cast()?;
+		NaiveDateTime::from_timestamp_opt(timestamp, 0)
+			.ok_or_else(|| ValueError::ImpossibleCast.into())
+	}
+}
+#[allow(clippy::zero_prefixed_literal)]
+impl CastWithRules<NaiveDateTime> for Value {
+	fn cast_with_rule(self, rule: Self) -> Result<NaiveDateTime> {
+		fn for_format_datetime(string: Value, format: &str) -> Result<NaiveDateTime> {
+			let string: String = string.cast()?;
+			let string: &str = string.as_str();
+			NaiveDateTime::parse_from_str(string, format).map_err(parse_error_into)
+		}
+		fn for_format_date(string: Value, format: &str) -> Result<NaiveDateTime> {
+			let string: String = string.cast()?;
+			let string: &str = string.as_str();
+			Ok(NaiveDate::parse_from_str(string, format)
+				.map_err(parse_error_into)?
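+				// a date parsed without a time component is given midnight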
+				.and_hms(0, 0, 0))
+		}
+		fn for_format_time(string: Value, format: &str) -> Result<NaiveDateTime> {
+			let string: String = string.cast()?;
+			let string: &str = string.as_str();
+			Ok(NaiveDateTime::from_timestamp(0, 0)
+				.date()
+				.and_time(NaiveTime::parse_from_str(string, format).map_err(parse_error_into)?))
+		}
+		fn try_rules(try_value: &Value, rules: &[i64]) -> Result<NaiveDateTime> {
+			rules
+				.iter()
+				.find_map(|try_rule| try_value.clone().cast_with_rule((*try_rule).into()).ok())
+				.ok_or_else(|| ValueError::ParseError(try_value.clone(), "TIMESTAMP").into())
+		}
+		const TRY_RULES_TIMESTAMP: [i64; 1] = [000];
+		const TRY_RULES_DATETIME: [i64; 7] = [010, 011, 020, 021, 030, 031, 060];
+		const TRY_RULES_DATE: [i64; 4] = [022, 033, 032, 061]; // 033 should go before 032
+		const TRY_RULES_TIME: [i64; 2] = [100, 101];
+
+		match rule {
+			Value::Bool(true) => try_rules(&self, &TRY_RULES_TIMESTAMP),
+			Value::Str(custom) => match custom.as_str() {
+				"TIMESTAMP" => try_rules(&self, &TRY_RULES_TIMESTAMP),
+				"DATETIME" => try_rules(&self, &TRY_RULES_DATETIME),
+				"DATE" => try_rules(&self, &TRY_RULES_DATE),
+				"TIME" => try_rules(&self, &TRY_RULES_TIME),
+				custom_format => for_format_datetime(self.clone(), custom_format)
+					.or_else(|_| for_format_date(self.clone(), custom_format))
+					.or_else(|_| for_format_time(self, custom_format)),
+			},
+			Value::I64(000) => {
+				// From Timestamp (Default)
+				self.cast()
+			}
+			// 01* - Statically specifically defined by accepted standards bodies
+			/*Value::I64(010) => {
+				// From RFC 3339 format
+				let datetime_string: String = self.cast()?;
+				DateTime::parse_from_rfc3339(datetime_string.as_str()).map_err(parse_error_into)
+			}
+			Value::I64(011) => {
+				// From RFC 2822 format
+				let datetime_string: String = self.cast()?;
+				DateTime::parse_from_rfc2822(datetime_string.as_str()).map_err(parse_error_into)
+			}*/
+			// 02* - Conventional
+			// - From Database format (YYYY-MM-DD HH:MM:SS)
+			Value::I64(020) => for_format_datetime(self, "%F %T"),
+			// - From Database format, no seconds (YYYY-MM-DD HH:MM)
+			Value::I64(021) => for_format_datetime(self, "%F %R"),
+			// - From Database format, no time (YYYY-MM-DD)
+			Value::I64(022) => for_format_date(self, "%F"),
+
+			// 0(3-4)* - Normal
+			// - From Database format, grossified time (YYYY-MM-DD HH:MM:SS (AM/PM))
+			Value::I64(030) => for_format_datetime(self, "%F %r"),
+			// - From Database format, grossified time, no seconds (YYYY-MM-DD HH:MM (AM/PM))
+			Value::I64(031) => for_format_datetime(self, "%I:%M %p"),
+			// - From dd-Mon-YYYY
+			Value::I64(032) => for_format_date(self, "%v"),
+			// - From dd-Mon-YY
+			Value::I64(033) => for_format_date(self, "%e-%b-%y"),
+
+			// 0(5-8)* - Locales
+			// 06* - Australia
+			Value::I64(060) => for_format_datetime(self, "%d/%m/%Y %H:%M"),
+			Value::I64(061) => for_format_date(self, "%d/%m/%Y"),
+			// (TODO(?))
+
+			// 10* - Time
+			// - (HH:MM:SS)
+			Value::I64(100) => for_format_time(self, "%T"),
+			// - No seconds (HH:MM)
+			Value::I64(101) => for_format_time(self, "%R"),
+			_ => Err(ValueError::InvalidConversionRule.into()),
+		}
+	}
+}
diff --git a/src/data/value/convert.rs b/src/data/value/convert.rs
new file mode 100644
index 00000000..6724f8e6
--- /dev/null
+++ b/src/data/value/convert.rs
@@ -0,0 +1,77 @@
+use {
+	super::{Value, ValueError},
+	crate::result::Result,
+	chrono::NaiveDateTime,
+};
+
+// TODO: No clone versions
+
+pub trait Convert<Core> {
+	fn convert(self) -> Result<Core>;
+}
+
+pub trait ConvertFrom<Value>: Sized {
+	fn convert_from(value: Value) -> Result<Self>;
+}
+impl<Core> ConvertFrom<Value> for Core
+where
+	Value: Convert<Core> + Clone,
+{
+	fn convert_from(value: Value) -> Result<Core> {
+		value.convert()
+	}
+}
+
+impl Convert<bool> for Value {
+	fn convert(self) -> Result<bool> {
+		Ok(match self {
+			Value::Bool(inner) => inner,
+			other => return Err(ValueError::CannotConvert(other, "BOOLEAN").into()),
+		})
+	}
+}
+
+impl Convert<u64> for Value {
+	fn convert(self) -> Result<u64> {
+		Ok(match self {
+			Value::U64(inner) => inner,
+			other => return Err(ValueError::CannotConvert(other, "UINTEGER").into()),
+		})
+	}
+}
+
+impl Convert<i64> for Value {
+	fn convert(self) -> Result<i64> {
+		Ok(match self {
+			Value::I64(inner) => inner,
+			other => return Err(ValueError::CannotConvert(other, "INTEGER").into()),
+		})
+	}
+}
+
+impl Convert<f64> for Value {
+	fn convert(self) -> Result<f64> {
+		Ok(match self {
+			Value::F64(inner) => inner,
+			#[cfg(feature = "implicit_float_conversion")]
+			Value::I64(inner) => inner as f64,
+			other => return Err(ValueError::CannotConvert(other, "FLOAT").into()),
+		})
+	}
+}
+
+impl Convert<String> for Value {
+	fn convert(self) -> Result<String> {
+		Ok(match self {
+			Value::Str(inner) => inner,
+			other => return Err(ValueError::CannotConvert(other, "TEXT").into()),
+		})
+	}
+}
+
+impl Convert<NaiveDateTime> for Value {
+	fn convert(self) -> Result<NaiveDateTime> {
+		let secs = self.convert()?;
+		Ok(NaiveDateTime::from_timestamp(secs, 0))
+	}
+}
diff --git a/src/data/value/error.rs b/src/data/value/error.rs
new file mode 100644
index 00000000..72dfc95b
--- /dev/null
+++ b/src/data/value/error.rs
@@ -0,0 +1,63 @@
+use {super::Value, serde::Serialize, std::fmt::Debug, thiserror::Error};
+
+#[derive(Error, Serialize, Debug, PartialEq)]
+pub enum ValueError {
+	#[error("literal: {literal} is incompatible with data type: {data_type}")]
+	IncompatibleLiteralForDataType { data_type: String, literal: String },
+
+	#[error("incompatible data type, data type: {data_type}, value: {value}")]
+	IncompatibleDataType { data_type: String, value: String },
+
+	#[error("null value on not null field")]
+	NullValueOnNotNullField,
+
+	#[error("failed to parse number")]
+	FailedToParseNumber,
+
+	#[error("unreachable failure on parsing number")]
+	UnreachableNumberParsing,
+
+	#[error("floating columns cannot be set to unique constraint")]
+	ConflictOnFloatWithUniqueConstraint,
+
+	#[error(
+		"number of function parameters not matching (expected: {expected:?}, found: {found:?})"
+	)]
+	NumberOfFunctionParamsNotMatching { expected: usize, found: usize },
+
+	#[error("conversion rule is not accepted for this type")]
+	InvalidConversionRule,
+
+	#[error("impossible cast")]
+	ImpossibleCast, // Bad error -- phase out
+
+	#[error("date time failed to parse: {0}")]
+	DateTimeParseError(String),
+	#[error("failed to parse {0:?} as {1}")]
+	ParseError(Value, &'static str),
+	#[error("something went wrong with date math")]
+	DateError, // Should avoid throwing
+	#[error("timestamp error: {0}")]
+	SpecifiedTimestampError(String), // Should avoid throwing
+
+	#[error("cannot convert {0:?} into {1}")]
+	CannotConvert(Value, &'static str),
+
+	#[error("{1} only supports numeric values, found {0:?}")]
+	OnlySupportsNumeric(Value, &'static str),
+	#[error("{1} only supports boolean values, found {0:?}")]
+	OnlySupportsBoolean(Value, &'static str),
+	#[error("bad input: {0:?}")]
+	BadInput(Value),
+
+	#[error("unimplemented literal type")]
+	UnimplementedLiteralType,
+	#[error("unimplemented cast")]
+	UnimplementedCast,
+	#[error("unimplemented convert")]
+	UnimplementedConvert,
+	#[error("unreachable literal cast from number to integer: {0}")]
+	UnreachableLiteralCastFromNumberToInteger(String),
+	#[error("unimplemented literal cast: {literal} as {data_type}")]
+	UnimplementedLiteralCast { data_type: String, literal: String },
+}
diff --git a/src/data/value/evaluated.rs b/src/data/value/evaluated.rs
new file mode 100644
index 00000000..854c793c
--- /dev/null
+++ b/src/data/value/evaluated.rs
@@ -0,0 +1,19 @@
+use {
+	crate::{
+		data::Value,
+		executor::Evaluated,
+		result::{Error, Result},
+	},
+	std::convert::TryFrom,
+};
+
+impl<'a> TryFrom<Evaluated<'a>> for Value {
+	type Error = Error;
+
+	fn try_from(evaluated: Evaluated<'a>) -> Result<Self> {
+		match evaluated {
+			Evaluated::Literal(literal) => Value::try_from(literal),
+			Evaluated::Value(value) => Ok(value),
+		}
+	}
+}
diff --git a/src/data/value/literal.rs b/src/data/value/literal.rs
new file mode 100644
index 00000000..6f150807
--- /dev/null
+++ b/src/data/value/literal.rs
@@ -0,0 +1,26 @@
+use {
+	super::{error::ValueError, Value},
+	crate::result::{Error, Result},
+	sqlparser::ast::Value as AstValue,
+	std::convert::TryFrom,
+};
+
+impl<'a> TryFrom<&'a AstValue> for Value {
+	type Error = Error;
+
+	fn try_from(ast_value: &'a AstValue) -> Result<Self> {
+		match ast_value {
+			AstValue::Boolean(value) => Ok(Value::Bool(*value)),
+			AstValue::Number(value, false) => value
+				.parse::<i64>()
+				.map_or_else(
+					|_| value.parse::<f64>().map(Value::F64),
+					|value| Ok(Value::I64(value)),
+				)
+				.map_err(|_| ValueError::FailedToParseNumber.into()),
+			AstValue::SingleQuotedString(value) => Ok(Value::Str(value.clone())),
+			AstValue::Null => Ok(Value::Null),
+			_ => Err(ValueError::UnimplementedLiteralType.into()),
+		}
+	}
+}
diff --git a/src/data/value/methods/aggregate.rs b/src/data/value/methods/aggregate.rs
new file mode 100644
index 00000000..5b866b4d
--- /dev/null
+++ b/src/data/value/methods/aggregate.rs
@@ -0,0 +1,51 @@
+use {
+	crate::{Result, Value},
+	std::cmp::Ordering,
+};
+
+// This intentionally does not take into account anything that could variably change data types
+// (example: IIF(column = 1, CAST(other AS INTEGER), CAST(other AS TEXT)))
+// COUNT is indifferent to types,
+// MIN and MAX will just give the MIN/MAX of the first type seen (partial_cmp would evaluate to None, which gives the accumulator)
+// SUM will, for now, use generic_add, which will throw if non-arithmetic.
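+// For example, folding aggregate_count by its match arms:
+//   (Null, Null)          -> Internal(0)
+//   (I64(1), I64(2))      -> Internal(2)  (two non-null values seen)
+//   (Internal(2), I64(5)) -> Internal(3)  (accumulator plus one more)
+//   (Internal(2), Null)   -> Internal(2)  (NULLs are not counted)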
+
+// Values returned as Value::Internal; need to be popped into Value::I64
+
+impl Value {
+	pub fn aggregate_count(self, other: Value) -> Result<Value> {
+		Ok(Value::Internal(match (self, other) {
+			(Value::Null, Value::Null) => 0,
+			(Value::Internal(self_val), Value::Internal(other_val)) => self_val + other_val,
+			(Value::Internal(val), Value::Null) | (Value::Null, Value::Internal(val)) => val,
+			(Value::Internal(val), _) | (_, Value::Internal(val)) => val + 1,
+			(_, _) => 2,
+		}))
+	}
+	pub fn aggregate_min(self, other: Value) -> Result<Value> {
+		Ok(
+			if matches!(self.partial_cmp(&other), Some(Ordering::Less))
+				|| matches!(other, Value::Null)
+			{
+				self
+			} else {
+				other
+			},
+		)
+	}
+	pub fn aggregate_max(self, other: Value) -> Result<Value> {
+		Ok(
+			if matches!(self.partial_cmp(&other), Some(Ordering::Greater))
+				|| matches!(other, Value::Null)
+			{
+				self
+			} else {
+				other
+			},
+		)
+	}
+	pub fn aggregate_sum(self, other: Value) -> Result<Value> {
+		other
+			.if_null(Value::I64(0)) // TODO: Handle lack of implicit i64 -> f64
+			.generic_add(self.if_null(Value::I64(0)))
+	}
+}
diff --git a/src/data/value/methods/binary.rs b/src/data/value/methods/binary.rs
new file mode 100644
index 00000000..4ebef2cb
--- /dev/null
+++ b/src/data/value/methods/binary.rs
@@ -0,0 +1,126 @@
+#![allow(clippy::should_implement_trait)] // TODO
+
+use {
+	super::ValueCore,
+	crate::{Convert, ConvertFrom, Result, Value, ValueError},
+};
+
+// These were using references, they now consume their variables. See ::recipe.
+macro_rules! natural_binary_op {
+	($name: ident, $trait: ident, $op: tt) => {
+		pub fn $name<Core>(self, other: Self) -> Result<Value>
+		where
+			Core: ValueCore + $trait<Output = Core>,
+		{
+			let (left, right) = (Core::convert_from(self)?, Core::convert_from(other)?);
+			let result = left $op right;
+			Ok(result.into())
+		}
+	};
+}
+macro_rules! natural_binary_ops {
+	($(($name: ident, $trait: ident, $op: tt, $generic_name: ident)),+) => {
+		use std::ops::{$($trait),+};
+		impl Value {
+			$(
+				natural_binary_op!($name, $trait, $op);
+				generic!($name, $generic_name);
+			)+
+		}
+	}
+}
+
+macro_rules! boolean_binary_op {
+	($name: ident, $op: tt) => {
+		pub fn $name(self, other: Self) -> Result<Value> {
+			let (left, right): (bool, bool) = (self.convert()?, other.convert()?);
+			let result = left $op right;
+			Ok(result.into())
+		}
+	};
+}
+macro_rules! boolean_binary_ops {
+	($(($name: ident, $op: tt)),+) => {
+		impl Value {
+			$(boolean_binary_op!($name, $op);)+
+		}
+	}
+}
+
+macro_rules! comparative_binary_op {
+	($name: ident, $op: tt) => {
+		pub fn $name(self, other: Self) -> Result<Value> {
+			Ok(Value::Bool(self $op other))
+		}
+	};
+}
+macro_rules! comparative_binary_ops {
+	($(($name: ident, $op: tt)),+) => {
+		impl Value {
+			$(comparative_binary_op!($name, $op);)+
+		}
+	}
+}
+
+macro_rules! generic {
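+	// Dispatches $name to i64 when both operands convert to i64, else to f64;
+	// a NULL operand short-circuits to NULL.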
+	($name: ident, $generic_name: ident) => {
+		pub fn $generic_name(self, other: Self) -> Result<Value> {
+			if matches!(self, Value::Null) || matches!(other, Value::Null) {
+				Ok(Value::Null)
+			} else if i64::convert_from(self.clone()).is_ok()
+				&& i64::convert_from(other.clone()).is_ok()
+			{
+				self.$name::<i64>(other)
+			} else if f64::convert_from(self.clone()).is_ok()
+				&& f64::convert_from(other.clone()).is_ok()
+			{
+				self.$name::<f64>(other)
+			} else {
+				Err(ValueError::OnlySupportsNumeric(
+					if f64::convert_from(self.clone()).is_err() {
+						self
+					} else {
+						other
+					},
+					stringify!($name),
+				)
+				.into())
+			}
+		}
+	};
+}
+
+natural_binary_ops!(
+	(add, Add, +, generic_add),
+	(subtract, Sub, -, generic_subtract),
+	(multiply, Mul, *, generic_multiply),
+	(divide, Div, /, generic_divide),
+	(modulus, Rem, %, generic_modulus)
+);
+
+boolean_binary_ops!(
+	(and, &),
+	(or, |),
+	(xor, ^)
+);
+
+comparative_binary_ops!(
+	(eq, ==),
+	(not_eq, !=),
+	(gt, >),
+	(gt_eq, >=),
+	(lt, <),
+	(lt_eq, <=)
+);
+
+impl Value {
+	pub fn string_concat(self, other: Self) -> Result<Value> {
+		Ok(format!(
+			"{}{}",
+			String::convert_from(self)?,
+			String::convert_from(other)?
+		)
+		.into())
+	}
+}
diff --git a/src/data/value/methods/function.rs b/src/data/value/methods/function.rs
new file mode 100644
index 00000000..9af478f9
--- /dev/null
+++ b/src/data/value/methods/function.rs
@@ -0,0 +1,151 @@
+use {
+	crate::{Cast, CastWithRules, Convert, Result, Value, ValueError},
+	chrono::NaiveDateTime,
+	uuid::Uuid,
+};
+
+macro_rules! expect_arguments {
+	($arguments: expr, $expect: expr) => {
+		match $arguments.len() {
+			$expect => (),
+			found => {
+				return Err(ValueError::NumberOfFunctionParamsNotMatching {
+					expected: $expect,
+					found,
+				}
+				.into())
+			}
+		}
+	};
+}
+
+macro_rules! optional_expect_arguments {
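+	// Like expect_arguments!, but accepts any argument count within $min..=$max
+	// (the error still reports $min as the expected count)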
optional_expect_arguments { + ($arguments: expr, $min: expr, $max: expr) => { + match $arguments.len() { + len if ($min..=$max).contains(&len) => (), + found => { + return Err(ValueError::NumberOfFunctionParamsNotMatching { + expected: $min, + found, + } + .into()) + } + } + }; +} + +impl Value { + pub fn function_if_null(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 2); + Ok(arguments.remove(0).if_null(arguments.remove(0))) + } + pub fn function_null_if(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 2); + arguments.remove(0).null_if(arguments.remove(0)) + } + pub fn function_iif(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 3); + arguments + .remove(0) + .iif(arguments.remove(0), arguments.remove(0)) + } + pub fn function_to_lowercase(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 1); + arguments.remove(0).to_lowercase() + } + pub fn function_to_uppercase(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 1); + arguments.remove(0).to_uppercase() + } + pub fn function_left(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 2); + arguments.remove(0).left(arguments.remove(0)) + } + pub fn function_right(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 2); + arguments.remove(0).right(arguments.remove(0)) + } + pub fn function_length(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 1); + arguments.remove(0).length() + } + + pub fn function_concat(mut arguments: Vec) -> Result { + arguments.remove(0).concat(arguments) + } + + pub fn function_replace(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 3); + arguments + .remove(0) + .replace(arguments.remove(0), arguments.remove(0)) + } + + pub fn function_round(mut arguments: Vec) -> Result { + optional_expect_arguments!(arguments, 1, 2); + let value = arguments.remove(0); + let places = if !arguments.is_empty() { + arguments.remove(0) + } else { + Self::I64(0) + }; + value.round(places) + } + + pub fn function_rand(arguments: Vec) -> Result { + match arguments.len() { + 0 => Self::function_random_float(arguments), + 2 => Self::function_random_int(arguments), + found => { + Err(ValueError::NumberOfFunctionParamsNotMatching { expected: 0, found }.into()) + } + } + } + pub fn function_random_float(arguments: Vec) -> Result { + expect_arguments!(arguments, 0); + Ok(Self::F64(fastrand::f64())) + } + pub fn function_random_int(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 2); + let min: i64 = arguments.remove(0).cast()?; + let max: i64 = arguments.remove(0).cast()?; + Ok(Self::I64(fastrand::i64(min..=max))) + } + pub fn function_uuid(arguments: Vec) -> Result { + expect_arguments!(arguments, 0); + Ok(Self::Str(Uuid::new_v4().to_hyphenated().to_string())) // TODO: Custom type + } + + pub fn function_pow(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 2); + arguments.remove(0).pow(arguments.remove(0)) + } + + pub fn function_convert(mut arguments: Vec) -> Result { + optional_expect_arguments!(arguments, 2, 3); + let datatype: String = arguments.remove(0).convert()?; + let value = arguments.remove(0); + let rule = if !arguments.is_empty() { + arguments.remove(0) + } else { + Self::I64(0) + }; + Ok(match datatype.to_uppercase().as_str() { + // Unfortunatly we cannot get datatype directly, it needs to be given as string + "BOOLEAN" => Value::Bool(value.cast_with_rule(rule)?), + "INTEGER" => Value::I64(value.cast_with_rule(rule)?), + "FLOAT" => Value::F64(value.cast_with_rule(rule)?), + "TEXT" => 
Value::Str(value.cast_with_rule(rule)?), + "TIMESTAMP" => { + // Temp, need Value::Timestamp + let datetime: NaiveDateTime = value.cast_with_rule(rule)?; + + Value::I64(datetime.timestamp()) + } + _ => return Err(ValueError::UnimplementedConvert.into()), + }) + } + pub fn function_try_convert(arguments: Vec) -> Result { + Ok(Value::function_convert(arguments).unwrap_or(Value::Null)) + } +} diff --git a/src/data/value/methods/mod.rs b/src/data/value/methods/mod.rs new file mode 100644 index 00000000..97cfe033 --- /dev/null +++ b/src/data/value/methods/mod.rs @@ -0,0 +1,16 @@ +mod aggregate; +mod binary; +mod function; +mod timestamp; +mod unary; +mod utility; +use { + crate::{ConvertFrom, Value}, + std::convert::Into, +}; + +pub trait ValueCore: Into + ConvertFrom {} +impl ValueCore for bool {} +impl ValueCore for i64 {} +impl ValueCore for f64 {} +impl ValueCore for String {} diff --git a/src/data/value/methods/timestamp.rs b/src/data/value/methods/timestamp.rs new file mode 100644 index 00000000..e10d6d29 --- /dev/null +++ b/src/data/value/methods/timestamp.rs @@ -0,0 +1,314 @@ +use { + crate::{Convert, Result, Value, ValueError}, + chrono::{Datelike, NaiveDate, NaiveDateTime, Timelike}, + fstrings::*, + std::{ + cmp::min, + convert::TryInto, + panic, + time::{SystemTime, UNIX_EPOCH}, + }, +}; + +macro_rules! protect_null { + ($protect: expr) => { + match $protect { + Value::Null => return Ok(Value::Null), + other => other, + } + }; +} + +macro_rules! expect_arguments { + ($arguments: expr, $expect: expr) => { + match $arguments.len() { + $expect => (), + found => { + return Err(ValueError::NumberOfFunctionParamsNotMatching { + expected: $expect, + found, + } + .into()) + } + } + }; +} + +macro_rules! optional_expect_arguments { + ($arguments: expr, $min: expr, $max: expr) => { + match $arguments.len() { + len if ($min..=$max).contains(&len) => (), + found => { + return Err(ValueError::NumberOfFunctionParamsNotMatching { + expected: $min, + found, + } + .into()) + } + } + }; +} + +impl Value { + pub fn function_now(arguments: Vec) -> Result { + expect_arguments!(arguments, 0); + Value::now() + } + pub fn function_year(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 1); + protect_null!(arguments.remove(0)).year() + } + pub fn function_month(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 1); + protect_null!(arguments.remove(0)).month() + } + pub fn function_day(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 1); + protect_null!(arguments.remove(0)).day() + } + pub fn function_hour(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 1); + protect_null!(arguments.remove(0)).hour() + } + pub fn function_minute(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 1); + protect_null!(arguments.remove(0)).minute() + } + pub fn function_second(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 1); + protect_null!(arguments.remove(0)).second() + } + + pub fn function_timestamp_add(mut arguments: Vec) -> Result { + expect_arguments!(arguments, 3); + arguments.remove(0).date_add( + protect_null!(arguments.remove(0)), + protect_null!(arguments.remove(0)), + ) + } + pub fn function_timestamp_from_parts(arguments: Vec) -> Result { + optional_expect_arguments!(arguments, 1, 6); + protect_null!(arguments.get(0).cloned().unwrap_or(Value::I64(1))).date_from_parts( + protect_null!(arguments.get(1).cloned().unwrap_or(Value::I64(1))), + protect_null!(arguments.get(2).cloned().unwrap_or(Value::I64(1))), + 
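A sketch of the string-keyed `CONVERT` dispatch above, assuming the default rule (`I64(0)`) selects plain cast behaviour; `TRY_CONVERT` is the same call with the error swallowed into `Null`:

```rust
use multisql::Value;

// CONVERT('INTEGER', '42'): the datatype name comes first, then the value.
let converted = Value::function_convert(vec![
	Value::Str("INTEGER".into()),
	Value::Str("42".into()),
]).unwrap();
assert!(matches!(converted, Value::I64(42)));

// TRY_CONVERT yields NULL instead of propagating the cast failure.
let failed = Value::function_try_convert(vec![
	Value::Str("INTEGER".into()),
	Value::Str("not a number".into()),
]).unwrap();
assert!(matches!(failed, Value::Null));
```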
protect_null!(arguments.get(3).cloned().unwrap_or(Value::I64(0))), + protect_null!(arguments.get(4).cloned().unwrap_or(Value::I64(0))), + protect_null!(arguments.get(5).cloned().unwrap_or(Value::I64(0))), + ) + } +} + +// System +impl Value { + pub fn now() -> Result { + Ok(Value::I64( + NaiveDateTime::from_timestamp( + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs() as i64, + 0, + ) + .timestamp(), + )) + } +} + +// Parts +impl Value { + pub fn year(self) -> Result { + let datetime: NaiveDateTime = self.convert()?; + Ok(Value::I64(datetime.year() as i64)) + } + pub fn month(self) -> Result { + let datetime: NaiveDateTime = self.convert()?; + Ok(Value::I64(datetime.month() as i64)) + } + pub fn day(self) -> Result { + let datetime: NaiveDateTime = self.convert()?; + Ok(Value::I64(datetime.day() as i64)) + } + pub fn hour(self) -> Result { + let datetime: NaiveDateTime = self.convert()?; + Ok(Value::I64(datetime.hour() as i64)) + } + pub fn minute(self) -> Result { + let datetime: NaiveDateTime = self.convert()?; + Ok(Value::I64(datetime.minute() as i64)) + } + pub fn second(self) -> Result { + let datetime: NaiveDateTime = self.convert()?; + Ok(Value::I64(datetime.second() as i64)) + } +} + +// Math +impl Value { + pub fn date_add(self, amount: Value, datetime: Value) -> Result { + let datetime: NaiveDateTime = datetime.convert()?; + let amount: i64 = amount.convert()?; + let amount: i32 = amount.try_into().map_err(|_| ValueError::DateError)?; + if amount > 100_000 { + panic!("Looks like you put the amount and timestamp the wrong way around. This will be fixed in future by using different datatypes"); + } + + match self { + Value::Str(string) if string == "YEAR" => { + let years = datetime.year() + amount as i32; + let calculated = datetime.with_year(years).unwrap_or_else( + || { + datetime + .with_day(28) + .ok_or(ValueError::DateError) + .unwrap() //? + .with_year(years) + .ok_or(ValueError::DateError) + .unwrap() + }, //?, + ); + Ok(Value::I64(calculated.timestamp())) + } + Value::Str(string) if string == "MONTH" => { + let month: i32 = datetime + .month() + .try_into() + .map_err(|_| ValueError::DateError)?; + + let months = month + amount; + let month = ((months - 1) % 12) + 1; + + let years = (months - month) / 12; + let month: u32 = month.try_into().map_err(|_| ValueError::DateError).unwrap(); //?; + + let (years, month) = if month == 0 { (-1, 12) } else { (years, month) }; // TEMP-- no support for > -1 yet + + let next_month = if datetime.month() == 12 { + NaiveDate::from_ymd(datetime.year() + 1, 1, 1) + } else { + NaiveDate::from_ymd(datetime.year(), datetime.month() + 1, 1) + }; + let this_month = NaiveDate::from_ymd(datetime.year(), datetime.month(), 1); + + let month_days: u32 = NaiveDate::signed_duration_since(next_month, this_month) + .num_days() + .try_into() + .map_err(|_| ValueError::DateError)?; + + let day = min(datetime.day(), month_days); + let calculated = datetime + .with_day(day) + .ok_or(ValueError::DateError) + .unwrap() //? 
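The part extractors read anything that converts to a `NaiveDateTime`; a brief sketch, assuming the crate's `Convert` impl accepts the second-resolution `I64` timestamps that `Value::now` produces:

```rust
use multisql::Value;

// 1_646_092_800 is 2022-03-01 00:00:00 UTC.
let ts = Value::I64(1_646_092_800);
assert!(matches!(ts.clone().year().unwrap(), Value::I64(2022)));
assert!(matches!(ts.clone().month().unwrap(), Value::I64(3)));
assert!(matches!(ts.day().unwrap(), Value::I64(1)));
```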
+ .with_month(month) + .ok_or(ValueError::DateError) + .unwrap(); //?; + + let calculated = Value::I64(calculated.timestamp()); + Value::Str(String::from("YEAR")).date_add(Value::I64(years as i64), calculated) + } + Value::Str(string) if string == "DAY" => { + let day: i32 = datetime + .day() + .try_into() + .map_err(|_| ValueError::DateError)?; + let days = day + amount; + + let next_month = if datetime.month() == 12 { + NaiveDate::from_ymd(datetime.year() + 1, 1, 1) + } else { + NaiveDate::from_ymd(datetime.year(), datetime.month() + 1, 1) + }; + let this_month = NaiveDate::from_ymd(datetime.year(), datetime.month(), 1); + + let month_days: i32 = NaiveDate::signed_duration_since(next_month, this_month) + .num_days() + .try_into() + .map_err(|_| ValueError::DateError)?; + + if days > month_days { + let first_day = datetime.with_day(1).ok_or(ValueError::DateError)?; + let next_month = Value::Str(String::from("MONTH")) + .date_add(Value::I64(1), Value::I64(first_day.timestamp()))?; + Value::Str(String::from("DAY")).date_add( + Value::I64( + (days - month_days - 1) + .try_into() + .map_err(|_| ValueError::DateError) + .unwrap(), //?, + ), + next_month, + ) + } else if days <= 0 { + let prev_month = if datetime.month() == 1 { + NaiveDate::from_ymd(datetime.year() - 1, 12, 1) + } else { + NaiveDate::from_ymd(datetime.year(), datetime.month() - 1, 1) + }; + + let prev_month_days: i32 = + NaiveDate::signed_duration_since(this_month, prev_month) + .num_days() + .try_into() + .map_err(|_| ValueError::DateError)?; + + let first_day = datetime.with_day(1).ok_or(ValueError::DateError)?; + let prev_month = Value::Str(String::from("MONTH")) + .date_add(Value::I64(-1), Value::I64(first_day.timestamp()))?; + Value::Str(String::from("DAY")).date_add( + Value::I64( + (days + prev_month_days - 1) + .try_into() + .map_err(|_| ValueError::DateError) + .unwrap(), //?, + ), + prev_month, + ) + } else { + let day: u32 = days.try_into().map_err(|_| ValueError::DateError)?; + Ok(Value::I64( + datetime + .with_day(day) + .ok_or(ValueError::DateError)? + .timestamp(), + )) + } + } + _ => Err(ValueError::BadInput(self).into()), + } + } + pub fn date_from_parts( + self, + month: Value, + day: Value, + hour: Value, + minute: Value, + second: Value, + ) -> Result { + let (year, month, day, hour, minute, second): (i64, i64, i64, i64, i64, i64) = ( + self.convert()?, + month.convert()?, + day.convert()?, + hour.convert()?, + minute.convert()?, + second.convert()?, + ); + let (year, month, day, hour, minute, second): (i32, u32, u32, u32, u32, u32) = ( + year.try_into().map_err(|_| ValueError::DateError)?, + month.try_into().map_err(|_| ValueError::DateError)?, + day.try_into().map_err(|_| ValueError::DateError)?, + hour.try_into().map_err(|_| ValueError::DateError)?, + minute.try_into().map_err(|_| ValueError::DateError)?, + second.try_into().map_err(|_| ValueError::DateError)?, + ); + let datetime = panic::catch_unwind(|| { + NaiveDate::from_ymd(year, month, day).and_hms(hour, minute, second) + }) + .map_err(|panic| { + ValueError::SpecifiedTimestampError(f!( + "{year=}, {month=}, {day=},\n{hour=}, {minute=}, {second=}\n{panic=:?}" + )) + })?; + + Ok(Value::I64(datetime.timestamp())) + } +} diff --git a/src/data/value/methods/unary.rs b/src/data/value/methods/unary.rs new file mode 100644 index 00000000..05a9302b --- /dev/null +++ b/src/data/value/methods/unary.rs @@ -0,0 +1,52 @@ +use { + super::ValueCore, + crate::{Convert, ConvertFrom, Result, Value, ValueError}, + std::ops::Neg, +}; + +macro_rules! 
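To make the `date_add` calling convention concrete (the unit string is the receiver, then amount, then timestamp), a sketch assuming second-resolution `I64` timestamps throughout; note the `DAY` branch rolls across month boundaries explicitly:

```rust
use multisql::Value;

// 2022-01-15 00:00:00 UTC plus one month lands on 2022-02-15.
let jan_15 = Value::I64(1_642_204_800);
let feb_15 = Value::Str("MONTH".into())
	.date_add(Value::I64(1), jan_15)
	.unwrap();
assert!(matches!(feb_15, Value::I64(1_644_883_200)));

// 2022-01-31 plus one DAY rolls over into February.
let jan_31 = Value::I64(1_643_587_200);
let feb_1 = Value::Str("DAY".into())
	.date_add(Value::I64(1), jan_31)
	.unwrap();
assert!(matches!(feb_1, Value::I64(1_643_673_600)));
```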
generic { + ($name: ident, $generic_name: ident) => { + pub fn $generic_name(self) -> Result { + if !i64::convert_from(self.clone()).is_err() { + // TODO: Improve + self.$name::() + } else if !f64::convert_from(self.clone()).is_err() { + self.$name::() + } else { + Err(ValueError::OnlySupportsNumeric(self, stringify!($name)).into()) + } + } + }; +} + +#[allow(clippy::should_implement_trait)] // TODO +impl Value { + pub fn unary_plus(self) -> Result + where + Core: ValueCore + Clone, + { + let core = Core::convert_from(self)?; + let result = core; + Ok(result.into()) + } + pub fn unary_minus(self) -> Result + where + Core: ValueCore + Neg, + { + let core = Core::convert_from(self)?; + let result = -core; + Ok(result.into()) + } + + generic!(unary_plus, generic_unary_plus); + generic!(unary_minus, generic_unary_minus); + + pub fn not(self) -> Result { + let boolean: bool = self.convert()?; + let result = !boolean; + Ok(result.into()) + } + pub fn is_null(self) -> Result { + Ok(Value::Bool(matches!(self, Value::Null))) + } +} diff --git a/src/data/value/methods/utility.rs b/src/data/value/methods/utility.rs new file mode 100644 index 00000000..d104f798 --- /dev/null +++ b/src/data/value/methods/utility.rs @@ -0,0 +1,109 @@ +use { + crate::{Convert, Result, Value, ValueError}, + std::cmp::min, +}; + +macro_rules! protect_null { + ($protect: expr) => { + if matches!($protect, Value::Null) { + return Ok($protect); + } + }; +} + +impl Value { + pub fn if_null(self, alternative: Self) -> Self { + if !matches!(self, Value::Null) { + self + } else { + alternative + } + } + pub fn null_if(self, evaluate: Self) -> Result { + Ok(if self == evaluate { Value::Null } else { self }) + } + pub fn iif(self, case_true: Self, case_false: Self) -> Result { + Ok(if self.convert()? { + case_true + } else { + case_false + }) + } + + pub fn to_uppercase(self) -> Result { + protect_null!(self); + let string: String = self.convert()?; + Ok(string.to_uppercase().into()) + } + pub fn to_lowercase(self) -> Result { + protect_null!(self); + let string: String = self.convert()?; + Ok(string.to_lowercase().into()) + } + pub fn left(self, length: Value) -> Result { + protect_null!(self); + protect_null!(length); + let length: i64 = length.convert()?; + if length < 0 { + return Err(ValueError::BadInput(length.into()).into()); + } + let length: usize = length as usize; + let string: String = self.convert()?; + + let truncated = string + .get(..length) + .map(|result| result.to_string()) + .unwrap_or(string); + Ok(Value::Str(truncated)) + } + pub fn right(self, length: Value) -> Result { + protect_null!(self); + protect_null!(length); + let length: i64 = length.convert()?; + if length < 0 { + return Err(ValueError::BadInput(length.into()).into()); + } + let length: usize = length as usize; + let string: String = self.convert()?; + + let truncated = string + .get(string.len() - min(string.len(), length)..) 
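The unary `generic` macro dispatches the same way as the binary one: `i64` if the operand converts, else `f64`, else an `OnlySupportsNumeric` error. A short sketch:

```rust
use multisql::Value;

let negated = Value::I64(5).generic_unary_minus().unwrap();
assert!(matches!(negated, Value::I64(-5)));

let flipped = Value::Bool(true).not().unwrap();
assert!(matches!(flipped, Value::Bool(false)));

// IS NULL never errors; it just reports the variant.
assert!(matches!(Value::Null.is_null().unwrap(), Value::Bool(true)));
```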
+ .map(|result| result.to_string()) + .unwrap_or(string); + Ok(Value::Str(truncated)) + } + pub fn length(self) -> Result { + let string: String = self.convert()?; + Ok(Value::I64(string.len() as i64)) + } + + pub fn concat(self, strings: Vec) -> Result { + strings + .into_iter() + .try_fold(self, |all, this| all.string_concat(this)) + } + + pub fn replace(self, from: Value, to: Value) -> Result { + protect_null!(self); + let string: String = self.convert()?; + let from: String = from.convert()?; + let to: String = to.convert()?; + + Ok(string.replace(&from, &to).into()) + } + + pub fn round(self, places: Value) -> Result { + if matches!(self, Value::Null) { + return Ok(self); + } + let value: f64 = self.convert()?; + let places: i64 = places.convert()?; + let raiser: f64 = 10_u32.pow(places as u32).into(); + Ok(Value::F64((value * raiser).round() / raiser)) + } + pub fn pow(self, power: Value) -> Result { + let value: f64 = self.convert()?; + let power: f64 = power.convert()?; + Ok(Value::F64(value.powf(power))) + } +} diff --git a/src/data/value/mod.rs b/src/data/value/mod.rs new file mode 100644 index 00000000..6680818c --- /dev/null +++ b/src/data/value/mod.rs @@ -0,0 +1,286 @@ +use { + crate::result::Result, + serde::{Deserialize, Serialize}, + sqlparser::ast::DataType, + std::{ + cmp::Ordering, + fmt::Debug, + hash::{Hash, Hasher}, + }, +}; + +mod big_endian; +mod cast; +mod convert; +mod error; +mod literal; +mod methods; +mod serde_convert; +mod value_type; + +pub use { + big_endian::BigEndian, + cast::{Cast, CastWithRules}, + convert::{Convert, ConvertFrom}, + error::ValueError, + value_type::ValueType, +}; + +/// # Value +/// Value is MultiSQL's value wrapper and stores any values which interact with the stores. +/// At times they may be converted in the interface for convinence but otherwise, all value interactions with MultiSQL require this wrapper. +/// +/// ## Conversion +/// Value implements conversion from inner types; for example: +/// +/// ``` +/// # use multisql::Value; +/// let value: Value = Value::I64(10); +/// let int: i64 = 10; +/// +/// let int_value: Value = int.into(); +/// +/// assert_eq!(value, int_value); +/// ``` +/// +/// ### Casting +/// Values can be cast between types via [Cast], for example: +/// +/// ``` +/// # use multisql::{Value, Cast}; +/// let value_str: Value = Value::Str(String::from("10")); +/// let int: i64 = 10; +/// +/// let str_int: i64 = value_str.cast().unwrap(); +/// +/// assert_eq!(int, str_int); +/// +/// assert_eq!(Value::I64(int), Value::I64(Value::Str(String::from("10")).cast().unwrap())); +/// ``` +/// +/// ## Equality +/// Values of the same type compare as their inner values would. +/// +/// Null never equals Null. +/// +/// Floats and Integers implicitly compare and convert. 
+/// (Feature: `implicit_float_conversion`) +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum Value { + Null, + + Bool(bool), + U64(u64), + I64(i64), + F64(f64), + Str(String), + + Bytes(Vec), + Timestamp(i64), + + Internal(i64), +} + +impl Hash for Value { + fn hash(&self, state: &mut H) { + self.to_be_bytes().hash(state) + } +} +impl Eq for Value {} +impl Ord for Value { + fn cmp(&self, other: &Self) -> Ordering { + self.partial_cmp(other).unwrap_or(Ordering::Equal) + } +} + +impl From for Value { + fn from(from: bool) -> Value { + Value::Bool(from) + } +} +impl From for Value { + fn from(from: i64) -> Value { + Value::I64(from) + } +} +impl From for Value { + fn from(from: f64) -> Value { + Value::F64(from) + } +} +impl From for Value { + fn from(from: String) -> Value { + Value::Str(from) + } +} +impl From for String { + // unsafe + fn from(from: Value) -> String { + from.cast().unwrap() + } +} + +impl PartialEq for Value { + fn eq(&self, other: &Value) -> bool { + match (self, other) { + (Value::Bool(l), Value::Bool(r)) => l == r, + (Value::I64(l), Value::I64(r)) => l == r, + (Value::F64(l), Value::F64(r)) => l == r, + (Value::Str(l), Value::Str(r)) => l == r, + (Value::Bytes(l), Value::Bytes(r)) => l == r, + (Value::Timestamp(l), Value::Timestamp(r)) => l == r, + + (Value::Internal(l), Value::Internal(r)) => l == r, + + #[cfg(feature = "implicit_float_conversion")] + (Value::I64(l), Value::F64(r)) => (*l as f64) == *r, + #[cfg(feature = "implicit_float_conversion")] + (Value::F64(l), Value::I64(r)) => *l == (*r as f64), + _ => false, + } + } +} + +impl PartialOrd for Value { + fn partial_cmp(&self, other: &Value) -> Option { + match (self, other) { + (Value::Bool(l), Value::Bool(r)) => Some(l.cmp(r)), + (Value::I64(l), Value::I64(r)) => Some(l.cmp(r)), + (Value::F64(l), Value::F64(r)) => l.partial_cmp(r), + (Value::Str(l), Value::Str(r)) => Some(l.cmp(r)), + (Value::Bytes(l), Value::Bytes(r)) => Some(l.cmp(r)), + (Value::Timestamp(l), Value::Timestamp(r)) => Some(l.cmp(r)), + + (Value::Internal(l), Value::Internal(r)) => Some(l.cmp(r)), + + #[cfg(feature = "implicit_float_conversion")] + (Value::I64(l), Value::F64(r)) => (*l as f64).partial_cmp(r), + + #[cfg(feature = "implicit_float_conversion")] + (Value::F64(l), Value::I64(r)) => l.partial_cmp(&(*r as f64)), + + _ => None, + } + } +} + +pub trait NullOrd { + fn null_cmp(&self, other: &Self) -> Option; +} + +impl NullOrd for Value { + fn null_cmp(&self, other: &Self) -> Option { + self.partial_cmp(other).or(match (self, other) { + (Value::Null, Value::Null) => None, + (Value::Null, _) => Some(Ordering::Less), + (_, Value::Null) => Some(Ordering::Greater), + _ => None, + }) + } +} + +impl Value { + pub fn validate_type(mut self, data_type: &DataType) -> Result { + let mut valid = self.type_is_valid(data_type); + + if !valid { + let converted = match data_type { + DataType::Float(_) => self.clone().convert().map(Value::F64).ok(), + _ => None, + }; + if let Some(converted) = converted { + if converted.type_is_valid(data_type) { + valid = true; + self = converted; + } + } + } + + if !valid { + return Err(ValueError::IncompatibleDataType { + data_type: data_type.to_string(), + value: format!("{:?}", self), + } + .into()); + } + + Ok(self) + } + pub fn is(&mut self, data_type: &ValueType) -> Result<()> { + match (data_type, &self) { + (ValueType::Bool, Value::Bool(_)) + | (ValueType::U64, Value::U64(_)) + | (ValueType::I64, Value::I64(_)) + | (ValueType::F64, Value::F64(_)) + | (ValueType::Str, Value::Str(_)) + | 
(ValueType::Timestamp, Value::Timestamp(_)) + | (ValueType::Any, _) + | (_, Value::Null) => Ok(()), + (ValueType::F64, Value::I64(_)) => { + *self = Value::F64(self.clone().cast()?); + Ok(()) + } + _ => Err(ValueError::IncompatibleDataType { + data_type: data_type.to_string(), + value: format!("{:?}", self), + } + .into()), + } + } + + fn type_is_valid(&self, data_type: &DataType) -> bool { + matches!( + (data_type, self), + (DataType::Boolean, Value::Bool(_)) + | (DataType::Int(_), Value::I64(_)) + | (DataType::Float(_), Value::F64(_)) + | (DataType::Text, Value::Str(_)) + | (DataType::Boolean, Value::Null) + | (DataType::Int(_), Value::Null) + | (DataType::Float(_), Value::Null) + | (DataType::Text, Value::Null) + ) + } + + pub fn validate_null(&self, nullable: bool) -> Result<()> { + if !nullable && matches!(self, Value::Null) { + return Err(ValueError::NullValueOnNotNullField.into()); + } + + Ok(()) + } + + pub fn cast_datatype(&self, data_type: &DataType) -> Result { + match (data_type, self) { + (DataType::Boolean, Value::Bool(_)) + | (DataType::Int(_), Value::I64(_)) + | (DataType::Float(_), Value::F64(_)) + | (DataType::Text, Value::Str(_)) => Ok(self.clone()), + (_, Value::Null) => Ok(Value::Null), + + (DataType::Boolean, value) => value.clone().cast().map(Value::Bool), + (DataType::Int(_), value) => value.clone().cast().map(Value::I64), + (DataType::Float(_), value) => value.clone().cast().map(Value::F64), + (DataType::Text, value) => value.clone().cast().map(Value::Str), + + _ => Err(ValueError::UnimplementedCast.into()), + } + } + + pub fn inc(&self) -> Self { + match self { + Value::Bool(false) => Value::Bool(true), + Value::I64(val) => Value::I64(val + 1), + Value::F64(val) => Value::F64(f64::from_bits(val.to_bits() + 1)), + _ => unimplemented!(), // TODO: Handle better & expand + } + } + pub fn dec(&self) -> Self { + match self { + Value::Bool(true) => Value::Bool(false), + Value::I64(val) => Value::I64(val - 1), + Value::F64(val) => Value::F64(f64::from_bits(val.to_bits() - 1)), + _ => unimplemented!(), // TODO: Handle better & expand + } + } +} diff --git a/src/data/value/serde_convert.rs b/src/data/value/serde_convert.rs new file mode 100644 index 00000000..91762fe5 --- /dev/null +++ b/src/data/value/serde_convert.rs @@ -0,0 +1,14 @@ +use crate::Value; + +impl From for serde_json::value::Value { + fn from(value: Value) -> serde_json::value::Value { + match value { + Value::Bool(value) => value.into(), + Value::I64(value) => value.into(), + Value::F64(value) => value.into(), + Value::Str(value) => value.into(), + Value::Null => serde_json::value::Value::Null, + _ => unimplemented!(), + } + } +} diff --git a/src/data/value/value_type/cast.rs b/src/data/value/value_type/cast.rs new file mode 100644 index 00000000..2cb3ae2d --- /dev/null +++ b/src/data/value/value_type/cast.rs @@ -0,0 +1,39 @@ +use { + crate::{Cast, Result, Value, ValueError, ValueType}, + std::string::ToString, +}; + +impl Value { + pub fn cast_valuetype(&self, value_type: &ValueType) -> Result { + match (value_type, self) { + (ValueType::Bool, Value::Bool(_)) + | (ValueType::I64, Value::I64(_)) + | (ValueType::F64, Value::F64(_)) + | (ValueType::Str, Value::Str(_)) + | (ValueType::Any, _) => Ok(self.clone()), + (_, Value::Null) => Ok(Value::Null), + + (ValueType::Bool, value) => value.clone().cast().map(Value::Bool), + (ValueType::I64, value) => value.clone().cast().map(Value::I64), + (ValueType::F64, value) => value.clone().cast().map(Value::F64), + (ValueType::Str, value) => 
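Note that `inc`/`dec` step to the adjacent value in ordering terms, which for floats means one bit of the mantissa rather than `+1.0`:

```rust
use multisql::Value;

assert!(matches!(Value::I64(41).inc(), Value::I64(42)));

// For F64, inc() yields the next representable float above 1.0 (one ULP),
// not 2.0; this is useful for turning inclusive bounds into exclusive ones.
if let Value::F64(next) = Value::F64(1.0).inc() {
	assert!(next > 1.0 && next < 1.000_000_1);
}
```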
value.clone().cast().map(Value::Str), + + _ => Err(ValueError::UnimplementedCast.into()), + } + } +} + +impl ToString for ValueType { + fn to_string(&self) -> String { + use ValueType::*; + match self { + Bool => String::from("Boolean"), + U64 => String::from("Unsigned Integer"), + I64 => String::from("Signed Integer"), + F64 => String::from("Float"), + Str => String::from("Text"), + Timestamp => String::from("Timestamp"), + Any => String::from("Any"), + } + } +} diff --git a/src/data/value/value_type/mod.rs b/src/data/value/value_type/mod.rs new file mode 100644 index 00000000..894750d4 --- /dev/null +++ b/src/data/value/value_type/mod.rs @@ -0,0 +1,49 @@ +mod cast; + +use { + crate::Value, + serde::{Deserialize, Serialize}, + sqlparser::ast::DataType, +}; + +#[derive(Clone, Serialize, Deserialize)] +pub enum ValueType { + Bool, + U64, + I64, + F64, + Str, + Timestamp, + Any, +} +impl Default for ValueType { + fn default() -> Self { + Self::Any + } +} +impl From for ValueType { + fn from(value: Value) -> Self { + match value { + Value::Bool(_) => ValueType::Bool, + Value::U64(_) => ValueType::U64, + Value::I64(_) => ValueType::I64, + Value::F64(_) => ValueType::F64, + Value::Str(_) => ValueType::Str, + Value::Timestamp(_) => ValueType::Timestamp, + _ => ValueType::Any, + } + } +} +impl From for ValueType { + fn from(data_type: DataType) -> Self { + match data_type { + DataType::Boolean => ValueType::Bool, + DataType::UnsignedInt(_) => ValueType::U64, + DataType::Int(_) => ValueType::I64, + DataType::Float(_) => ValueType::F64, + DataType::Text => ValueType::Str, + DataType::Timestamp => ValueType::Timestamp, + _ => ValueType::Any, + } + } +} diff --git a/src/database/auto_increment.rs b/src/database/auto_increment.rs new file mode 100644 index 00000000..30200d1a --- /dev/null +++ b/src/database/auto_increment.rs @@ -0,0 +1,33 @@ +use { + crate::{DatabaseError, Result}, + async_trait::async_trait, +}; + +#[async_trait(?Send)] +pub trait AutoIncrement { + async fn generate_increment_values( + &mut self, + _table_name: String, + _columns: Vec<( + usize, /*index*/ + String, /*name*/ + i64, /*row_count*/ + ) /*column*/>, // TODO: Use struct + ) -> Result< + Vec<( + /*column*/ (usize /*index*/, String /*name*/), + /*start_value*/ i64, + )>, + > { + Err(DatabaseError::Unimplemented.into()) + } + + async fn set_increment_value( + &mut self, + _table_name: &str, + _column_name: &str, + _end: i64, + ) -> Result<()> { + Err(DatabaseError::Unimplemented.into()) + } +} diff --git a/src/database/base.rs b/src/database/base.rs new file mode 100644 index 00000000..3918b423 --- /dev/null +++ b/src/database/base.rs @@ -0,0 +1,34 @@ +use { + crate::{DatabaseError, IndexFilter, Plane, Result, Schema, Value}, + async_trait::async_trait, +}; + +/// `Store` -> `SELECT` +#[async_trait(?Send)] +pub trait DBBase { + async fn fetch_schema(&self, _table_name: &str) -> Result> { + Err(DatabaseError::Unimplemented.into()) + } + async fn scan_schemas(&self) -> Result> { + Err(DatabaseError::Unimplemented.into()) + } + + async fn scan_data(&self, _table_name: &str) -> Result { + Err(DatabaseError::Unimplemented.into()) + } + + async fn scan_data_indexed( + &self, + _table_name: &str, + _index_filters: IndexFilter, + ) -> Result { + Err(DatabaseError::Unimplemented.into()) + } + async fn scan_index( + &self, + _table_name: &str, + _index_filter: IndexFilter, + ) -> Result> { + Err(DatabaseError::Unimplemented.into()) + } +} diff --git a/src/database/mod.rs b/src/database/mod.rs new file mode 100644 index 
00000000..75c170ef --- /dev/null +++ b/src/database/mod.rs @@ -0,0 +1,99 @@ +mod auto_increment; +mod base; +mod mutable; + +use std::sync::{Mutex, MutexGuard}; +use { + crate::Result, + serde::{Deserialize, Serialize}, + std::fmt::Debug, + thiserror::Error, +}; + +pub use {auto_increment::AutoIncrement, base::DBBase, mutable::DBMut}; + +#[derive(Error, Serialize, Debug, PartialEq)] +pub enum DatabaseError { + #[error("this database has not yet implemented this method")] + Unimplemented, + #[error("tried to connect to an unknown database")] + UnknownConnection, + #[error("table not found")] + TableNotFound, +} + +#[derive(Serialize, Deserialize)] +pub enum Connection { + Unknown, + #[cfg(feature = "memory-database")] + Memory, + #[cfg(feature = "sled-database")] + Sled(String), + #[cfg(feature = "csv-database")] + CSV(String, crate::CSVSettings), + #[cfg(feature = "sheet-database")] + Sheet(String), +} +impl Default for Connection { + fn default() -> Self { + Connection::Unknown + } +} +impl TryFrom for Database { + type Error = crate::Error; + fn try_from(connection: Connection) -> Result { + use { + crate::{CSVDatabase, MemoryDatabase, SheetDatabase, SledDatabase}, + Connection::*, + }; + let database: Mutex> = Mutex::new(match &connection { + #[cfg(feature = "memory-database")] + Memory => Box::new(MemoryDatabase::new()), + #[cfg(feature = "sled-database")] + Sled(path) => Box::new(SledDatabase::new(path)?), + #[cfg(feature = "csv-database")] + CSV(path, settings) => Box::new(CSVDatabase::new_with_settings(path, settings.clone())?), + #[cfg(feature = "sheet-database")] + Sheet(path) => Box::new(SheetDatabase::new(path)?), + Unknown => return Err(DatabaseError::UnknownConnection.into()), + }); + Ok(Database { + database, + source_connection: connection, + }) + } +} + +pub struct Database { + source_connection: Connection, + database: Mutex>, +} +impl Database { + pub fn new(database: Box) -> Self { + let database = Mutex::new(database); + Self { + database, + source_connection: Connection::default(), + } + } + pub fn get(&self) -> MutexGuard> { + self.database + .lock() + .expect("Unreachable: Database wasn't replaced!") + } + pub fn get_mut(&mut self) -> &mut Box { + self.database + .get_mut() + .expect("Unreachable: Database wasn't replaced!") + } + pub fn into_source(self) -> Connection { + self.source_connection + } + pub fn from_source(connection: Connection) -> Result { + connection.try_into() + } +} + +pub type DatabaseInner = dyn DBFull; + +pub trait DBFull: DBBase + DBMut + AutoIncrement {} diff --git a/src/database/mutable.rs b/src/database/mutable.rs new file mode 100644 index 00000000..722c0456 --- /dev/null +++ b/src/database/mutable.rs @@ -0,0 +1,41 @@ +use { + crate::{DatabaseError, Result, Row, Schema, SchemaDiff, Value}, + async_trait::async_trait, +}; + +/// `StoreMut` -> `INSERT`, `CREATE`, `DELETE`, `DROP`, `UPDATE` +#[async_trait(?Send)] +pub trait DBMut { + async fn insert_schema(&mut self, _schema: &Schema) -> Result<()> { + Err(DatabaseError::Unimplemented.into()) + } + + async fn delete_schema(&mut self, _table_name: &str) -> Result<()> { + Err(DatabaseError::Unimplemented.into()) + } // Shouldn't this be AlterTable? 
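For callers, the `database/mod.rs` flow above means a `Connection` is plain data that opens into a `Database` and can be recovered from it later. A sketch, assuming the `sled-database` feature is enabled; the path is hypothetical:

```rust
use multisql::{Connection, Database};

let connection = Connection::Sled(String::from("data/example-db")); // hypothetical path
let database: Database = connection.try_into().expect("storage should open");

// The Database keeps its source Connection, so it can be serialized back out.
let _source: Connection = database.into_source();
```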
+ + async fn insert_data(&mut self, _table_name: &str, _rows: Vec) -> Result<()> { + Err(DatabaseError::Unimplemented.into()) + } + + async fn update_data(&mut self, _table_name: &str, _rows: Vec<(Value, Row)>) -> Result<()> { + Err(DatabaseError::Unimplemented.into()) + } + + async fn delete_data(&mut self, _table_name: &str, _keys: Vec) -> Result<()> { + Err(DatabaseError::Unimplemented.into()) + } + + async fn update_index( + &mut self, + _index_name: &str, + _table_name: &str, + _keys: Vec<(Value, Value)>, + ) -> Result<()> { + Err(DatabaseError::Unimplemented.into()) + } + + async fn alter_table(&mut self, _table_name: &str, _schema_diff: SchemaDiff) -> Result<()> { + Err(DatabaseError::Unimplemented.into()) + } +} diff --git a/src/databases/csv/auto_increment.rs b/src/databases/csv/auto_increment.rs new file mode 100644 index 00000000..25059c8e --- /dev/null +++ b/src/databases/csv/auto_increment.rs @@ -0,0 +1,43 @@ +use { + super::CSVDatabase, + crate::{AutoIncrement, Result, WIPError}, + async_trait::async_trait, + linecount::count_lines, +}; + +#[async_trait(?Send)] +impl AutoIncrement for CSVDatabase { + async fn generate_increment_values( + &mut self, + _table_name: String, + columns: Vec<( + usize, /*index*/ + String, /*name*/ + i64, /*row_count*/ + ) /*column*/>, + ) -> Result< + Vec<( + /*column*/ (usize /*index*/, String /*name*/), + /*start_value*/ i64, + )>, + > { + let lines: i64 = count_lines( + std::fs::File::open(self.path.as_str()) + .map_err(|error| WIPError::Debug(format!("{:?}", error)))?, + ) + .map_err(|error| WIPError::Debug(format!("{:?}", error)))? as i64; + Ok(columns + .into_iter() + .map(|(index, name, _)| ((index, name), lines)) + .collect()) + } + + async fn set_increment_value( + &mut self, + _table_name: &str, + _column_name: &str, + _end: i64, + ) -> Result<()> { + Ok(()) + } +} diff --git a/src/databases/csv/base.rs b/src/databases/csv/base.rs new file mode 100644 index 00000000..59deebc1 --- /dev/null +++ b/src/databases/csv/base.rs @@ -0,0 +1,43 @@ +use { + super::{utils::csv_reader, CSVDatabase}, + crate::{DBBase, Plane, Result, Row, Schema, Value, WIPError}, + async_trait::async_trait, +}; + +#[async_trait(?Send)] +impl DBBase for CSVDatabase { + async fn fetch_schema(&self, _table_name: &str) -> Result> { + Ok(self.schema.clone()) + } + async fn scan_schemas(&self) -> Result> { + Ok(self + .schema + .clone() + .map(|schema| vec![schema]) + .unwrap_or_default()) + } + + async fn scan_data(&self, _table_name: &str) -> Result { + let mut reader = csv_reader(self)?; + + #[allow(clippy::needless_collect)] + // Clippy doesn't understand the need. Needed because we have borrowed values within. 
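Because `DBBase` and `DBMut` give every method an `Unimplemented` default, a backend opts into only what it can actually do. A minimal sketch, assuming the usual crate-root re-exports:

```rust
use {async_trait::async_trait, multisql::{DBMut, Result, Schema}};

struct StubBackend;

#[async_trait(?Send)]
impl DBMut for StubBackend {
	// Only schema creation is supported here; insert_data, delete_data, etc.
	// keep their default bodies and surface DatabaseError::Unimplemented.
	async fn insert_schema(&mut self, _schema: &Schema) -> Result<()> {
		Ok(())
	}
}
```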
+ reader + .records() + .enumerate() + .map(|(index, record)| { + record + .map_err(|error| WIPError::Debug(format!("{:?}", error)).into()) + .map(|record| { + ( + Value::I64(index as i64), + Row(record + .into_iter() + .map(|cell| Value::Str(cell.to_string())) + .collect()), + ) + }) + }) + .collect::>() + } +} diff --git a/src/databases/csv/mod.rs b/src/databases/csv/mod.rs new file mode 100644 index 00000000..340ffc1c --- /dev/null +++ b/src/databases/csv/mod.rs @@ -0,0 +1,96 @@ +mod auto_increment; +mod base; +mod mutable; +mod utils; + +use { + crate::{data::Schema, Column, DBFull, Database, Result, ValueType, WIPError}, + csv::ReaderBuilder, + serde::{Deserialize, Serialize}, + std::{ + default::Default, + fmt::Debug, + fs::{File, OpenOptions}, + }, + thiserror::Error, +}; + +#[derive(Error, Serialize, Debug, PartialEq)] +pub enum CSVDatabaseError { + #[error("CSV storages only support one table at a time")] + OnlyOneTableAllowed, +} + +pub struct CSVDatabase { + schema: Option, + path: String, + pub csv_settings: CSVSettings, +} +#[derive(Clone, Serialize, Deserialize)] +pub struct CSVSettings { + pub delimiter: u8, + pub quoting: bool, +} +impl Default for CSVSettings { + fn default() -> Self { + Self { + delimiter: b',', + quoting: true, + } + } +} + +impl DBFull for CSVDatabase {} + +impl Database { + pub fn new_csv(storage: CSVDatabase) -> Self { + Self::new(Box::new(storage)) + } +} +impl CSVDatabase { + pub fn new(path: &str) -> Result { + Self::new_with_settings(path, CSVSettings::default()) + } + pub fn new_with_settings(path: &str, csv_settings: CSVSettings) -> Result { + let file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(path) + .map_err(|error| WIPError::Debug(format!("{:?}", error)))?; + + let schema = discern_schema(file, &csv_settings)?; + Ok(Self { + schema, + path: path.to_string(), + csv_settings, + }) + } +} + +fn discern_schema(file: File, csv_settings: &CSVSettings) -> Result> { + let mut reader = ReaderBuilder::new() + .delimiter(csv_settings.delimiter) + .from_reader(file); + let headers = reader + .headers() + .map_err(|error| WIPError::Debug(format!("{:?}", error)))?; + let column_defs = headers + .iter() + .map(|header| { + let mut column = Column::default(); + column.name = header.to_string(); + column.data_type = ValueType::Str; + column + }) + .collect(); + if headers.is_empty() { + Ok(None) + } else { + Ok(Some(Schema { + table_name: String::new(), + column_defs, + indexes: vec![], + })) + } +} diff --git a/src/databases/csv/mutable.rs b/src/databases/csv/mutable.rs new file mode 100644 index 00000000..adb3b484 --- /dev/null +++ b/src/databases/csv/mutable.rs @@ -0,0 +1,93 @@ +use { + super::{CSVDatabase, CSVDatabaseError}, + crate::{Cast, DBMut, Result, Row, Schema, WIPError}, + async_trait::async_trait, + csv::WriterBuilder, + std::{fs::OpenOptions, io::Write}, +}; + +#[async_trait(?Send)] +impl DBMut for CSVDatabase { + async fn insert_schema(&mut self, schema: &Schema) -> Result<()> { + if self.schema.is_some() { + return Err(CSVDatabaseError::OnlyOneTableAllowed.into()); + } + + let mut writer = WriterBuilder::new() + .delimiter(self.csv_settings.delimiter) + .from_writer(vec![]); // Not good but was having Size issues with moving this elsewhere + + let header: Vec = schema + .column_defs + .iter() + .map(|column_def| column_def.name.clone()) + .collect(); + + writer + .write_record(header) + .map_err(|error| WIPError::Debug(format!("{:?}", error)))?; + + writer + .flush() + .map_err(|error| 
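In practice the CSV backend derives everything from the header row: each header becomes a `Str` column and typing happens at query time. A usage sketch, with hypothetical file paths:

```rust
use multisql::{CSVDatabase, CSVSettings};

// Default settings: comma-delimited, quoting enabled.
let _database = CSVDatabase::new("data/people.csv").expect("open or create CSV");

// Tab-separated variant via explicit settings.
let settings = CSVSettings { delimiter: b'\t', quoting: false };
let _tsv = CSVDatabase::new_with_settings("data/people.tsv", settings).expect("open TSV");
```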
WIPError::Debug(format!("{:?}", error)))?; + + let csv_bytes = writer + .into_inner() + .map_err(|error| WIPError::Debug(format!("{:?}", error)))?; + + let mut file = OpenOptions::new() + .truncate(true) + .write(true) + .open(self.path.as_str()) + .map_err(|error| WIPError::Debug(format!("{:?}", error)))?; + + file.write_all(&csv_bytes) + .map_err(|error| WIPError::Debug(format!("{:?}", error)))?; + file.flush() + .map_err(|error| WIPError::Debug(format!("{:?}", error)))?; + + self.schema = Some(schema.clone()); + Ok(()) + } + + async fn delete_schema(&mut self, _table_name: &str) -> Result<()> { + self.schema = None; + Ok(()) + } + + async fn insert_data(&mut self, _table_name: &str, rows: Vec) -> Result<()> { + let mut writer = WriterBuilder::new() + .delimiter(self.csv_settings.delimiter) + .from_writer(vec![]); // Not good but was having Size issues with moving this elsewhere + + for row in rows.into_iter() { + let string_row = row + .0 + .into_iter() + .map(|cell| cell.cast()) + .collect::>>()?; + writer + .write_record(string_row) + .map_err(|error| WIPError::Debug(format!("{:?}", error)))?; + } + + writer + .flush() + .map_err(|error| WIPError::Debug(format!("{:?}", error)))?; + + let csv_bytes = writer + .into_inner() + .map_err(|error| WIPError::Debug(format!("{:?}", error)))?; + + let mut file = OpenOptions::new() + .append(true) + .open(self.path.as_str()) + .map_err(|error| WIPError::Debug(format!("{:?}", error)))?; + + file.write_all(&csv_bytes) + .map_err(|error| WIPError::Debug(format!("{:?}", error)))?; + file.flush() + .map_err(|error| WIPError::Debug(format!("{:?}", error)))?; + Ok(()) + } +} diff --git a/src/databases/csv/utils.rs b/src/databases/csv/utils.rs new file mode 100644 index 00000000..463ea305 --- /dev/null +++ b/src/databases/csv/utils.rs @@ -0,0 +1,22 @@ +use { + super::CSVDatabase, + crate::{Result, WIPError}, + csv::{Reader, ReaderBuilder}, + std::fs::File, +}; + +pub(crate) fn csv_reader(store: &CSVDatabase) -> Result> { + let reader = ReaderBuilder::new() + .delimiter(store.csv_settings.delimiter) + .quoting(store.csv_settings.quoting) + .buffer_capacity(8 * 500 * 1_000_000) // 500MB + .from_path(store.path.as_str()) + .map_err(|error| WIPError::Debug(format!("{:?}", error)))?; + Ok(reader) +} + +/*pub(crate) fn csv_writer(store: &CSVDatabase, init: T) -> Result> { + let writer = WriterBuilder::new().delimiter(store.csv_settings.delimiter).from_writer(init); + + Ok(writer) +}*/ diff --git a/src/databases/memory/auto_increment.rs b/src/databases/memory/auto_increment.rs new file mode 100644 index 00000000..3cfed1db --- /dev/null +++ b/src/databases/memory/auto_increment.rs @@ -0,0 +1,32 @@ +use { + crate::{AutoIncrement, MemoryDatabase, Result}, + async_trait::async_trait, +}; + +#[async_trait(?Send)] +impl AutoIncrement for MemoryDatabase { + async fn generate_increment_values( + &mut self, + table_name: String, + columns: Vec<(usize, String, i64)>, + ) -> Result> { + let row_init = self + .data + .get(&table_name) + .map(|rows| rows.len() + 1) + .unwrap_or(1); + Ok(columns + .into_iter() + .map(|(index, name, _)| ((index, name), row_init as i64)) + .collect()) + } + + async fn set_increment_value( + &mut self, + _table_name: &str, + _column_name: &str, + _end: i64, + ) -> Result<()> { + Ok(()) + } +} diff --git a/src/databases/memory/base.rs b/src/databases/memory/base.rs new file mode 100644 index 00000000..2ff0d6b1 --- /dev/null +++ b/src/databases/memory/base.rs @@ -0,0 +1,80 @@ +use std::collections::{BTreeMap, HashMap}; + +use crate::{join_iters, 
JoinType, Row}; + +use { + crate::{ + DBBase, IndexFilter, MemoryDatabase, MemoryDatabaseError, Plane, Result, Schema, Value, + }, + async_trait::async_trait, +}; + +#[async_trait(?Send)] +impl DBBase for MemoryDatabase { + async fn fetch_schema(&self, table_name: &str) -> Result> { + Ok(self.tables.get(&table_name.to_string()).cloned()) + } + async fn scan_schemas(&self) -> Result> { + Ok(self.tables.values().cloned().collect()) + } + + async fn scan_data(&self, table_name: &str) -> Result { + self.data + .get(&table_name.to_string()) + .cloned() + .ok_or(MemoryDatabaseError::TableNotFound.into()) + .map(|rows| rows.into_iter().collect()) + } + + async fn scan_data_indexed( + &self, + table_name: &str, + index_filter: IndexFilter, + ) -> Result { + let index_results = self.scan_index(table_name, index_filter).await?; + let default = HashMap::new(); + let rows = self.data.get(&table_name.to_string()).unwrap_or(&default); + Ok(index_results + .into_iter() + .filter_map(|pk| rows.get(&pk).map(|row| (pk.clone(), row.clone()))) + .collect::>()) + } + + async fn scan_index(&self, table_name: &str, index_filter: IndexFilter) -> Result> { + use IndexFilter::*; + match index_filter.clone() { + LessThan(index_name, ..) | MoreThan(index_name, ..) => { + let default = BTreeMap::new(); + let index = self + .indexes + .get(&table_name.to_string()) + .and_then(|indexes| indexes.get(&index_name)) + .unwrap_or(&default); + let index_results = match index_filter { + LessThan(_, max) => index.range(..max), + MoreThan(_, min) => index.range(min..), + _ => unreachable!(), + } + .map(|(_, pk)| pk.clone()) + .collect(); + Ok(index_results) + } + Inner(left, right) => { + let (left, right) = ( + self.scan_index(table_name, *left), + self.scan_index(table_name, *right), + ); + let (left, right) = (left.await?, right.await?); + Ok(join_iters(JoinType::Inner, left, right)) + } + Outer(left, right) => { + let (left, right) = ( + self.scan_index(table_name, *left), + self.scan_index(table_name, *right), + ); + let (left, right) = (left.await?, right.await?); + Ok(join_iters(JoinType::Outer, left, right)) + } + } + } +} diff --git a/src/databases/memory/mod.rs b/src/databases/memory/mod.rs new file mode 100644 index 00000000..a9727962 --- /dev/null +++ b/src/databases/memory/mod.rs @@ -0,0 +1,34 @@ +mod auto_increment; +mod base; +mod mutable; + +use { + crate::{database::*, Row, Schema, Value}, + serde::Serialize, + std::{ + collections::{BTreeMap, HashMap}, + fmt::Debug, + }, + thiserror::Error, +}; + +#[derive(Error, Serialize, Debug, PartialEq)] +pub enum MemoryDatabaseError { + #[error("table not found")] + TableNotFound, +} + +#[derive(Default, Clone)] +pub struct MemoryDatabase { + tables: HashMap, + data: HashMap>, + indexes: HashMap>>, +} + +impl DBFull for MemoryDatabase {} + +impl MemoryDatabase { + pub fn new() -> Self { + Self::default() + } +} diff --git a/src/databases/memory/mutable.rs b/src/databases/memory/mutable.rs new file mode 100644 index 00000000..6e649f8d --- /dev/null +++ b/src/databases/memory/mutable.rs @@ -0,0 +1,51 @@ +use std::collections::HashMap; + +use { + crate::{DBMut, MemoryDatabase, Result, Row, Schema, Value}, + async_trait::async_trait, +}; + +#[async_trait(?Send)] +impl DBMut for MemoryDatabase { + async fn insert_schema(&mut self, schema: &Schema) -> Result<()> { + let table_name = schema.table_name.clone(); + self.data.insert(table_name.clone(), HashMap::new()); + self.tables.insert(table_name, schema.clone()); + Ok(()) + } + + async fn delete_schema(&mut self, table_name: 
&str) -> Result<()> {
+		self.tables.remove(table_name);
+		self.data.remove(table_name);
+		Ok(())
+	}
+
+	async fn insert_data(&mut self, table_name: &str, rows: Vec<Row>) -> Result<()> {
+		let table_name = table_name.to_string();
+		let old_rows = self.data.remove(&table_name).unwrap_or_default();
+		let init = old_rows.len();
+		let rows = rows
+			.into_iter()
+			.enumerate()
+			.map(|(index, row)| (Value::U64((index + init) as u64), row))
+			.chain(old_rows.into_iter())
+			.collect();
+		self.data.insert(table_name, rows);
+		Ok(())
+	}
+
+	async fn update_index(
+		&mut self,
+		index_name: &str,
+		table_name: &str,
+		keys: Vec<(Value, Value)>,
+	) -> Result<()> {
+		// Argument order follows the `DBMut` trait: index name first, then table name.
+		let (table_name, index_name) = (table_name.to_string(), index_name.to_string());
+		let mut indexes = self.indexes.remove(&table_name).unwrap_or_default();
+		let mut index = indexes.remove(&index_name).unwrap_or_default();
+		index.extend(keys);
+		indexes.insert(index_name, index);
+		self.indexes.insert(table_name, indexes);
+		Ok(())
+	}
+}
diff --git a/src/databases/mod.rs b/src/databases/mod.rs new file mode 100644 index 00000000..64a71bf0 --- /dev/null +++ b/src/databases/mod.rs @@ -0,0 +1,19 @@
+#[cfg(feature = "sled-database")]
+mod sled;
+#[cfg(feature = "sled-database")]
+pub use self::sled::SledDatabase;
+
+#[cfg(feature = "csv-database")]
+mod csv;
+#[cfg(feature = "csv-database")]
+pub use self::csv::{CSVDatabase, CSVDatabaseError, CSVSettings};
+
+#[cfg(feature = "sheet-database")]
+mod sheet;
+#[cfg(feature = "sheet-database")]
+pub use self::sheet::{SheetDatabase, SheetDatabaseError};
+
+#[cfg(feature = "memory-database")]
+mod memory;
+#[cfg(feature = "memory-database")]
+pub use self::memory::{MemoryDatabase, MemoryDatabaseError};
diff --git a/src/databases/sheet/auto_increment.rs b/src/databases/sheet/auto_increment.rs new file mode 100644 index 00000000..00860218 --- /dev/null +++ b/src/databases/sheet/auto_increment.rs @@ -0,0 +1,32 @@
+use {
+	crate::{AutoIncrement, Result, SheetDatabase, SheetDatabaseError},
+	async_trait::async_trait,
+};
+
+#[async_trait(?Send)]
+impl AutoIncrement for SheetDatabase {
+	async fn generate_increment_values(
+		&mut self,
+		sheet_name: String,
+		columns: Vec<(usize, String, i64)>,
+	) -> Result<Vec<((usize, String), i64)>> {
+		let sheet = self
+			.book
+			.get_sheet_by_name_mut(&sheet_name)
+			.map_err(|_| SheetDatabaseError::FailedToGetSheet)?;
+		let row_init = sheet.get_row_dimensions().len();
+		Ok(columns
+			.into_iter()
+			.map(|(index, name, _)| ((index, name), row_init as i64))
+			.collect())
+	}
+
+	async fn set_increment_value(
+		&mut self,
+		_table_name: &str,
+		_column_name: &str,
+		_end: i64,
+	) -> Result<()> {
+		Ok(())
+	}
+}
diff --git a/src/databases/sheet/base.rs b/src/databases/sheet/base.rs new file mode 100644 index 00000000..2951f2d4 --- /dev/null +++ b/src/databases/sheet/base.rs @@ -0,0 +1,106 @@
+use crate::DatabaseError;
+use {
+	crate::{Cast, Column, DBBase, Plane, Result, Row, Schema, SheetDatabase, Value},
+	async_trait::async_trait,
+	std::convert::TryFrom,
+	umya_spreadsheet::{Cell, Worksheet},
+};
+
+#[async_trait(?Send)]
+impl DBBase for SheetDatabase {
+	async fn fetch_schema(&self, sheet_name: &str) -> Result<Option<Schema>> {
+		if let Ok(sheet) = self.book.get_sheet_by_name(sheet_name) {
+			schema_from_sheet(sheet).map(Some)
+		} else {
+			Ok(None)
+		}
+	}
+	async fn scan_schemas(&self) -> Result<Vec<Schema>> {
+		self.book
+			.get_sheet_collection()
+			.iter()
+			.map(schema_from_sheet)
+			.collect()
+	}
+	async fn scan_data(&self, sheet_name: &str) -> Result<Plane> {
+		let sheet = self
+			.book
+			.get_sheet_by_name(sheet_name)
+			.map_err(|_| DatabaseError::TableNotFound)?;
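Stepping back to the index scans shared by the memory and sled backends: range filters compose through `Inner`/`Outer`, so a bounded range is just the intersection of two one-sided scans. A sketch, with the index name hypothetical; per the sled range bounds, `MoreThan` is inclusive and `LessThan` exclusive:

```rust
use multisql::{IndexFilter, Value};

// 10 <= x < 20, expressed as the intersection of two one-sided scans.
let between = IndexFilter::Inner(
	Box::new(IndexFilter::MoreThan(String::from("age_idx"), Value::I64(10))),
	Box::new(IndexFilter::LessThan(String::from("age_idx"), Value::I64(20))),
);
```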
+ let Schema { column_defs, .. } = schema_from_sheet(sheet)?; + + let row_count = sheet.get_highest_row(); + let col_count = sheet.get_highest_column(); + + let rows = vec![vec![None; col_count as usize]; (row_count as usize) - 1]; + let rows = sheet + .get_collection_to_hashmap() + .iter() + .filter(|((row, _col), _)| row != &1) + .fold(rows, |mut rows, ((row_num, col_num), cell)| { + rows[(row_num - 2) as usize][(col_num - 1) as usize] = Some(cell.clone()); + rows + }); + + rows.into_iter() + .enumerate() + .map(|(pk, row)| { + ( + Value::U64((pk + 2) as u64), + Row(row + .into_iter() + .zip(&column_defs) + .map(|(cell, Column { data_type, .. })| { + Value::Str( + cell.map(|cell| cell.get_value().to_string()) + .unwrap_or_default(), + ) + .cast_valuetype(data_type) + .unwrap_or(Value::Null) + }) + .collect()), + ) + }) + .map(Ok) + .collect::>>() + } +} + +impl TryFrom for Value { + type Error = crate::Error; + fn try_from(cell: Cell) -> Result { + Ok(match cell.get_data_type() { + Cell::TYPE_STRING | Cell::TYPE_STRING2 => Value::Str(cell.get_value().to_string()), + Cell::TYPE_BOOL => Value::Bool(Value::Str(cell.get_value().to_string()).cast()?), + Cell::TYPE_NUMERIC => Value::F64(Value::Str(cell.get_value().to_string()).cast()?), + Cell::TYPE_NULL => Value::Null, + _ => return Err(DatabaseError::Unimplemented.into()), + }) + } +} + +fn schema_from_sheet(sheet: &Worksheet) -> Result { + let mut column_defs: Vec<(_, Column)> = sheet + .get_comments() + .iter() + .filter_map(|comment| { + let coordinate = comment.get_coordinate(); + if coordinate.get_row_num() == &1 { + let col = coordinate.get_col_num(); + let text = comment.get_text().get_text(); + let column_def: Column = serde_yaml::from_str(text).unwrap_or_default(); + Some(Ok((col, column_def))) + } else { + None + } + }) + .collect::>>()?; + column_defs.sort_by(|(col_a, _), (col_b, _)| col_a.cmp(col_b)); + let column_defs = column_defs + .into_iter() + .map(|(_, column_def)| column_def) + .collect(); + + Ok(Schema { + table_name: sheet.get_name().to_string(), + column_defs, + indexes: vec![], + }) +} diff --git a/src/databases/sheet/mod.rs b/src/databases/sheet/mod.rs new file mode 100644 index 00000000..da6ce600 --- /dev/null +++ b/src/databases/sheet/mod.rs @@ -0,0 +1,49 @@ +mod auto_increment; +mod base; +mod mutable; + +use { + crate::{database::*, Result}, + serde::Serialize, + std::{fmt::Debug, path::Path}, + thiserror::Error, + umya_spreadsheet::{new_file_empty_worksheet, reader, writer, Spreadsheet, Worksheet}, +}; + +#[derive(Error, Serialize, Debug, PartialEq)] +pub enum SheetDatabaseError { + #[error("FSError")] + FSError, + #[error("failed to parse column information")] + FailedColumnParse, + #[error("failed to create sheet")] + FailedToCreateSheet, + #[error("failed to get sheet")] + FailedToGetSheet, +} + +pub struct SheetDatabase { + book: Spreadsheet, + path: String, +} + +impl DBFull for SheetDatabase {} + +impl SheetDatabase { + pub fn new(path: &str) -> Result { + let book = + reader::xlsx::read(Path::new(path)).unwrap_or_else(|_| new_file_empty_worksheet()); + let path = path.to_string(); + Ok(Self { book, path }) + } + pub(crate) fn save(&self) -> Result<()> { + writer::xlsx::write(&self.book, Path::new(&self.path)) + .map_err(|_| SheetDatabaseError::FSError.into()) + } + + pub(crate) fn get_sheet_mut(&mut self, sheet_name: &str) -> Result<&mut Worksheet> { + self.book + .get_sheet_by_name_mut(sheet_name) + .map_err(|_| SheetDatabaseError::FailedToGetSheet.into()) + } +} diff --git 
a/src/databases/sheet/mutable.rs b/src/databases/sheet/mutable.rs new file mode 100644 index 00000000..f5a7a70d --- /dev/null +++ b/src/databases/sheet/mutable.rs @@ -0,0 +1,134 @@ +use umya_spreadsheet::{Border, Comment, PatternValues, RichText, Style, TextElement}; +use { + crate::{ + Cast, DBMut, DatabaseError, Result, Row, Schema, SchemaChange, SchemaDiff, SheetDatabase, + SheetDatabaseError, Value, + }, + async_trait::async_trait, +}; + +#[async_trait(?Send)] +impl DBMut for SheetDatabase { + async fn insert_schema(&mut self, schema: &Schema) -> Result<()> { + let mut style = Style::default(); + style + .get_fill_mut() + .get_pattern_fill_mut() + .set_pattern_type(PatternValues::Gray125); + style + .get_borders_mut() + .get_bottom_mut() + .set_border_style(Border::BORDER_MEDIUM); + style + .get_borders_mut() + .get_left_mut() + .set_border_style(Border::BORDER_THIN); + style + .get_borders_mut() + .get_right_mut() + .set_border_style(Border::BORDER_THIN); + + let Schema { + column_defs, + table_name: sheet_name, + .. + } = schema; + let sheet = self + .book + .new_sheet(sheet_name) + .map_err(|_| SheetDatabaseError::FailedToCreateSheet)?; + column_defs + .iter() + .enumerate() + .try_for_each::<_, Result<_>>(|(index, column_def)| { + let col = (index as u32) + 1; + let row = 1; + sheet + .get_cell_by_column_and_row_mut(&col, &row) + .set_value(&column_def.name) + .set_style(style.clone()); + let mut comment_text_element = TextElement::default(); + comment_text_element.set_text( + serde_yaml::to_string(&column_def) + .map_err(|_| SheetDatabaseError::FailedColumnParse)?, + ); + let mut comment_text = RichText::default(); + comment_text.add_rich_text_elements(comment_text_element); + let mut comment = Comment::default(); + comment + .set_text(comment_text) + .get_coordinate_mut() + .set_col_num(col) + .set_row_num(row); + sheet.add_comments(comment); + Ok(()) + })?; + self.save() + } + async fn insert_data(&mut self, sheet_name: &str, rows: Vec) -> Result<()> { + let sheet = self.get_sheet_mut(sheet_name)?; + let row_init = sheet.get_row_dimensions().len() + 1; // TODO: Not this + rows.into_iter() + .enumerate() + .for_each(|(row_num, Row(row))| { + row.into_iter().enumerate().for_each(|(col_num, cell)| { + sheet + .get_cell_by_column_and_row_mut( + &(col_num as u32 + 1), + &((row_num + row_init) as u32), + ) + .set_value(cell); + }) + }); + self.save() + } + + async fn delete_schema(&mut self, sheet_name: &str) -> Result<()> { + self.book + .remove_sheet_by_name(sheet_name) + .map_err(|_| SheetDatabaseError::FailedToGetSheet)?; + self.save() + } + + async fn update_data(&mut self, sheet_name: &str, rows: Vec<(Value, Row)>) -> Result<()> { + let sheet = self.get_sheet_mut(sheet_name)?; + rows.into_iter() + .try_for_each::<_, Result<()>>(|(key, Row(row))| { + let row_num: i64 = key.cast()?; + row.into_iter().enumerate().for_each(|(col_num, cell)| { + sheet + .get_cell_by_column_and_row_mut(&(col_num as u32 + 1), &(row_num as u32)) + .set_value(cell); + }); + Ok(()) + })?; + self.save() + } + + async fn delete_data(&mut self, sheet_name: &str, rows: Vec) -> Result<()> { + let sheet = self.get_sheet_mut(sheet_name)?; + rows.into_iter().try_for_each::<_, Result<()>>(|key| { + let row_num: u64 = key.cast()?; + sheet.remove_row(&(row_num as u32), &1); + Ok(()) + })?; + self.save() + } + + async fn alter_table(&mut self, sheet_name: &str, schema_diff: SchemaDiff) -> Result<()> { + let changes = schema_diff.get_changes(); + let sheet = self.get_sheet_mut(sheet_name)?; + for change in 
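The sheet backend persists each column definition as YAML inside a comment on its header cell, and `schema_from_sheet` later parses those comments back, falling back to `Column::default()` when the YAML does not parse. A rough sketch of that round-trip; the exact `Column` field set is assumed rather than shown in this diff, and `serde_yaml` is taken from the crate's own dependencies:

```rust
// Hypothetical: only `name` and `data_type` are visible on Column in this diff.
let column = multisql::Column::default();
let yaml = serde_yaml::to_string(&column).expect("column serializes");

// Reading the schema back just parses the comment text.
let _parsed: multisql::Column = serde_yaml::from_str(&yaml).unwrap_or_default();
```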
changes.into_iter() { + use SchemaChange::*; + match change { + RenameTable(new_name) => { + sheet.set_name(new_name); + } + _ => return Err(DatabaseError::Unimplemented.into()), + // TODO + }; + } + + self.save() + } +} diff --git a/src/databases/sled/auto_increment.rs b/src/databases/sled/auto_increment.rs new file mode 100644 index 00000000..4bed1640 --- /dev/null +++ b/src/databases/sled/auto_increment.rs @@ -0,0 +1,72 @@ +#![cfg(feature = "auto-increment")] +use { + super::{error::err_into, SledDatabase}, + crate::{AutoIncrement, Error, Result}, + async_trait::async_trait, + fstrings::*, + sled::transaction::ConflictableTransactionError, +}; + +#[async_trait(?Send)] +impl AutoIncrement for SledDatabase { + async fn generate_increment_values( + &mut self, + table_name: String, + columns: Vec<( + usize, /*index*/ + String, /*name*/ + i64, /*row_count*/ + ) /*column*/>, + ) -> Result< + Vec<( + (usize /*index*/, String /*name*/), /*column*/ + i64, /*start_value*/ + )>, + > { + self.tree + .transaction(|tree| { + let mut results = vec![]; + for (column_index, column_name, row_count) in &columns { + // KG: I couldn't get the colunns variable in here for some reason (because it is an enclosure?) + let (column_index, column_name, row_count): (usize, String, i64) = + (*column_index, column_name.clone(), *row_count); + let table_name = table_name.clone(); + let key = f!("generator/{table_name}/{column_name}"); + let key = key.as_bytes(); + + let start_ivec = tree.get(key)?; + let start_value = start_ivec + .map(|value| bincode::deserialize(&value)) + .unwrap_or(Ok(1)) + .map_err(err_into) + .map_err(ConflictableTransactionError::Abort)?; + + let end_value = start_value + row_count; + let end_ivec = bincode::serialize(&end_value) + .map_err(err_into) + .map_err(ConflictableTransactionError::Abort)?; + + tree.insert(key, end_ivec)?; + results.push(((column_index, column_name), start_value)); + } + Ok(results) + }) + .map_err(Error::from) + } + + async fn set_increment_value( + &mut self, + table_name: &str, + column_name: &str, + end: i64, + ) -> Result<()> { + let end_ivec = bincode::serialize(&end).map_err(err_into)?; + + let key = f!("generator/{table_name}/{column_name}"); + let key = key.as_bytes(); + + self.tree.insert(key, end_ivec).map_err(err_into)?; + + Ok(()) + } +} diff --git a/src/databases/sled/base.rs b/src/databases/sled/base.rs new file mode 100644 index 00000000..89a93263 --- /dev/null +++ b/src/databases/sled/base.rs @@ -0,0 +1,115 @@ +use { + super::{ + err_into, fetch_schema, + mutable::{index_prefix, indexed_key}, + SledDatabase, + }, + crate::{ + join_iters, DBBase, IndexFilter, JoinType, NullOrd, Plane, Result, Row, Schema, Value, + }, + async_trait::async_trait, + rayon::slice::ParallelSliceMut, + sled::IVec, + std::{cmp::Ordering, convert::Into}, +}; + +#[async_trait(?Send)] +impl DBBase for SledDatabase { + async fn fetch_schema(&self, table_name: &str) -> Result> { + fetch_schema(&self.tree, table_name).map(|(_, schema)| schema) + } + async fn scan_schemas(&self) -> Result> { + let prefix = "schema/".to_string(); + self.tree + .scan_prefix(prefix.as_bytes()) + .map(|item| { + let (_, bytes) = item.map_err(err_into)?; + bincode::deserialize(&bytes).map_err(err_into) + }) + .collect() + } + + async fn scan_data(&self, table_name: &str) -> Result { + let prefix = format!("data/{}/", table_name); + + self.tree + .scan_prefix(prefix.as_bytes()) + .map(|item| { + let (key, value) = item.map_err(err_into)?; + let value = bincode::deserialize(&value).map_err(err_into)?; + + 
diff --git a/src/databases/sled/base.rs b/src/databases/sled/base.rs new file mode 100644 index 00000000..89a93263 --- /dev/null +++ b/src/databases/sled/base.rs @@ -0,0 +1,115 @@ +use { + super::{ + err_into, fetch_schema, + mutable::{index_prefix, indexed_key}, + SledDatabase, + }, + crate::{ + join_iters, DBBase, IndexFilter, JoinType, NullOrd, Plane, Result, Row, Schema, Value, + }, + async_trait::async_trait, + rayon::slice::ParallelSliceMut, + sled::IVec, + std::{cmp::Ordering, convert::Into}, +}; + +#[async_trait(?Send)] +impl DBBase for SledDatabase { + async fn fetch_schema(&self, table_name: &str) -> Result<Option<Schema>> { + fetch_schema(&self.tree, table_name).map(|(_, schema)| schema) + } + async fn scan_schemas(&self) -> Result<Vec<Schema>> { + let prefix = "schema/".to_string(); + self.tree + .scan_prefix(prefix.as_bytes()) + .map(|item| { + let (_, bytes) = item.map_err(err_into)?; + bincode::deserialize(&bytes).map_err(err_into) + }) + .collect() + } + + async fn scan_data(&self, table_name: &str) -> Result<Plane> { + let prefix = format!("data/{}/", table_name); + + self.tree + .scan_prefix(prefix.as_bytes()) + .map(|item| { + let (key, value) = item.map_err(err_into)?; + let value = bincode::deserialize(&value).map_err(err_into)?; + + Ok(((&key).into(), value)) + }) + .collect::<Result<Vec<(Value, Row)>>>() + } + + async fn scan_data_indexed( + &self, + table_name: &str, + index_filter: IndexFilter, + ) -> Result<Plane> { + let index_results = self.scan_index(table_name, index_filter).await?; + let row_results = index_results.into_iter().map(|pk| { + if let Value::Bytes(pk) = pk { + self.tree + .get(&pk) + .map(|row| (pk, row.unwrap() /*TODO: Handle!*/)) + } else { + unreachable!(); + } + }); + row_results + .map(|item| { + let (pk, value) = item.map_err(err_into)?; + let value = bincode::deserialize(&value).map_err(err_into)?; + + Ok((Value::Bytes(pk.to_vec()), value)) + }) + .collect::<Result<Vec<(Value, Row)>>>() + } + + async fn scan_index(&self, table_name: &str, index_filter: IndexFilter) -> Result<Vec<Value>> { + use IndexFilter::*; + match index_filter.clone() { + LessThan(index_name, ..) | MoreThan(index_name, ..) => { + // TODO: Genericise and optimise + let prefix = index_prefix(table_name, &index_name); + let abs_min = IVec::from(prefix.as_bytes()); + let abs_max = IVec::from([prefix.as_bytes(), &[0xFF]].concat()); + + let index_results = match index_filter { + LessThan(_, max) => self.tree.range(abs_min..indexed_key(&prefix, &max)?), + MoreThan(_, min) => self.tree.range(indexed_key(&prefix, &min)?..abs_max), + _ => unreachable!(), + }; + let mut index_results = index_results + .map(|item| { + let (_, pk) = item.map_err(err_into)?; + let pk = Value::Bytes(pk.to_vec()); + + Ok(pk) + }) + .collect::<Result<Vec<Value>>>()?; + + index_results.par_sort_unstable_by(|a, b| a.null_cmp(b).unwrap_or(Ordering::Equal)); + Ok(index_results) + } + Inner(left, right) => { + let (left, right) = ( + self.scan_index(table_name, *left), + self.scan_index(table_name, *right), + ); + let (left, right) = (left.await?, right.await?); + Ok(join_iters(JoinType::Inner, left, right)) + } + Outer(left, right) => { + let (left, right) = ( + self.scan_index(table_name, *left), + self.scan_index(table_name, *right), + ); + let (left, right) = (left.await?, right.await?); + Ok(join_iters(JoinType::Outer, left, right)) + } + } + } +} diff --git a/src/databases/sled/error.rs b/src/databases/sled/error.rs new file mode 100644 index 00000000..19b202c4 --- /dev/null +++ b/src/databases/sled/error.rs @@ -0,0 +1,58 @@ +use {crate::Error, sled::transaction::TransactionError, std::str, thiserror::Error as ThisError}; + +#[derive(ThisError, Debug)] +pub enum DatabaseError { + #[error(transparent)] + Sled(#[from] sled::Error), + #[error(transparent)] + Bincode(#[from] bincode::Error), + #[error(transparent)] + Str(#[from] str::Utf8Error), +} + +impl From<DatabaseError> for Error { + fn from(e: DatabaseError) -> Error { + use DatabaseError::*; + + match e { + Sled(e) => Error::Database(Box::new(e)), + Bincode(e) => Error::Database(e), + Str(e) => Error::Database(Box::new(e)), + } + } +} +impl From<sled::Error> for Error { + fn from(e: sled::Error) -> Error { + Error::Database(Box::new(e)) + } +} +impl From<bincode::Error> for Error { + fn from(e: bincode::Error) -> Error { + Error::Database(Box::new(e)) + } +} + +impl From<str::Utf8Error> for Error { + fn from(e: str::Utf8Error) -> Error { + Error::Database(Box::new(e)) + } +} + +impl From<TransactionError<Error>> for Error { + fn from(error: TransactionError<Error>) -> Error { + match error { + TransactionError::Abort(error) => error, + TransactionError::Storage(error) => DatabaseError::Sled(error).into(), + } + } } + +pub fn err_into<E>(error: E) -> Error +where + E: Into<DatabaseError>, +{ + let error: DatabaseError = error.into(); + let error: Error = error.into(); + + error +}
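Note: err_into is the single funnel for storage errors: anything convertible into the backend-local DatabaseError is widened to the crate-level Error, which keeps every .map_err(err_into) call site uniform. The same shape in miniature (the error types here are stand-ins, not the crate's):

	#[derive(Debug)]
	enum StoreError {
		Io(String),
	}

	#[derive(Debug)]
	enum Error {
		Database(StoreError),
	}

	impl From<StoreError> for Error {
		fn from(e: StoreError) -> Error {
			Error::Database(e)
		}
	}

	// Mirror of err_into: widen any E: Into<StoreError> into the top-level Error.
	fn err_into<E: Into<StoreError>>(error: E) -> Error {
		let error: StoreError = error.into();
		error.into()
	}

	fn main() {
		println!("{:?}", err_into(StoreError::Io("disk".into())));
	}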
diff --git a/src/databases/sled/mod.rs b/src/databases/sled/mod.rs new file mode 100644 index 00000000..99517913 --- /dev/null +++ b/src/databases/sled/mod.rs @@ -0,0 +1,55 @@ +mod auto_increment; +mod base; +mod error; +mod mutable; +mod util; +#[cfg(not(feature = "alter-table"))] +impl crate::AlterTable for SledDatabase {} +#[cfg(not(feature = "auto-increment"))] +impl crate::AutoIncrement for SledDatabase {} + +use { + crate::{DBFull, Database, Error, Result, Schema}, + error::err_into, + sled::{self, Config, Db}, + std::convert::TryFrom, +}; + +#[derive(Debug, Clone)] +pub struct SledDatabase { + tree: Db, +} +impl DBFull for SledDatabase {} +impl SledDatabase { + pub fn new(filename: &str) -> Result<Self> { + let tree = sled::open(filename).map_err(err_into)?; + Ok(Self { tree }) + } +} + +impl Database { + pub fn new_sled(sled: SledDatabase) -> Self { + Self::new(Box::new(sled)) + } +} + +impl TryFrom<Config> for SledDatabase { + type Error = Error; + + fn try_from(config: Config) -> Result<Self, Self::Error> { + let tree = config.open().map_err(err_into)?; + + Ok(Self { tree }) + } +} + +fn fetch_schema(tree: &Db, table_name: &str) -> Result<(String, Option<Schema>)> { + let key = format!("schema/{}", table_name); + let value = tree.get(&key.as_bytes()).map_err(err_into)?; + let schema = value + .map(|v| bincode::deserialize(&v)) + .transpose() + .map_err(err_into)?; + + Ok((key, schema)) +} diff --git a/src/databases/sled/mutable.rs b/src/databases/sled/mutable.rs new file mode 100644 index 00000000..37ec8a06 --- /dev/null +++ b/src/databases/sled/mutable.rs @@ -0,0 +1,265 @@ +use { + super::{err_into, fetch_schema, SledDatabase}, + crate::{ + BigEndian, Column, DBMut, DatabaseError, Result, Row, Schema, SchemaChange, SchemaDiff, + Value, + }, + async_trait::async_trait, + rayon::prelude::*, + sled::IVec, + std::convert::From, +}; + +#[async_trait(?Send)] +impl DBMut for SledDatabase { + async fn insert_schema(&mut self, schema: &Schema) -> Result<()> { + let key = format!("schema/{}", schema.table_name); + let key = key.as_bytes(); + let value = bincode::serialize(schema)?; + + self.tree.insert(key, value)?; + + Ok(()) + } + + async fn delete_schema(&mut self, table_name: &str) -> Result<()> { + let prefix = format!("data/{}/", table_name); + + let mut keys = self + .tree + .scan_prefix(prefix.as_bytes()) + .par_bridge() + .map(|result| result.map(|(key, _)| key).map_err(err_into)) + .collect::<Result<Vec<IVec>>>()?; + + let table_key = format!("schema/{}", table_name); + keys.push(IVec::from(table_key.as_bytes())); + + let batch = keys + .into_iter() + .fold(sled::Batch::default(), |mut batch, key| { + batch.remove(key); + batch + }); + + self.tree.apply_batch(batch).map_err(err_into) + } + + async fn insert_data(&mut self, table_name: &str, rows: Vec<Row>) -> Result<()> { + let ready_rows = rows + .into_par_iter() + .map(|row| { + let id = self.tree.generate_id()?; + let id = id.to_be_bytes(); + let prefix = format!("data/{}/", table_name); + + let bytes = prefix + .into_bytes() + .into_iter() + .chain(id.iter().copied()) + .collect::<Vec<u8>>(); + + let key = IVec::from(bytes); + let value = bincode::serialize(&row)?; + Ok((key, value)) + }) + .collect::<Result<Vec<_>>>()?; + + let batch = + ready_rows + .into_iter() + .fold(sled::Batch::default(), |mut batch, (key, value)| { + batch.insert(key, value); + batch + }); + self.tree.apply_batch(batch).map_err(err_into) + } + + async fn update_data(&mut self, _table_name: &str, rows: Vec<(Value, Row)>) -> Result<()> { + let ready_rows = rows + .into_par_iter() + .map(|(key, value)| { + let value = bincode::serialize(&value)?; + let key = IVec::from(&key); + Ok((key, value)) + }) + .collect::<Result<Vec<_>>>()?; + + let 
batch = + ready_rows + .into_iter() + .fold(sled::Batch::default(), |mut batch, (key, value)| { + batch.insert(key, value); + batch + }); + self.tree.apply_batch(batch).map_err(err_into) + } + + async fn delete_data(&mut self, _table_name: &str, keys: Vec<Value>) -> Result<()> { + let batch = keys + .into_iter() + .fold(sled::Batch::default(), |mut batch, key| { + batch.remove(IVec::from(&key)); + batch + }); + self.tree.apply_batch(batch).map_err(err_into) + } + + async fn update_index( + &mut self, + table_name: &str, + index_name: &str, + keys: Vec<(Value, Value)>, + ) -> Result<()> { + self.remove_index(table_name, index_name)?; + + let prefix = index_prefix(table_name, index_name); + + let keys: Vec<(IVec, IVec)> = keys + .into_iter() + .enumerate() + .map(|(idx, (index_key, row_key))| { + // TODO: Don't use idx where unique + let index_key = unique_indexed_key(&prefix, &index_key, idx)?; + let row_key = IVec::from(&row_key); + Ok((index_key, row_key)) + }) + .collect::<Result<Vec<_>>>()?; + + let batch = + keys.into_iter() + .fold(sled::Batch::default(), |mut batch, (index_key, row_key)| { + batch.insert(index_key, row_key); + batch + }); + + self.tree.apply_batch(batch).map_err(err_into) + } + + async fn alter_table(&mut self, table_name: &str, schema_diff: SchemaDiff) -> Result<()> { + let changes = schema_diff.get_changes(); + for change in changes.into_iter() { + use SchemaChange::*; + match change { + RenameTable(new_name) => self.rename_table(table_name, new_name), + ColumnAdd(column_def) => self.add_column(table_name, column_def), + ColumnRemove(index) => self.remove_column(table_name, index), + IndexRemove(index) => { + let schema = fetch_schema(&self.tree, table_name)? + .1 + .ok_or(DatabaseError::TableNotFound)?; + if let Some(index) = schema.indexes.get(index) { + self.remove_index(table_name, &index.name) + } else { + Ok(()) + } + } + ColumnUpdate(..) | IndexAdd(..) 
=> Ok(()), + _ => Err(DatabaseError::Unimplemented.into()), + // TODO: Column remove & add: manipulate all rows + // TODO: Index remove, add and update: rebuild + }?; + } + + let (key, schema) = fetch_schema(&self.tree, table_name)?; + let schema = schema.ok_or(DatabaseError::TableNotFound)?; + let schema = schema_diff.merge(schema); + let schema_value = bincode::serialize(&schema)?; + self.tree.insert(key, schema_value)?; + + Ok(()) + } +} + +impl SledDatabase { + pub fn rename_table(&mut self, old_name: &str, new_name: String) -> Result<()> { + let (key, schema) = fetch_schema(&self.tree, old_name)?; + let schema = schema.ok_or(DatabaseError::TableNotFound)?; + self.tree.remove(key)?; + + let value = bincode::serialize(&schema)?; + let key = format!("schema/{}", new_name); + let key = key.as_bytes(); + self.tree.insert(key, value)?; + + let prefix = format!("data/{}/", old_name); + + for item in self.tree.scan_prefix(prefix.as_bytes()) { + let (key, value) = item?; + + let new_key = std::str::from_utf8(key.as_ref())?; + let new_key = new_key.replace(old_name, &new_name); + self.tree.insert(new_key, value)?; + + self.tree.remove(key)?; + } + + Ok(()) + } + pub fn add_column(&mut self, table_name: &str, column: Column) -> Result<()> { + let value = match (&column.default, &column.is_nullable) { + (Some(_expr), _) => Err(DatabaseError::Unimplemented), // TODO + (None, true) => Ok(Value::Null), + (None, false) => Err(DatabaseError::Unimplemented), + }?; + + let prefix = format!("data/{}/", table_name); + + for item in self.tree.scan_prefix(prefix.as_bytes()) { + let (key, row) = item?; + let row: Row = bincode::deserialize(&row)?; + let row = Row(row.0.into_iter().chain([value.clone()]).collect()); + let row = bincode::serialize(&row)?; + + self.tree.insert(key, row)?; + } + Ok(()) + } + pub fn remove_column(&mut self, table_name: &str, index: usize) -> Result<()> { + let prefix = format!("data/{}/", table_name); + for item in self.tree.scan_prefix(prefix.as_bytes()) { + let (key, row) = item?; + let row: Row = bincode::deserialize(&row)?; + let mut row = row.0; + row.remove(index); + let row = bincode::serialize(&Row(row))?; + + self.tree.insert(key, row)?; + } + Ok(()) + } + pub fn remove_index(&mut self, table_name: &str, index_name: &str) -> Result<()> { + let prefix = index_prefix(table_name, index_name); + let remove_keys = self + .tree + .scan_prefix(prefix.as_bytes()) + .par_bridge() + .map(|result| result.map(|(key, _)| key).map_err(err_into)) + .collect::<Result<Vec<IVec>>>()?; + let batch = remove_keys + .into_iter() + .fold(sled::Batch::default(), |mut batch, key| { + batch.remove(key); + batch + }); + self.tree.apply_batch(batch).map_err(err_into) + } +} + +pub fn index_prefix(table_name: &str, index_name: &str) -> String { + format!("index/{}/{}/", table_name, index_name) +} + +pub fn indexed_key(prefix: &str, index: &Value) -> Result<IVec> { + Ok([prefix.as_bytes(), &index.to_be_bytes()].concat().into()) +} +pub fn unique_indexed_key(prefix: &str, index: &Value, idx: usize) -> Result<IVec> { + Ok([ + prefix.as_bytes(), + &index.to_be_bytes(), + &[0x00], + &idx.to_be_bytes(), + ] + .concat() + .into()) +}
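Note: these helpers pin down the sled key space: schemas live under schema/{table}, rows under data/{table}/{generated id}, and index entries under index/{table}/{index}/ followed by the big-endian encoding of the value plus a 0x00-separated tiebreaker, so equal values still get distinct keys. Because the encoding is order-preserving, the range scans in scan_index walk entries in value order. A sketch of that property (i64::to_be_bytes stands in for the crate's BigEndian encoding; sign handling aside):

	fn unique_indexed_key(prefix: &str, value: i64, idx: usize) -> Vec<u8> {
		// prefix ++ big-endian value ++ 0x00 ++ row tiebreaker, as in unique_indexed_key above.
		[prefix.as_bytes(), &value.to_be_bytes(), &[0x00], &idx.to_be_bytes()].concat()
	}

	fn main() {
		let prefix = "index/users/age/";
		let a = unique_indexed_key(prefix, 30, 0);
		let b = unique_indexed_key(prefix, 30, 1); // same value, different row
		let c = unique_indexed_key(prefix, 31, 0);
		// Byte order matches value order, so a range scan yields index order.
		assert!(a < b && b < c);
	}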
diff --git a/src/databases/sled/util.rs b/src/databases/sled/util.rs new file mode 100644 index 00000000..e362b315 --- /dev/null +++ b/src/databases/sled/util.rs @@ -0,0 +1,17 @@ +use {crate::Value, sled::IVec, std::convert::From}; + +impl From<&IVec> for Value { + fn from(from: &IVec) -> Self { + Value::Bytes(from.to_vec()) + } +} + +impl From<&Value> for IVec { + fn from(from: &Value) -> Self { + if let Value::Bytes(bytes) = from { + IVec::from(bytes.clone()) + } else { + panic!("Tried to convert value of non-bytes into IVec") + } + } +} diff --git a/src/executor/alter_row/auto_increment.rs b/src/executor/alter_row/auto_increment.rs new file mode 100644 index 00000000..4078525f --- /dev/null +++ b/src/executor/alter_row/auto_increment.rs @@ -0,0 +1,44 @@ +#![cfg(feature = "auto-increment")] +use crate::{Column, Glue, Result, Row, Value, ValueDefault}; + +impl Glue { + pub async fn auto_increment( + &mut self, + database: &Option<String>, + table_name: &str, + columns: &[Column], + rows: &mut [Row], + ) -> Result<()> { + let auto_increment_columns = columns + .iter() + .enumerate() + .filter(|(_, column)| matches!(column.default, Some(ValueDefault::AutoIncrement(_)))) + .map(|(index, column)| { + ( + index, + column.name.clone(), + rows.iter() + .filter(|row| matches!(row.0.get(index), Some(Value::Null))) + .count() as i64, + ) + }) + .collect(); + + let column_values = self + .get_mut_database(database)? + .generate_increment_values(table_name.to_string(), auto_increment_columns) + .await?; + + let mut column_values = column_values; + for row in rows.iter_mut() { + for ((index, _name), value) in &mut column_values { + let cell = row.0.get_mut(*index).unwrap(); + if matches!(cell, Value::Null) { + *cell = Value::I64(*value); + *value += 1; + } + } + } + Ok(()) + } +} diff --git a/src/executor/alter_row/delete.rs b/src/executor/alter_row/delete.rs new file mode 100644 index 00000000..2199e97d --- /dev/null +++ b/src/executor/alter_row/delete.rs @@ -0,0 +1,75 @@ +use { + crate::{ + data::Schema, executor::types::ColumnInfo, Column, ComplexTableName, ExecuteError, Glue, + MetaRecipe, Payload, PlannedRecipe, Result, Value, + }, + sqlparser::ast::{Expr, ObjectName}, +}; + +impl Glue { + pub async fn delete( + &mut self, + table_name: &ObjectName, + selection: &Option<Expr>, + ) -> Result<Payload> { + let ComplexTableName { + name: table_name, + database, + .. + } = table_name.try_into()?; + let Schema { + column_defs, + indexes, + .. + } = self + .get_database(&database)? + .fetch_schema(&table_name) + .await? + .ok_or(ExecuteError::TableNotExists)?; + + let columns = column_defs + .clone() + .into_iter() + .map(|Column { name, .. }| ColumnInfo::of_name(name)) + .collect::<Vec<ColumnInfo>>(); + let filter = selection + .clone() + .map(|selection| { + PlannedRecipe::new( + MetaRecipe::new(selection)?.simplify_by_context(&*self.get_context()?)?, + &columns, + ) + }) + .unwrap_or(Ok(PlannedRecipe::TRUE))?; + + let keys = self + .get_database(&database)? + .scan_data(&table_name) + .await? 
+ .into_iter() + .filter_map(|(key, row)| { + let row = row.0; + + let confirm_constraint = filter.confirm_constraint(&row); + match confirm_constraint { + Ok(true) => Some(Ok(key)), + Ok(false) => None, + Err(error) => Some(Err(error)), + } + }) + .collect::<Result<Vec<Value>>>()?; + + let num_keys = keys.len(); + + let database = &mut **self.get_mut_database(&None)?; + let result = database + .delete_data(&table_name, keys) + .await + .map(|_| Payload::Delete(num_keys))?; + + for index in indexes.iter() { + index.reset(database, &table_name, &column_defs).await?; // TODO: Not this; optimise + } + Ok(result) + } +}
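Note: delete scans the whole table, keeps the keys of rows that satisfy the WHERE recipe, issues one batched delete_data, and then rebuilds every index on the table (the TODO above flags that full rebuild as temporary). At the API surface this is just (a sketch, assuming the blocking Glue::execute wrapper and an illustrative table):

	fn prune(glue: &mut multisql::Glue) -> multisql::Result<()> {
		if let multisql::Payload::Delete(count) = glue.execute("DELETE FROM users WHERE age > 30")? {
			println!("removed {count} row(s)");
		}
		Ok(())
	}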
diff --git a/src/executor/alter_row/insert.rs b/src/executor/alter_row/insert.rs new file mode 100644 index 00000000..0b84a56f --- /dev/null +++ b/src/executor/alter_row/insert.rs @@ -0,0 +1,63 @@ +use { + super::{columns_to_positions, validate}, + crate::{data::Schema, ComplexTableName, ExecuteError, Glue, Payload, Result, Row}, + sqlparser::ast::{Ident, ObjectName, Query}, +}; + +impl Glue { + pub async fn insert( + &mut self, + table_name: &ObjectName, + columns: &[Ident], + source: &Query, + expect_data: bool, + ) -> Result<Payload> { + let ComplexTableName { + name: table_name, + database, + .. + } = &table_name.try_into()?; + let Schema { + column_defs, + indexes, + .. + } = self + .get_database(database)? + .fetch_schema(table_name) + .await? + .ok_or(ExecuteError::TableNotExists)?; + + // TODO: Multi storage + let (labels, mut rows) = self.query(source.clone()).await?; + let column_positions = columns_to_positions(&column_defs, columns)?; + + validate(&column_defs, &column_positions, &mut rows)?; + let mut rows: Vec<Row> = rows.into_iter().map(Row).collect(); + #[cfg(feature = "auto-increment")] + self.auto_increment(database, table_name, &column_defs, &mut rows) + .await?; + self.validate_unique(database, table_name, &column_defs, &rows, None) + .await?; + + let num_rows = rows.len(); + + let database = &mut **self.get_mut_database(database)?; + + let result = database.insert_data(table_name, rows.clone()).await; + + let result = result.map(|_| { + if expect_data { + Payload::Select { labels, rows } + } else { + Payload::Insert(num_rows) + } + })?; + + for index in indexes.iter() { + // TODO: Should definitely be just inserting an index record + index.reset(database, table_name, &column_defs).await?; // TODO: Not this; optimise + } + + Ok(result) + } +} diff --git a/src/executor/alter_row/mod.rs b/src/executor/alter_row/mod.rs new file mode 100644 index 00000000..4634d026 --- /dev/null +++ b/src/executor/alter_row/mod.rs @@ -0,0 +1,8 @@ +mod auto_increment; +mod delete; +mod insert; +mod update; +mod validate; +mod validate_unique; + +pub use validate::{columns_to_positions, validate, ValidateError}; diff --git a/src/executor/alter_row/update.rs b/src/executor/alter_row/update.rs new file mode 100644 index 00000000..f3b2435a --- /dev/null +++ b/src/executor/alter_row/update.rs @@ -0,0 +1,132 @@ +use { + super::{columns_to_positions, validate}, + crate::{ + data::Schema, + executor::types::{ColumnInfo, Row as VecRow}, + Column, ComplexTableName, ExecuteError, Glue, MetaRecipe, Payload, PlannedRecipe, + RecipeUtilities, Result, Row, Value, + }, + sqlparser::ast::{Assignment, Expr, TableFactor, TableWithJoins}, +}; + +impl Glue { + pub async fn update( + &mut self, + table: &TableWithJoins, + selection: &Option<Expr>, + assignments: &[Assignment], + ) -> Result<Payload> { + // TODO: Complex updates (joins) + let ComplexTableName { + name: table, + database, + .. + } = match &table.relation { + TableFactor::Table { name, .. } => name.try_into(), + _ => Err(ExecuteError::QueryNotSupported.into()), + }?; + let Schema { + column_defs, + indexes, + .. + } = self + .get_database(&database)? + .fetch_schema(&table) + .await? + .ok_or(ExecuteError::TableNotExists)?; + + let columns = column_defs + .clone() + .into_iter() + .map(|Column { name, .. }| ColumnInfo::of_name(name)) + .collect::<Vec<ColumnInfo>>(); + + let filter = selection + .clone() + .map(|selection| { + PlannedRecipe::new( + MetaRecipe::new(selection)?.simplify_by_context(&*self.get_context()?)?, + &columns, + ) + }) + .unwrap_or(Ok(PlannedRecipe::TRUE))?; + + let assignments = assignments + .iter() + .map(|assignment| { + let Assignment { id, value } = assignment; + let column_compare = id + .clone() + .into_iter() + .map(|component| component.value) + .collect(); + let index = columns + .iter() + .position(|column| column == &column_compare) + .ok_or(ExecuteError::ColumnNotFound)?; + let recipe = PlannedRecipe::new( + MetaRecipe::new(value.clone())?.simplify_by_context(&*self.get_context()?)?, + &columns, + )?; + Ok((index, recipe)) + }) + .collect::<Result<Vec<(usize, PlannedRecipe)>>>()?; + + let keyed_rows = self + .get_database(&None)? + .scan_data(&table) + .await? + .into_iter() + .filter_map(|(key, row)| { + let row = row.0; + + let confirm_constraint = filter.confirm_constraint(&row); + if let Ok(false) = confirm_constraint { + return None; + } else if let Err(error) = confirm_constraint { + return Some(Err(error)); + } + let row = row + .iter() + .enumerate() + .map(|(index, old_value)| { + assignments + .iter() + .find(|(assignment_index, _)| assignment_index == &index) + .map(|(_, assignment_recipe)| { + assignment_recipe.clone().simplify_by_row(&row)?.confirm() + }) + .unwrap_or_else(|| Ok(old_value.clone())) + }) + .collect::<Result<VecRow>>(); + Some(row.map(|row| (key, row))) + }) + .collect::<Result<Vec<(Value, VecRow)>>>()?; + + let column_positions = columns_to_positions(&column_defs, &[])?; + let (keys, mut rows): (Vec<Value>, Vec<VecRow>) = keyed_rows.into_iter().unzip(); + validate(&column_defs, &column_positions, &mut rows)?; + + let table = table.as_str(); + let mut rows: Vec<Row> = rows.into_iter().map(Row).collect(); + #[cfg(feature = "auto-increment")] + self.auto_increment(&database, table, &column_defs, &mut rows) + .await?; + self.validate_unique(&database, table, &column_defs, &rows, Some(&keys)) + .await?; + let keyed_rows: Vec<(Value, Row)> = keys.into_iter().zip(rows).collect(); + let num_rows = keyed_rows.len(); + + let database = &mut **self.get_mut_database(&database)?; + + let result = database + .update_data(table, keyed_rows) + .await + .map(|_| Payload::Update(num_rows))?; + + for index in indexes.iter() { + index.reset(database, table, &column_defs).await?; // TODO: Not this; optimise + } + Ok(result) + } +} diff --git a/src/executor/alter_row/validate.rs b/src/executor/alter_row/validate.rs new file mode 100644 index 00000000..cb57c55f --- /dev/null +++ b/src/executor/alter_row/validate.rs @@ -0,0 +1,120 @@ +use { + crate::{ + executor::types::Row, Column, Ingredient, Recipe, RecipeUtilities, Resolve, Result, + SimplifyBy, Value, ValueDefault, ValueType, + }, + rayon::prelude::*, + serde::Serialize, + sqlparser::ast::Ident, + thiserror::Error as ThisError, +}; + +#[derive(ThisError, Serialize, Debug, PartialEq)] +pub enum ValidateError { + #[error("expected value for column which neither accepts NULL nor has a default")] + MissingValue, + #[error("wrong number of values in insert statement")] + WrongNumberOfValues, + #[error("default value failed to be 
calculated")] + BadDefault, + #[error("column '{0}' not found")] + ColumnNotFound(String), + #[error("found duplicate value on unique field")] + //#[error("column '{0}' is unique but '{1:?}' was attempted to be stored twice")] + DuplicateEntryOnUniqueField, /*(String, Value)*/ + + #[error("this should be impossible, please report")] + UnreachableUniqueValues, +} + +pub fn columns_to_positions(column_defs: &[Column], columns: &[Ident]) -> Result<Vec<usize>> { + if columns.is_empty() { + Ok((0..column_defs.len()).collect()) + } else { + columns + .iter() + .map(|stated_column| { + column_defs + .iter() + .position(|column_def| stated_column.value == column_def.name) + .ok_or_else(|| { + ValidateError::ColumnNotFound(stated_column.value.clone()).into() + }) + }) + .collect::<Result<Vec<usize>>>() + } +} + +pub fn validate(columns: &[Column], stated_columns: &[usize], rows: &mut Vec<Row>) -> Result<()> { + if rows.iter().any(|row| row.len() != stated_columns.len()) { + return Err(ValidateError::WrongNumberOfValues.into()); + } + + let column_info = columns + .iter() + .enumerate() + .map(|(column_def_index, column)| { + let index = stated_columns + .iter() + .position(|stated_column| stated_column == &column_def_index); + + let nullable = column.is_nullable || column.default.is_some(); + + let failure_recipe = if let Some(ValueDefault::Recipe(expr)) = &column.default { + Some(Recipe::new_without_meta(expr.clone())?) + } else if nullable { + Some(Recipe::NULL) + } else { + None + }; + Ok((index, failure_recipe, nullable, &column.data_type)) + }) + .collect::<Result<Vec<(Option<usize>, Option<Recipe>, bool, &ValueType)>>>()?; + *rows = rows + .into_par_iter() + .map(|row| { + column_info + .iter() + .map(|(index, failure_recipe, nullable, data_type)| { + index + .map(|index| { + row.get(index).map(|value| { + let mut value = value.clone(); + if let Err(error) = value.validate_null(*nullable) { + value = if let Some(fallback) = failure_recipe.clone() { + if !matches!( + fallback, + Recipe::Ingredient(Ingredient::Value(Value::Null)) + ) { + fallback + .simplify(SimplifyBy::Basic)? + .as_solution() + .ok_or(ValidateError::BadDefault)? + } else { + return Err(error); + } + } else { + return Err(error); + } + } + value.is(data_type)?; + Ok(value) + }) + }) + .flatten() + .unwrap_or({ + if let Some(recipe) = failure_recipe.clone() { + recipe + .simplify(SimplifyBy::Basic)? + .as_solution() + .ok_or_else(|| ValidateError::BadDefault.into()) + } else { + Err(ValidateError::MissingValue.into()) + } + }) + }) + .collect::<Result<Row>>() + }) + .collect::<Result<Vec<Row>>>()?; + Ok(()) +} diff --git a/src/executor/alter_row/validate_unique.rs b/src/executor/alter_row/validate_unique.rs new file mode 100644 index 00000000..0f48f63a --- /dev/null +++ b/src/executor/alter_row/validate_unique.rs @@ -0,0 +1,162 @@ +use { + crate::{Column, Glue, NullOrd, Result, Row, ValidateError, Value}, + std::cmp::Ordering, +}; + +macro_rules! some_or_continue { + ($option: expr) => { + match $option { + Some(value) => value, + None => return Some(Ok(())), + } + }; +} +macro_rules! some_or { + ($option: expr, $or: block) => { + match $option { + Some(value) => value, + None => $or, + } + }; +}
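Note: validate_unique below checks uniqueness without a hash set: for each unique column it collects the stored values and the incoming values, sorts both, then walks the two sorted streams in step, failing on any equal pair and also on a duplicate within the incoming batch itself (that is what the some_or/some_or_continue loops implement). The core of that walk, stripped of the row plumbing (a sketch over plain integers):

	use std::cmp::Ordering;

	fn has_duplicate(mut existing: Vec<i64>, mut incoming: Vec<i64>) -> bool {
		existing.sort_unstable();
		incoming.sort_unstable();
		// A duplicate inside the new batch alone also fails.
		if incoming.windows(2).any(|w| w[0] == w[1]) {
			return true;
		}
		let (mut i, mut j) = (0, 0);
		while i < existing.len() && j < incoming.len() {
			match existing[i].cmp(&incoming[j]) {
				Ordering::Less => i += 1,
				Ordering::Greater => j += 1,
				Ordering::Equal => return true,
			}
		}
		false
	}

	fn main() {
		assert!(!has_duplicate(vec![1, 3], vec![2, 4]));
		assert!(has_duplicate(vec![1, 3], vec![3])); // clashes with a stored row
		assert!(has_duplicate(vec![], vec![5, 5])); // clashes within the batch
	}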
 + +impl Glue { + pub(crate) async fn validate_unique( + &self, + database: &Option<String>, + table_name: &str, + column_defs: &[Column], + rows: &[Row], + ignore_keys: Option<&[Value]>, + ) -> Result<()> { + let unique_columns: Vec<usize> = column_defs + .iter() + .enumerate() + .filter_map(|(index, column_def)| { + if column_def.is_unique { + Some(index) + } else { + None + } + }) + .collect(); + let mut existing_values: Vec<Vec<Value>> = vec![vec![]; unique_columns.len()]; + self.get_database(database)? + .scan_data(table_name) + .await? + .into_iter() + .try_for_each::<_, Result<_>>(|(key, row)| { + if let Some(ignore_keys) = ignore_keys { + if ignore_keys.iter().any(|ignore_key| ignore_key == &key) { + return Ok(()); + } + } + let row = row.0; + unique_columns + .iter() + .enumerate() + .map(|(index, row_index)| { + existing_values + .get_mut(index)? + .push(row.get(*row_index)?.clone()); + Some(()) + }) + .collect::<Option<()>>() + .ok_or_else(|| ValidateError::UnreachableUniqueValues.into()) + })?; + + let mut new_values: Vec<Vec<Value>> = vec![vec![]; unique_columns.len()]; + rows.iter().try_for_each::<_, Result<_>>(|row| { + unique_columns + .iter() + .enumerate() + .map(|(index, row_index)| { + new_values + .get_mut(index)? + .push(row.0.get(*row_index)?.clone()); + Some(()) + }) + .collect::<Option<()>>() + .ok_or_else(|| ValidateError::UnreachableUniqueValues.into()) + })?; + let mut existing_values_iter = existing_values.into_iter(); + new_values + .into_iter() + .map(|mut new_values| { + let mut existing_values = existing_values_iter.next()?; + + existing_values.sort_unstable_by(|value_l, value_r| { + value_l.partial_cmp(value_r).unwrap_or(Ordering::Equal) + }); + new_values.sort_unstable_by(|value_l, value_r| { + value_l.partial_cmp(value_r).unwrap_or(Ordering::Equal) + }); + + let mut existing_values = existing_values.into_iter(); + let mut new_values = new_values.into_iter(); + + let mut new_value = some_or_continue!(new_values.next()); + let mut existing_value = some_or!(existing_values.next(), { + loop { + let new_new = some_or_continue!(new_values.next()); + if new_new == new_value { + return Some(Err(ValidateError::DuplicateEntryOnUniqueField.into())); + } + new_value = new_new; + } + }); + + loop { + match existing_value.null_cmp(&new_value) { + Some(Ordering::Equal) => { + return Some(Err(ValidateError::DuplicateEntryOnUniqueField.into())) + } + Some(Ordering::Greater) => { + let new_new = some_or_continue!(new_values.next()); + if new_new == new_value { + return Some( + Err(ValidateError::DuplicateEntryOnUniqueField.into()), + ); + } + new_value = new_new; + } + Some(Ordering::Less) => { + existing_value = some_or!(existing_values.next(), { + loop { + let new_new = some_or_continue!(new_values.next()); + if new_new == new_value { + return Some(Err( + ValidateError::DuplicateEntryOnUniqueField.into(), + )); + } + new_value = new_new; + } + }); + } + None => { + let new_new = some_or_continue!(new_values.next()); + if new_new == new_value { + return Some( + Err(ValidateError::DuplicateEntryOnUniqueField.into()), + ); + } + new_value = new_new; + existing_value = some_or!(existing_values.next(), { + loop { + let new_new = some_or_continue!(new_values.next()); + if new_new == new_value { + return Some(Err( + ValidateError::DuplicateEntryOnUniqueField.into(), + )); + } + new_value = new_new; + } + }); + } + } + } + }) + .collect::<Option<Result<()>>>() + .ok_or(ValidateError::UnreachableUniqueValues)? 
+ } +} diff --git a/src/executor/alter_table/alter_table.rs b/src/executor/alter_table/alter_table.rs new file mode 100644 index 00000000..de1d7f29 --- /dev/null +++ b/src/executor/alter_table/alter_table.rs @@ -0,0 +1,77 @@ +use { + super::{validate, AlterError}, + crate::{data::get_name, Error, Glue, Result, SchemaDiff}, + sqlparser::ast::{AlterTableOperation, ObjectName}, +}; + +impl Glue { + pub async fn alter_table( + &mut self, + name: &ObjectName, + operation: &AlterTableOperation, + ) -> Result<()> { + let table_name = get_name(name).map_err(Error::from)?; + let database = &mut **self.get_mut_database(&None)?; + + let diff = match operation { + AlterTableOperation::RenameTable { + table_name: new_table_name, + } => { + let new_table_name = get_name(new_table_name).map_err(Error::from)?; + + SchemaDiff::new_rename(new_table_name.clone()) + } + AlterTableOperation::RenameColumn { + old_column_name, + new_column_name, + } => { + let schema = database + .fetch_schema(table_name) + .await? + .ok_or(AlterError::TableNotFound(table_name.clone()))?; + let (column_index, column) = schema + .column_defs + .into_iter() + .enumerate() + .find(|(_, column)| column.name == old_column_name.value) + .ok_or(AlterError::ColumnNotFound( + table_name.clone(), + old_column_name.value.clone(), + ))?; + SchemaDiff::new_rename_column(column_index, column, new_column_name.value.clone()) + } + AlterTableOperation::AddColumn { column_def } => { + validate(column_def).map_err(Error::from)?; + + SchemaDiff::new_add_column(column_def.into()) + } + AlterTableOperation::DropColumn { + column_name, + if_exists: _, + .. + } => { + let schema = database + .fetch_schema(table_name) + .await? + .ok_or(AlterError::TableNotFound(table_name.clone()))?; + let (column_index, _) = schema + .column_defs + .into_iter() + .enumerate() + .find(|(_, column)| column.name == column_name.value) + .ok_or(AlterError::ColumnNotFound( + table_name.clone(), + column_name.value.clone(), + ))?; + + SchemaDiff::new_remove_column(column_index) + } + _ => { + return Err( + AlterError::UnsupportedAlterTableOperation(operation.to_string()).into(), + ) + } + }; + database.alter_table(table_name, diff).await + } +} diff --git a/src/executor/alter_table/create_index.rs b/src/executor/alter_table/create_index.rs new file mode 100644 index 00000000..805bede2 --- /dev/null +++ b/src/executor/alter_table/create_index.rs @@ -0,0 +1,73 @@ +use { + crate::{data::get_name, AlterError, ExecuteError, Glue, Index, Result, SchemaDiff}, + sqlparser::ast::{Expr, ObjectName, OrderByExpr}, +}; + +impl Glue { + pub async fn create_index( + &mut self, + table: &ObjectName, + name: &ObjectName, + columns: &[OrderByExpr], + unique: bool, + if_not_exists: bool, + ) -> Result<()> { + let name = name + .0 + .last() + .ok_or(ExecuteError::QueryNotSupported)? + .value + .clone(); + + let table_name = get_name(table)?; + let database = &mut **self.get_mut_database(&None)?; + + let schema = database + .fetch_schema(table_name) + .await? 
+ .ok_or(ExecuteError::TableNotExists)?; + + if schema.indexes.iter().any(|index| index.name == name) { + if !if_not_exists { + Err(AlterError::AlreadyExists(name).into()) + } else { + Ok(()) + } + } else { + let mut columns = columns.iter(); + let column = columns.next().and_then(|column| match column.expr.clone() { + Expr::Identifier(ident) => Some(ident.value), + _ => None, + }); + if columns.next().is_some() { + Err(AlterError::UnsupportedNumberOfIndexColumns(name).into()) + } else if column + .as_ref() + .and_then(|column| { + schema + .column_defs + .iter() + .find(|column_def| &column_def.name == column) + }) + .is_none() + { + Err(AlterError::ColumnNotFound( + table_name.clone(), + column.unwrap_or_else(|| String::from("NILL")), + ) + .into()) + } else if let Some(column) = column { + let schema = schema.clone(); + let index = Index::new(name, column, unique); + index + .reset(database, table_name, &schema.column_defs) + .await?; + database + .alter_table(table_name, SchemaDiff::new_add_index(index)) + .await + } else { + unreachable!() + } + } + } +} diff --git a/src/executor/alter_table/create_table.rs b/src/executor/alter_table/create_table.rs new file mode 100644 index 00000000..59cdc40d --- /dev/null +++ b/src/executor/alter_table/create_table.rs @@ -0,0 +1,37 @@ +use { + super::AlterError, + crate::{data::Schema, Column, ComplexTableName, Glue, Result}, + sqlparser::ast::{ColumnDef, ObjectName}, +}; + +impl Glue { + pub async fn create_table( + &mut self, + name: &ObjectName, + column_defs: &[ColumnDef], + if_not_exists: bool, + ) -> Result<()> { + let ComplexTableName { + name: table_name, + database, + .. + } = name.try_into()?; + + let schema = Schema { + table_name, + column_defs: column_defs.iter().cloned().map(Column::from).collect(), + indexes: vec![], + }; + + let database = &mut **self.get_mut_database(&database)?; + if database.fetch_schema(&schema.table_name).await?.is_some() { + if !if_not_exists { + Err(AlterError::TableAlreadyExists(schema.table_name.to_owned()).into()) + } else { + Ok(()) + } + } else { + database.insert_schema(&schema).await + } + } +} diff --git a/src/executor/alter_table/drop.rs b/src/executor/alter_table/drop.rs new file mode 100644 index 00000000..169dcecd --- /dev/null +++ b/src/executor/alter_table/drop.rs @@ -0,0 +1,44 @@ +use { + super::AlterError, + crate::{ComplexTableName, Glue, Result, ValueDefault}, + sqlparser::ast::{ObjectName, ObjectType}, +}; + +impl Glue { + pub async fn drop( + &mut self, + object_type: &ObjectType, + names: &[ObjectName], + if_exists: bool, + ) -> Result<()> { + if object_type != &ObjectType::Table { + return Err(AlterError::DropTypeNotSupported(object_type.to_string()).into()); + } + + for name in names.iter() { + let ComplexTableName { + name: table_name, + database, + .. 
+ } = name.try_into()?; + + let database = &mut **self.get_mut_database(&database)?; + let schema = database.fetch_schema(&table_name).await?; + + if let Some(schema) = schema { + for column in schema.column_defs { + if matches!(column.default, Some(ValueDefault::AutoIncrement(_))) { + database + .set_increment_value(&table_name, &column.name, 1_i64) + .await?; + } + } + + database.delete_schema(&table_name).await?; + } else if !if_exists { + return Err(AlterError::TableNotFound(table_name.to_owned()).into()); + } + } + Ok(()) + } +} diff --git a/src/executor/alter_table/error.rs b/src/executor/alter_table/error.rs new file mode 100644 index 00000000..873beaec --- /dev/null +++ b/src/executor/alter_table/error.rs @@ -0,0 +1,42 @@ +use {serde::Serialize, std::fmt::Debug, thiserror::Error}; + +#[derive(Error, Serialize, Debug, PartialEq)] +pub enum AlterError { + // CREATE TABLE + #[error("table already exists: {0}")] + TableAlreadyExists(String), + + #[error("already exists: {0}")] + AlreadyExists(String), + + // ALTER TABLE + #[cfg(feature = "alter-table")] + #[error("unsupported alter table operation: {0}")] + UnsupportedAlterTableOperation(String), + + // DROP + #[error("drop type not supported: {0}")] + DropTypeNotSupported(String), + + #[error("table does not exist: {0}")] + TableNotFound(String), + + #[error("column {1} does not exist on table {0}")] + ColumnNotFound(String, String), + + // validate column def + #[error("unsupported data type: {0}")] + UnsupportedDataType(String), + + #[error("unsupported column option: {0}")] + UnsupportedColumnOption(String), + + #[error("unsupported number of index columns for new index '{0}'")] + UnsupportedNumberOfIndexColumns(String), + + #[error("column '{0}' of data type '{1}' is unsupported for unique constraint")] + UnsupportedDataTypeForUniqueColumn(String, String), + + #[error("column '{0}' of data type '{1}' is unsupported for auto increment constraint, only INTEGER is allowed")] + UnsupportedDataTypeForAutoIncrementColumn(String, String), +} diff --git a/src/executor/alter_table/mod.rs b/src/executor/alter_table/mod.rs new file mode 100644 index 00000000..7f7d9fc3 --- /dev/null +++ b/src/executor/alter_table/mod.rs @@ -0,0 +1,9 @@ +mod alter_table; +mod create_index; +mod create_table; +mod drop; +mod error; +mod truncate; +mod validate; +pub use error::AlterError; +use validate::validate; diff --git a/src/executor/alter_table/truncate.rs b/src/executor/alter_table/truncate.rs new file mode 100644 index 00000000..bee18c08 --- /dev/null +++ b/src/executor/alter_table/truncate.rs @@ -0,0 +1,38 @@ +use { + crate::{data::get_name, AlterError, DatabaseInner, Glue, Result, ValueDefault}, + futures::stream::{self, TryStreamExt}, + sqlparser::ast::ObjectName, +}; + +impl Glue { + pub async fn truncate(&mut self, table_name: &ObjectName) -> Result<()> { + let database = &mut **self.get_mut_database(&None)?; + let table_name = get_name(table_name)?; + let schema = database.fetch_schema(table_name).await?; + + if let Some(schema) = schema { + // TODO: We should be deleting the entry + #[cfg(feature = "auto-increment")] + let result: Result<&mut DatabaseInner> = stream::iter(schema.column_defs.iter().map(Ok)) + .try_fold(database, |database, column| async move { + if matches!(column.default, Some(ValueDefault::AutoIncrement(_))) { + database + .set_increment_value(table_name, &column.name, 1_i64) + .await?; + } + Ok(database) + }) + .await; + + #[cfg(feature = "auto-increment")] + let database = result?; + + // TODO: Maybe individual "truncate" 
operation + database.delete_schema(table_name).await?; // TODO: !!! This will delete INDEXes which it shouldn't! + database.insert_schema(&schema).await?; + Ok(()) + } else { + Err(AlterError::TableNotFound(table_name.to_owned()).into()) + } + } +} diff --git a/src/executor/alter_table/validate.rs b/src/executor/alter_table/validate.rs new file mode 100644 index 00000000..e657985e --- /dev/null +++ b/src/executor/alter_table/validate.rs @@ -0,0 +1,22 @@ +use { + crate::{AlterError, Result}, + sqlparser::ast::{ColumnDef, DataType}, +}; + +pub fn validate(column_def: &ColumnDef) -> Result<()> { + let ColumnDef { + data_type, + options: _, + name: _, + .. + } = column_def; + + if !matches!( + data_type, + DataType::Boolean | DataType::Int(_) | DataType::Float(_) | DataType::Text + ) { + return Err(AlterError::UnsupportedDataType(data_type.to_string()).into()); + } + + Ok(()) +} diff --git a/src/executor/execute.rs b/src/executor/execute.rs new file mode 100644 index 00000000..842f15e2 --- /dev/null +++ b/src/executor/execute.rs @@ -0,0 +1,170 @@ +use { + super::types::get_first_name, + crate::{parse_sql::Query, Glue, Result, Row}, + serde::Serialize, + sqlparser::ast::{ObjectType, Statement}, + thiserror::Error as ThisError, +}; + +#[derive(ThisError, Serialize, Debug, PartialEq)] +pub enum ExecuteError { + #[error("query not supported")] + QueryNotSupported, + + #[error("SET does not currently support columns, aggregates or subqueries")] + MissingComponentsForSet, + + #[error("unsupported insert value type: {0}")] + UnreachableUnsupportedInsertValueType(String), + + #[error("object not recognised")] + ObjectNotRecognised, + #[error("unimplemented")] + Unimplemented, + #[error("database already exists")] + DatabaseExists(String), + #[error("invalid file location")] + InvalidFileLocation, + #[error("invalid database location")] + InvalidDatabaseLocation, + + #[error("table does not exist")] + TableNotExists, + + #[error("column could not be found")] + ColumnNotFound, +} + +#[derive(Serialize, Debug, PartialEq)] +pub enum Payload { + Success, + Create, + Insert(usize), + Select { + labels: Vec<String>, + rows: Vec<Row>, + }, + Delete(usize), + Update(usize), + DropTable, + #[cfg(feature = "alter-table")] + AlterTable, + TruncateTable, +}
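Note: every statement funnels into one of these Payload variants, so callers match on the result rather than tracking which statement they sent. Typical consumption looks like this (a sketch; Payload is re-exported from the executor module):

	use multisql::Payload;

	fn report(payload: Payload) {
		match payload {
			Payload::Select { labels, rows } => {
				println!("{}", labels.join(" | "));
				println!("{} row(s)", rows.len());
			}
			Payload::Insert(n) => println!("inserted {n}"),
			Payload::Update(n) => println!("updated {n}"),
			Payload::Delete(n) => println!("deleted {n}"),
			_ => println!("ok"),
		}
	}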
 + +impl Glue { + pub async fn execute_query(&mut self, statement: &Query) -> Result<Payload> { + let Query(statement) = statement; + + match statement { + Statement::CreateDatabase { + db_name, + if_not_exists, + location, + .. + } => { + if !self.try_extend_from_path( + db_name.0[0].value.clone(), + location + .clone() + .ok_or(ExecuteError::InvalidDatabaseLocation)?, + )? && !if_not_exists + { + Err(ExecuteError::DatabaseExists(db_name.0[0].value.clone()).into()) + } else { + Ok(Payload::Success) + } + } + //- Modification + //-- Tables + Statement::CreateTable { + name, + columns, + if_not_exists, + .. + } => self + .create_table(name, columns, *if_not_exists) + .await + .map(|_| Payload::Create), + Statement::Drop { + object_type, + names, + if_exists, + .. + } => match object_type { + ObjectType::Schema => { + // Schema for now // TODO: sqlparser-rs#454 + if !self.reduce(&get_first_name(names)?) && !if_exists { + Err(ExecuteError::ObjectNotRecognised.into()) + } else { + Ok(Payload::Success) + } + } + object_type => self + .drop(object_type, names, *if_exists) + .await + .map(|_| Payload::DropTable), + }, + #[cfg(feature = "alter-table")] + Statement::AlterTable { name, operation } => self + .alter_table(name, operation) + .await + .map(|_| Payload::AlterTable), + Statement::Truncate { table_name, .. } => self + .truncate(table_name) + .await + .map(|_| Payload::TruncateTable), + Statement::CreateIndex { + name, + table_name, + columns, + unique, + if_not_exists, + } => self + .create_index(table_name, name, columns, *unique, *if_not_exists) + .await + .map(|_| Payload::Create), + + //-- Rows + Statement::Insert { + table_name, + columns, + source, + .. + } => self.insert(table_name, columns, source, false).await, + Statement::Update { + table, + selection, + assignments, + // TODO + from: _, + } => self.update(table, selection, assignments).await, + Statement::Delete { + table_name, + selection, + } => self.delete(table_name, selection).await, + + //- Selection + Statement::Query(query_value) => { + let result = self.query(*query_value.clone()).await?; + let (labels, rows) = result; + let rows = rows.into_iter().map(Row).collect(); // I don't like this. TODO + let payload = Payload::Select { labels, rows }; + Ok(payload) + } + + //- Context + Statement::SetVariable { + variable, value, .. + } => self + .set_variable(variable, value) + .await + .map(|_| Payload::Success), + + Statement::ExplainTable { table_name, .. } => self.explain(table_name).await, + + Statement::Execute { name, parameters } => self.procedure(name, parameters).await, + _ => Err(ExecuteError::QueryNotSupported.into()), + } + } +} diff --git a/src/executor/fetch.rs b/src/executor/fetch.rs new file mode 100644 index 00000000..c28f9807 --- /dev/null +++ b/src/executor/fetch.rs @@ -0,0 +1,38 @@ +use { + super::types::{ColumnInfo, ComplexTableName}, + crate::{result::Result, Column, DatabaseInner}, + serde::Serialize, + thiserror::Error as ThisError, +}; + +#[derive(ThisError, Serialize, Debug, PartialEq)] +pub enum FetchError { + #[error("table not found: {0}")] + TableNotFound(String), +} + +pub async fn fetch_columns( + storage: &DatabaseInner, + table: ComplexTableName, +) -> Result<Vec<ColumnInfo>> { + let schema = storage + .fetch_schema(&table.name) + .await? + .ok_or_else(|| FetchError::TableNotFound(table.name.clone()))?; + let columns = schema + .column_defs + .iter() + .map(|Column { name, .. 
}| { + let index = schema + .indexes + .iter() + .find_map(|index| (&index.column == name).then(|| index.name.clone())); + ColumnInfo { + table: table.clone(), + name: name.clone(), + index, + } + }) + .collect(); + Ok(columns) +} diff --git a/src/executor/mod.rs b/src/executor/mod.rs new file mode 100644 index 00000000..deada527 --- /dev/null +++ b/src/executor/mod.rs @@ -0,0 +1,20 @@ +mod alter_row; +mod alter_table; +mod execute; +mod fetch; +mod other; +mod procedure; +mod query; +mod recipe; +mod set_variable; +mod types; + +pub use { + alter_row::ValidateError, + alter_table::AlterError, + execute::{ExecuteError, Payload}, + fetch::FetchError, + query::{JoinError, ManualError, PlanError, QueryError, SelectError}, + recipe::*, + types::ComplexTableName, +}; diff --git a/src/executor/other/explain.rs b/src/executor/other/explain.rs new file mode 100644 index 00000000..da11764a --- /dev/null +++ b/src/executor/other/explain.rs @@ -0,0 +1,92 @@ +use crate::{DatabaseInner, ExecuteError, Payload, Row, Schema, Value}; +use crate::{Glue, Result}; +use sqlparser::ast::ObjectName; + +impl Glue { + pub async fn explain(&self, object: &ObjectName) -> Result<Payload> { + let mut name_vec = object.0.clone(); + let (store_name, opt_table_name) = match name_vec.len() { + 2 => ( + Some(name_vec.remove(0).value), + Some(name_vec.remove(0).value), + ), + 1 => { + let name = name_vec.remove(0).value; + if name == "ALL" { + let databases: Vec<Row> = self + .get_database_list() + .into_iter() + .map(|name| Row(vec![name.clone().into()])) + .collect(); + return Ok(Payload::Select { + labels: vec![String::from("database")], + rows: databases, + }); + } + if name == "ALL_TABLE" { + let mut tables = vec![]; + for db_name in self.get_database_list().into_iter() { + tables.extend( + self.get_database(&Some(db_name.clone()))? + .get_tables() + .await? + .iter() + .map(|table| Row(vec![db_name.clone().into(), table.clone()])), + ); + } + return Ok(Payload::Select { + labels: vec![String::from("database"), String::from("table")], + rows: tables, + }); + } else if self.get_database_list().contains(&&name) { + (Some(name), None) + } else { + (None, Some(name)) + } + } + _ => return Err(ExecuteError::ObjectNotRecognised.into()), + }; + + let database = self.get_database(&store_name)?; + if let Some(table_name) = opt_table_name { + let Schema { column_defs, .. } = database + .fetch_schema(&table_name) + .await? + .ok_or(ExecuteError::ObjectNotRecognised)?; + let columns = column_defs + .iter() + .map(|column| { + ( + column.name.clone().into(), + column.data_type.to_string().into(), + ) + }) + .map(|(name, data_type)| Row(vec![name, data_type])) + .collect(); + Ok(Payload::Select { + labels: vec![String::from("column"), String::from("data_type")], + rows: columns, + }) + } else { + Ok(Payload::Select { + labels: vec![String::from("table")], + rows: database + .get_tables() + .await? + .into_iter() + .map(|table| Row(vec![table])) + .collect(), + }) + } + } +} +impl DatabaseInner { + async fn get_tables(&self) -> Result<Vec<Value>> { + Ok(self + .scan_schemas() + .await? + .into_iter() + .map(|Schema { table_name, .. 
}| table_name.into()) + .collect()) + } +} diff --git a/src/executor/other/mod.rs b/src/executor/other/mod.rs new file mode 100644 index 00000000..3ab13279 --- /dev/null +++ b/src/executor/other/mod.rs @@ -0,0 +1 @@ +mod explain; diff --git a/src/executor/procedure.rs b/src/executor/procedure.rs new file mode 100644 index 00000000..d7129666 --- /dev/null +++ b/src/executor/procedure.rs @@ -0,0 +1,25 @@ +use { + crate::{ExecuteError, Glue, Payload, Result}, + sqlparser::ast::{Expr, Ident, Value as AstValue}, +}; + +impl Glue { + pub async fn procedure(&mut self, name: &Ident, parameters: &[Expr]) -> Result<Payload> { + return match name.value.as_str() { + "FILE" => { + if let Some(Ok(query)) = parameters.get(0).map(|path| { + if let Expr::Value(AstValue::SingleQuotedString(path)) = path { + std::fs::read_to_string(path).map_err(|_| ()) + } else { + Err(()) + } + }) { + self.execute(&query) + } else { + Err(ExecuteError::InvalidFileLocation.into()) + } + } + _ => Err(ExecuteError::Unimplemented.into()), + }; + } +}
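Note: the only procedure wired up so far is FILE, which reads a SQL script from disk and feeds it back through execute; everything else returns Unimplemented. Usage would look roughly like the following (a sketch: the path is illustrative, and the EXECUTE name(args) form is whatever the bundled sqlparser accepts):

	fn load_script(glue: &mut multisql::Glue) -> multisql::Result<multisql::Payload> {
		// Runs every statement in the script through the normal execute path.
		glue.execute("EXECUTE FILE('data/setup.sql')")
	}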
diff --git a/src/executor/query/mod.rs b/src/executor/query/mod.rs new file mode 100644 index 00000000..0ee20c48 --- /dev/null +++ b/src/executor/query/mod.rs @@ -0,0 +1,115 @@ +mod select; +mod set_expr; + +pub use select::{join::*, ManualError, PlanError, SelectError}; +use { + crate::{ + executor::types::LabelsAndRows, result::Result, Cast, Glue, MetaRecipe, RecipeUtilities, + Value, + }, + async_recursion::async_recursion, + serde::Serialize, + sqlparser::ast::{Cte, Query, TableAlias, With}, + thiserror::Error as ThisError, +}; + +const ENSURE_SIZE: bool = true; + +#[derive(ThisError, Serialize, Debug, PartialEq)] +pub enum QueryError { + #[error("query not supported")] + QueryNotSupported, + #[error("values does not support columns, aggregates or subqueries")] + MissingComponentsForValues, + #[error("limit does not support columns, aggregates or subqueries")] + MissingComponentsForLimit, + #[error("offset does not support columns, aggregates or subqueries")] + MissingComponentsForOffset, + #[error("expected values but found none")] + NoValues, + #[error( + "UNION/EXCEPT/INTERSECT columns misaligned, sides should have an equal number of columns" + )] + OperationColumnsMisaligned, +} + +impl Glue { + #[async_recursion(?Send)] + pub async fn query(&mut self, query: Query) -> Result<LabelsAndRows> { + let Query { + body, + order_by, + limit, + offset, + with, + // TODO (below) + fetch: _, + lock: _, + } = query; + + let limit: Option<usize> = limit + .map(|expression| { + MetaRecipe::new(expression)? + .simplify_by_context(&*self.get_context()?)? + .confirm_or_err(QueryError::MissingComponentsForLimit.into())? + .cast() + }) + .transpose()?; + let offset: Option<usize> = offset + .map(|offset| { + MetaRecipe::new(offset.value)? + .simplify_by_context(&*self.get_context()?)? + .confirm_or_err(QueryError::MissingComponentsForOffset.into())? + .cast() + }) + .transpose()?; + + if let Some(with) = with { + let With { + recursive: _, // Recursive not currently supported + cte_tables, + } = with; + for cte in cte_tables.into_iter() { + let Cte { + alias, + query, + from: _, // What is `from` for? + } = cte; + let TableAlias { + name, + columns: _, // TODO: Columns - Check that number is same and then rename labels + } = alias; + let name = name.value; + let data = self.query(query).await?; + self.get_mut_context()?.set_table(name, data); + } + } + + let (mut labels, mut rows) = self.from_body(body, order_by).await?; + + if let Some(offset) = offset { + rows.drain(0..offset); + } + if let Some(limit) = limit { + rows.truncate(limit); + } + if ENSURE_SIZE { + let row_width = rows + .iter() + .map(|values_row| values_row.len()) + .max() + .unwrap_or(0); + if row_width > 0 { + rows = rows + .into_iter() + .map(|mut row| { + row.resize(row_width, Value::Null); + row + }) + .collect(); + labels.resize(row_width, String::new()) + }; + } + Ok((labels, rows)) + } +}
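Note: query() resolves WITH blocks eagerly: each CTE is executed once and registered in the context as a named in-memory table, which the join machinery later prefers over storage; OFFSET and LIMIT are applied after from_body, and ENSURE_SIZE pads ragged rows (and labels) with NULLs so every row has the same width. In SQL terms (a sketch, again assuming the blocking execute wrapper and illustrative table names):

	fn demo(glue: &mut multisql::Glue) -> multisql::Result<multisql::Payload> {
		// `adults` is materialised once, parked in the context,
		// then read like a table by the outer SELECT.
		glue.execute(
			"WITH adults AS (SELECT name FROM users WHERE age >= 18) \
			 SELECT name FROM adults LIMIT 10 OFFSET 2",
		)
	}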
diff --git a/src/executor/query/select/join/execute.rs b/src/executor/query/select/join/execute.rs new file mode 100644 index 00000000..62e6fe62 --- /dev/null +++ b/src/executor/query/select/join/execute.rs @@ -0,0 +1,142 @@ +use { + super::{JoinError, JoinMethod, JoinPlan, JoinType}, + crate::{ + executor::types::{ColumnInfo, Row}, + DatabaseInner, Glue, IndexFilter, Ingredient, MetaRecipe, Method, PlannedRecipe, Recipe, + Result, Value, + }, +}; + +#[derive(Debug)] +pub struct JoinExecute { + pub database: Option<String>, + pub table: String, + pub method: JoinMethod, + pub join_type: JoinType, + pub widths: (usize, usize), + pub index_filter: Option<IndexFilter>, +} + +impl JoinExecute { + pub fn new( + plan: JoinPlan, + plane_columns: &[ColumnInfo], + index_filter: Option<IndexFilter>, + ) -> Result<Self> { + let JoinPlan { + database, + table, + join_type, + constraint, + columns, + .. + } = plan; + let widths = (plane_columns.len(), columns.len()); + let method = decide_method(constraint, columns, plane_columns)?; + Ok(Self { + database, + table, + method, + join_type, + widths, + index_filter, + }) + } + pub fn set_first_table(&mut self) { + self.method = JoinMethod::FirstTable; + } + pub async fn get_rows<'a>(&self, storage: &DatabaseInner) -> Result<Vec<Row>> { + if let Some(index_filter) = self.index_filter.clone() { + storage.scan_data_indexed(self.table.as_str(), index_filter) + } else { + storage.scan_data(self.table.as_str()) + } + .await + .map(|plane| { + plane + .into_iter() + .map(|(_, row)| row.0) + .collect::<Vec<Row>>() + }) + } + pub async fn execute<'a>(self, glue: &Glue, plane_rows: Vec<Row>) -> Result<Vec<Row>> { + let rows = + if let Some((.., context_table_rows)) = glue.get_context()?.tables.get(&self.table) { + Ok(context_table_rows.clone()) + } else { + self.get_rows(&**glue.get_database(&self.database)?).await + }?; + self.method.run( + &self.join_type, + self.widths.0, + self.widths.1, + plane_rows, + rows, + ) + } +} + +fn decide_method( + constraint: MetaRecipe, + self_columns: Vec<ColumnInfo>, + plane_columns: &[ColumnInfo], +) -> Result<JoinMethod> { + Ok(match &constraint.recipe { + Recipe::Ingredient(Ingredient::Value(Value::Bool(true))) => JoinMethod::All, + Recipe::Method(method) => match **method { + Method::BinaryOperation( + operator, + Recipe::Ingredient(Ingredient::Column(index_l)), + Recipe::Ingredient(Ingredient::Column(index_r)), + ) if operator == Value::eq => { + // TODO: Be more strict, ensure that one column is from plane, and another from self. + let column_l = constraint + .meta + .objects + .get(index_l) + .ok_or(JoinError::Unreachable)? + .as_ref() + .ok_or(JoinError::Unreachable)?; + let column_r = constraint + .meta + .objects + .get(index_r) + .ok_or(JoinError::Unreachable)? + .as_ref() + .ok_or(JoinError::Unreachable)?; + + let (self_index, plane_index) = if let Some(self_index) = + self_columns.iter().position(|column| column == column_l) + { + let plane_index = plane_columns + .iter() + .position(|column| column == column_r) + .ok_or(JoinError::Unreachable)?; + (self_index, plane_index) + } else { + let self_index = self_columns + .iter() + .position(|column| column == column_r) + .ok_or(JoinError::Unreachable)?; + let plane_index = plane_columns + .iter() + .position(|column| column == column_l) + .ok_or(JoinError::Unreachable)?; + (self_index, plane_index) + }; + + JoinMethod::ColumnEqColumn { + plane_trust_ordered: false, + plane_index, + self_trust_ordered: false, + self_index, + } + } + // TODO: Methods for: + // (plan)Column = (other)Column AND (plan)Column = (other or otherother)Column + // (plan)Column = (other)Column OR (plan)Column = (other or otherother)Column + _ => JoinMethod::General(PlannedRecipe::new(constraint.clone(), plane_columns)?), + }, + _ => JoinMethod::Ignore, + }) +} diff --git a/src/executor/query/select/join/manual.rs b/src/executor/query/select/join/manual.rs new file mode 100644 index 00000000..8842b120 --- /dev/null +++ b/src/executor/query/select/join/manual.rs @@ -0,0 +1,53 @@ +use { + super::{JoinError, JoinType}, + crate::{ + executor::{types::ComplexTableName, MetaRecipe}, + Context, Result, + }, + sqlparser::ast::{Join as AstJoin, JoinConstraint, JoinOperator, TableFactor}, +}; + +#[derive(Debug, Clone)] +pub struct JoinManual { + pub table: ComplexTableName, + pub constraint: MetaRecipe, + pub join_type: JoinType, +} + +impl JoinManual { + pub fn new(join: AstJoin, context: &Context) -> Result<Self> { + let table = join.relation.try_into()?; + let (join_type, constraint) = Self::convert_join(join.join_operator)?; + let constraint = constraint.simplify_by_context(context)?; + Ok(Self { + table, + join_type, + constraint, + }) + } + pub fn new_implicit_join(table: TableFactor) -> Result<Self> { + let table = table.try_into()?; + let (join_type, constraint) = (JoinType::CrossJoin, MetaRecipe::TRUE); + Ok(Self { + table, + join_type, + constraint, + }) + } + fn convert_join(from: JoinOperator) -> Result<(JoinType, MetaRecipe)> { + let (join_type, constraint) = match from { + JoinOperator::Inner(constraint) => (JoinType::Inner, Some(constraint)), + JoinOperator::LeftOuter(constraint) => (JoinType::Left, Some(constraint)), + JoinOperator::RightOuter(constraint) => (JoinType::Right, Some(constraint)), + JoinOperator::FullOuter(constraint) => (JoinType::Full, Some(constraint)), + JoinOperator::CrossJoin => (JoinType::CrossJoin, None), + _ => return Err(JoinError::UnimplementedJoinType.into()), + }; + let constraint = match constraint { + Some(JoinConstraint::On(constraint)) => MetaRecipe::new(constraint)?, + Some(JoinConstraint::None) | None => MetaRecipe::TRUE, + _ => return Err(JoinError::UnimplementedJoinConstraint.into()), + }; + Ok((join_type, constraint)) + } +} diff --git a/src/executor/query/select/join/method.rs b/src/executor/query/select/join/method.rs new file mode 100644 index 00000000..e1616ba2 --- /dev/null +++ b/src/executor/query/select/join/method.rs @@ -0,0 +1,286 @@ +use { + super::JoinType, + crate::{ + executor::{types::Row, PlannedRecipe}, + macros::try_option, + JoinError, NullOrd, Result, Value, + }, + rayon::prelude::*, + std::{cmp::Ordering, fmt::Debug}, +}; + +macro_rules! 
unwrap_or_break { + ($unwrap: expr) => { + match $unwrap { + Some(value) => value, + None => { + break; + } + } + }; +} + +#[derive(Debug)] +pub enum JoinMethod { + General(PlannedRecipe), + ColumnEqColumn { + plane_index: usize, + plane_trust_ordered: bool, + self_index: usize, + self_trust_ordered: bool, + }, + All, + FirstTable, + Ignore, +} + +impl JoinMethod { + pub fn run( + self, + join: &JoinType, + left_width: usize, + right_width: usize, + mut plane_rows: Vec, + mut self_rows: Vec, + ) -> Result> { + // Very crucuial to have performant, needs *a lot* of optimisation. + Ok(match self { + JoinMethod::Ignore => plane_rows, + JoinMethod::FirstTable => self_rows, + JoinMethod::All => self_rows + .into_iter() + .fold(vec![], |mut result_rows, self_row| { + let joined_rows = plane_rows.clone().into_iter().map(|mut plane_row| { + plane_row.extend(self_row.clone()); + plane_row + }); + result_rows.extend(joined_rows); + result_rows + }), + JoinMethod::General(recipe) => { + let unfolded_rows = plane_rows + .into_par_iter() + .map(|left_row| { + let inner_rows = self_rows + .iter() + .enumerate() + .filter_map(|(index, right_row)| { + if try_option!(recipe.confirm_join_constraint(&left_row, right_row)) + { + Some(Ok(( + index, + join_parts(left_row.clone(), right_row.clone()), + ))) + } else { + None + } + }) + .collect::>>()?; + Ok(if inner_rows.is_empty() && join.includes_left() { + ( + vec![], + vec![join_parts(left_row, vec![Value::Null; right_width])], + ) + } else { + inner_rows.into_iter().unzip() + }) + }) + .collect::, Vec)>>>()?; + let (mut used_right_indexes, mut rows): (Vec, Vec) = unfolded_rows + .into_iter() + .reduce( + |mut all: (Vec, Vec), set: (Vec, Vec)| { + all.0.extend(set.0); + all.1.extend(set.1); + all + }, + ) + .unwrap_or((vec![], vec![])); + used_right_indexes.par_sort_unstable(); + used_right_indexes.dedup(); + self_rows.iter().enumerate().for_each(|(index, row)| { + if !used_right_indexes.iter().any(|used| used == &index) + && join.includes_right() + { + rows.push(join_parts(vec![Value::Null; left_width], row.clone())) + } + }); + rows + } + JoinMethod::ColumnEqColumn { + plane_index, + plane_trust_ordered, + self_index, + self_trust_ordered, + } => { + if !plane_trust_ordered { + plane_rows.par_sort_unstable_by(|row_l, row_r| { + row_l + .get(plane_index) + .and_then(|row_l| { + row_r + .get(plane_index) + .and_then(|row_r| row_l.null_cmp(row_r)) + }) + .unwrap_or(Ordering::Equal) + }); + } + + // partition + let mut left_partitions = plane_rows + .into_iter() + .fold( + vec![], + |mut partitions: Vec<(Value, Vec)>, row: Row| { + let value = row.get(plane_index).unwrap().clone(); // TODO: Handle + if let Some(last) = partitions.last_mut() { + if last.0 == value { + last.1.push(row); + } else { + partitions.push((value, vec![row])); + } + partitions + } else { + vec![(value, vec![row])] + } + }, + ) + .into_iter() + .peekable(); + + if !self_trust_ordered { + self_rows.par_sort_unstable_by(|row_l, row_r| { + row_l + .get(self_index) + .and_then(|row_l| { + row_r + .get(self_index) + .and_then(|row_r| row_l.null_cmp(row_r)) + }) + .unwrap_or(Ordering::Equal) + }); + } + + // partition + let mut right_partitions = self_rows + .into_iter() + .fold( + vec![], + |mut partitions: Vec<(Value, Vec)>, row: Row| { + let value = row.get(self_index).unwrap().clone(); // TODO: Handle + if let Some(last) = partitions.last_mut() { + if last.0 == value { + last.1.push(row); + } else { + partitions.push((value, vec![row])); + } + partitions + } else { + vec![(value, vec![row])] 
+ } + }, + ) + .into_iter() + .peekable(); + + let mut left_results = vec![]; + let mut inner_results = vec![]; + let mut right_results = vec![]; + + loop { + // TODO: There's probably a better way to do this + match unwrap_or_break!(left_partitions.peek()) + .0 + .null_cmp(&unwrap_or_break!(right_partitions.peek()).0) + { + Some(Ordering::Less) => { + left_results + .push(left_partitions.next().ok_or(JoinError::Unreachable)?); + } + Some(Ordering::Equal) => { + inner_results.push(( + left_partitions.next().ok_or(JoinError::Unreachable)?, + right_partitions.next().ok_or(JoinError::Unreachable)?, + )); + } + None => { + left_results + .push(left_partitions.next().ok_or(JoinError::Unreachable)?); + right_results + .push(right_partitions.next().ok_or(JoinError::Unreachable)?); + } + Some(Ordering::Greater) => { + right_results + .push(right_partitions.next().ok_or(JoinError::Unreachable)?); + } + } + } + // In case any remain + left_results.extend(left_partitions); + right_results.extend(right_partitions); + + let left_rows = left_results + .into_par_iter() + .map(|(_, left_rows)| { + left_rows + .into_iter() + .map(|left| join_parts(left, vec![Value::Null; right_width])) + .collect::>() + }) + .reduce(Vec::new, |mut all, set| { + all.extend(set); + all + }); + + let mut inner_rows = inner_results + .into_par_iter() + .map(|((_, left_rows), (_, right_rows))| { + left_rows + .into_iter() + .map(|left| { + right_rows + .clone() + .into_iter() + .map(|right| join_parts(left.clone(), right)) + .collect() + }) + .reduce(|mut all: Vec, set| { + all.extend(set); + all + }) + .unwrap_or_default() + }) + .reduce(Vec::new, |mut all: Vec, set| { + all.extend(set); + all + }); + + let right_rows = right_results + .into_par_iter() + .map(|(_, right_rows)| { + right_rows + .into_iter() + .map(|right| join_parts(vec![Value::Null; left_width], right)) + .collect::>() + }) + .reduce(Vec::new, |mut all, set| { + all.extend(set); + all + }); + + if join.includes_left() { + inner_rows.extend(left_rows) + }; + if join.includes_right() { + inner_rows.extend(right_rows) + }; + inner_rows + } + }) + } +} + +fn join_parts(mut left: Vec, right: Vec) -> Vec { + left.extend(right); + left +} diff --git a/src/executor/query/select/join/mod.rs b/src/executor/query/select/join/mod.rs new file mode 100644 index 00000000..06391d78 --- /dev/null +++ b/src/executor/query/select/join/mod.rs @@ -0,0 +1,49 @@ +mod execute; +mod manual; +mod method; +mod plan; +use { + crate::executor::types::ComplexTableName, serde::Serialize, std::fmt::Debug, + thiserror::Error as ThisError, +}; +pub use {execute::JoinExecute, manual::JoinManual, method::JoinMethod, plan::JoinPlan}; + +#[derive(ThisError, Serialize, Debug, PartialEq)] +pub enum JoinError { + #[error("table '{0:?}' could not be found")] + TableNotFound(ComplexTableName), + #[error("column '{0:?}' could not be found")] + ColumnNotFound(Option>), + + #[error("join type not yet implemented")] + UnimplementedJoinType, + #[error("join constraint not yet implemented")] + UnimplementedJoinConstaint, + #[error("table type not yet implemented")] + UnimplementedTableType, + #[error("amount of components in identifier not yet supported")] + UnimplementedNumberOfComponents, + + #[error("this should be impossible, please report")] + UnreachableCellNotFound, + #[error("this should be impossible, please report")] + Unreachable, +} + +#[derive(Ord, Eq, PartialEq, PartialOrd, Debug, Clone)] +pub enum JoinType { + CrossJoin, // All join: NO FILTER + Inner, // Reduces rows so go first ideally -- 
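The `ColumnEqColumn` arm above implements a sort-merge equi-join: each input is sorted on its key column, collapsed into runs of equal keys, and the runs are merged in a single pass. A minimal self-contained sketch of that merge, with toy `i32` keys and `&str` rows standing in for multisql's `Value` and `Row` (illustrative only, not the crate's API):

```rust
use std::cmp::Ordering;

/// Merge two key-sorted partition lists, pairing runs with equal keys
/// (the inner part of a sort-merge join).
fn merge_equal_runs(
    left: Vec<(i32, Vec<&'static str>)>,
    right: Vec<(i32, Vec<&'static str>)>,
) -> Vec<(&'static str, &'static str)> {
    let mut left = left.into_iter().peekable();
    let mut right = right.into_iter().peekable();
    let mut inner = vec![];
    while let (Some(l), Some(r)) = (left.peek(), right.peek()) {
        match l.0.cmp(&r.0) {
            // Unmatched left run: a LEFT/FULL join would null-pad it here
            Ordering::Less => {
                left.next();
            }
            // Unmatched right run: a RIGHT/FULL join would null-pad it here
            Ordering::Greater => {
                right.next();
            }
            Ordering::Equal => {
                let (_, l_rows) = left.next().unwrap();
                let (_, r_rows) = right.next().unwrap();
                for l_row in &l_rows {
                    for r_row in &r_rows {
                        inner.push((*l_row, *r_row)); // cartesian product within the run
                    }
                }
            }
        }
    }
    inner
}

fn main() {
    // Runs must already be sorted by key, as after par_sort_unstable_by above.
    let left = vec![(1, vec!["a1"]), (2, vec!["b1", "b2"]), (4, vec!["d1"])];
    let right = vec![(2, vec!["x"]), (3, vec!["y"]), (4, vec!["z"])];
    assert_eq!(
        merge_equal_runs(left, right),
        vec![("b1", "x"), ("b2", "x"), ("d1", "z")]
    );
}
```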
diff --git a/src/executor/query/select/join/mod.rs b/src/executor/query/select/join/mod.rs
new file mode 100644
index 00000000..06391d78
--- /dev/null
+++ b/src/executor/query/select/join/mod.rs
@@ -0,0 +1,49 @@
+mod execute;
+mod manual;
+mod method;
+mod plan;
+use {
+	crate::executor::types::ComplexTableName, serde::Serialize, std::fmt::Debug,
+	thiserror::Error as ThisError,
+};
+pub use {execute::JoinExecute, manual::JoinManual, method::JoinMethod, plan::JoinPlan};
+
+#[derive(ThisError, Serialize, Debug, PartialEq)]
+pub enum JoinError {
+	#[error("table '{0:?}' could not be found")]
+	TableNotFound(ComplexTableName),
+	#[error("column '{0:?}' could not be found")]
+	ColumnNotFound(Option<Vec<String>>),
+
+	#[error("join type not yet implemented")]
+	UnimplementedJoinType,
+	#[error("join constraint not yet implemented")]
+	UnimplementedJoinConstaint,
+	#[error("table type not yet implemented")]
+	UnimplementedTableType,
+	#[error("number of components in identifier not yet supported")]
+	UnimplementedNumberOfComponents,
+
+	#[error("this should be impossible, please report")]
+	UnreachableCellNotFound,
+	#[error("this should be impossible, please report")]
+	Unreachable,
+}
+
+#[derive(Ord, Eq, PartialEq, PartialOrd, Debug, Clone)]
+pub enum JoinType {
+	CrossJoin, // Cross join: no filter, keep every pairing
+	Inner,     // Reduces rows, so should ideally run first -- has bugs for now
+	Left,
+	Right,
+	Full,
+}
+
+impl JoinType {
+	pub fn includes_left(&self) -> bool {
+		matches!(self, JoinType::Left | JoinType::Full)
+	}
+	pub fn includes_right(&self) -> bool {
+		matches!(self, JoinType::Right | JoinType::Full)
+	}
+}
diff --git a/src/executor/query/select/join/plan.rs b/src/executor/query/select/join/plan.rs
new file mode 100644
index 00000000..9af35cc3
--- /dev/null
+++ b/src/executor/query/select/join/plan.rs
@@ -0,0 +1,101 @@
+use {
+	super::{JoinManual, JoinType},
+	crate::{
+		executor::{
+			fetch::fetch_columns,
+			types::{ColumnInfo, ComplexTableName},
+			MetaRecipe,
+		},
+		Glue, Result,
+	},
+	std::cmp::Ordering,
+};
+
+#[derive(Debug)]
+pub struct JoinPlan {
+	pub database: Option<String>,
+	pub table: String,
+	pub columns: Vec<ColumnInfo>,
+	pub join_type: JoinType,
+	pub constraint: MetaRecipe,
+	pub needed_tables: Vec<usize>,
+}
+impl PartialEq for JoinPlan {
+	fn eq(&self, _other: &Self) -> bool {
+		false
+	}
+}
+impl Eq for JoinPlan {}
+impl PartialOrd for JoinPlan {
+	fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+		Some(self.join_type.cmp(&other.join_type))
+	}
+}
+impl Ord for JoinPlan {
+	fn cmp(&self, other: &Self) -> Ordering {
+		self.join_type.cmp(&other.join_type)
+	}
+}
+
+impl JoinPlan {
+	pub async fn new(join_manual: JoinManual, glue: &Glue) -> Result<Self> {
+		let JoinManual {
+			table,
+			constraint,
+			join_type,
+		} = join_manual;
+		let columns = get_columns(glue, table.clone()).await?;
+		let ComplexTableName {
+			database,
+			name: table,
+			..
+		} = table;
+		Ok(Self {
+			database,
+			table,
+			join_type,
+			columns,
+			constraint,
+			needed_tables: vec![],
+		})
+	}
+	pub fn calculate_needed_tables(&mut self, table_columns: &[Vec<ColumnInfo>]) {
+		self.needed_tables = table_columns
+			.iter()
+			.enumerate()
+			.filter_map(|(index, columns)| {
+				if columns.iter().any(|table_column| {
+					self.constraint
+						.meta
+						.objects
+						.iter()
+						.any(|constraint_column| {
+							constraint_column
+								.as_ref()
+								.map(|constraint_column| table_column == constraint_column)
+								.unwrap_or(false)
+						})
+				}) {
+					Some(index)
+				} else {
+					None
+				}
+			})
+			.collect()
+	}
+}
+
+async fn get_columns(glue: &Glue, table: ComplexTableName) -> Result<Vec<ColumnInfo>> {
+	if let Some((context_table_labels, ..)) = glue.get_context()?.tables.get(&table.name) {
+		Ok(context_table_labels
+			.iter()
+			.map(|name| ColumnInfo {
+				table: table.clone(),
+				name: name.clone(),
+				index: None,
+			})
+			.collect::<Vec<ColumnInfo>>())
+	} else {
+		fetch_columns(&**glue.get_database(&table.database)?, table).await
+	}
+}
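The planner later sorts `JoinPlan`s by `join_type` so that row-reducing joins run first; this works because `#[derive(Ord)]` orders enum variants by declaration. A toy demonstration of that property (hypothetical enum, mirroring the declaration order above):

```rust
// Not multisql's actual type: a minimal enum showing why `joins.sort_unstable()`
// puts cross/inner joins before outer ones -- derived Ord follows variant order,
// so CrossJoin < Inner < Left < Right < Full.
#[derive(Ord, Eq, PartialEq, PartialOrd, Debug)]
enum ToyJoinType {
    CrossJoin,
    Inner,
    Left,
    Right,
    Full,
}

fn main() {
    let mut joins = vec![ToyJoinType::Full, ToyJoinType::Inner, ToyJoinType::Left];
    joins.sort_unstable();
    assert_eq!(
        format!("{:?}", joins),
        "[Inner, Left, Full]" // inner joins are executed before outer ones
    );
}
```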
diff --git a/src/executor/query/select/manual/mod.rs b/src/executor/query/select/manual/mod.rs
new file mode 100644
index 00000000..8c8af41d
--- /dev/null
+++ b/src/executor/query/select/manual/mod.rs
@@ -0,0 +1,164 @@
+use {
+	super::join::JoinManual,
+	crate::{
+		executor::{
+			types::{Alias, ObjectName},
+			MetaRecipe,
+		},
+		Context, Result,
+	},
+	serde::Serialize,
+	sqlparser::ast::{Expr, Ident, Select, SelectItem as SelectItemAst},
+	std::fmt::Debug,
+	thiserror::Error as ThisError,
+};
+
+#[derive(ThisError, Serialize, Debug, PartialEq)]
+pub enum ManualError {
+	#[error("subqueries are not yet supported")]
+	UnimplementedSubquery,
+
+	#[error("this should be impossible, please report")]
+	UncaughtASTError(String),
+
+	#[error("this should be impossible, please report")]
+	Unreachable,
+}
+
+pub struct Manual {
+	pub joins: Vec<JoinManual>,
+	pub select_items: Vec<SelectItem>,
+	pub constraint: MetaRecipe,
+	pub group_constraint: MetaRecipe,
+	pub groups: Vec<MetaRecipe>,
+}
+pub enum SelectItem {
+	Recipe(MetaRecipe, Alias),
+	Wildcard(Option<ObjectName>),
+}
+
+impl Manual {
+	pub fn new(select: Select, context: &Context) -> Result<Self> {
+		let Select {
+			projection,
+			from,
+			selection,
+			group_by,
+			having,
+			// TODO (below)
+			distinct: _,
+			top: _,
+			lateral_views: _,
+			cluster_by: _,
+			distribute_by: _,
+			sort_by: _,
+			into: _,
+		} = select;
+
+		let constraint = selection
+			.map(|selection| MetaRecipe::new(selection)?.simplify_by_context(context))
+			.unwrap_or(Ok(MetaRecipe::TRUE))?;
+
+		let group_constraint = having
+			.map(|having| MetaRecipe::new(having)?.simplify_by_context(context))
+			.unwrap_or(Ok(MetaRecipe::TRUE))?;
+
+		let groups = group_by
+			.into_iter()
+			.map(|expression| MetaRecipe::new(expression)?.simplify_by_context(context))
+			.collect::<Result<Vec<MetaRecipe>>>()?;
+
+		let (select_items, _subqueries): (Vec<SelectItem>, Vec<Vec<JoinManual>>) = projection
+			.into_iter()
+			.map(|select_item| convert_select_item(select_item, context))
+			.collect::<Result<Vec<(SelectItem, Vec<JoinManual>)>>>()?
+			.into_iter()
+			.unzip();
+
+		/*subqueries.push(constraint.meta.subqueries.clone());
+
+		let subqueries = subqueries
+			.into_iter()
+			.reduce(|mut all_subqueries, subqueries| {
+				all_subqueries.extend(subqueries);
+				all_subqueries
+			})
+			.ok_or(ManualError::UncaughtASTError(String::from(
+				"Supposedly subqueries yet none found",
+			)))?;
+		// Subqueries TODO
+		// Issues:
+		// - Current method can expand plane on multiple match
+		// - No plane isolation (ambiguous columns because subquery columns and plane columns are treated the same)
+		if !subqueries.is_empty() {
+			return Err(ManualError::UnimplementedSubquery.into());
+		}*/
+
+		let /*mut*/ joins = from
+			.into_iter()
+			.map(|from| {
+				let main = JoinManual::new_implicit_join(from.relation)?;
+				let mut joins = from
+					.joins
+					.into_iter()
+					.map(|join| JoinManual::new(join, context))
+					.collect::<Result<Vec<JoinManual>>>()?;
+				joins.push(main);
+				Ok(joins)
+			})
+			.collect::<Result<Vec<Vec<JoinManual>>>>()?
+			.into_iter()
+			.reduce(|mut all_joins, joins| {
+				all_joins.extend(joins);
+				all_joins
+			})
+			.ok_or_else(|| ManualError::UncaughtASTError(String::from("No tables")))?;
+		//joins.extend(subqueries);
+		//let joins = joins;
+
+		Ok(Manual {
+			joins,
+			select_items,
+			constraint,
+			group_constraint,
+			groups,
+		})
+	}
+}
+
+fn identifier_into_object_name(identifier: Vec<Ident>) -> ObjectName {
+	identifier
+		.into_iter()
+		.map(|identifier| identifier.value)
+		.collect()
+}
+
+fn convert_select_item(
+	select_item: SelectItemAst,
+	context: &Context,
+) -> Result<(SelectItem, Vec<JoinManual>)> {
+	Ok(match select_item {
+		SelectItemAst::UnnamedExpr(_) | SelectItemAst::ExprWithAlias { .. } => {
+			let (expression, alias) = match select_item {
+				SelectItemAst::UnnamedExpr(expression) => {
+					let alias = if let Expr::Identifier(identifier) = expression.clone() {
+						Some(identifier.value)
+					} else {
+						None
+					};
+					(expression, alias)
+				}
+				SelectItemAst::ExprWithAlias { expr, alias } => (expr, Some(alias.value)),
+				_ => unreachable!(),
+			};
+			let recipe = MetaRecipe::new(expression)?.simplify_by_context(context)?;
+			let subqueries = recipe.meta.subqueries.clone();
+			(SelectItem::Recipe(recipe, alias), subqueries)
+		}
+		SelectItemAst::Wildcard => (SelectItem::Wildcard(None), vec![]),
+		SelectItemAst::QualifiedWildcard(qualifier) => (
+			SelectItem::Wildcard(Some(identifier_into_object_name(qualifier.0))),
+			vec![],
+		),
+	})
+}
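`convert_select_item` gives a bare identifier its own name as an implicit alias, while other unnamed expressions stay unlabelled until the planner numbers them. A toy sketch of that rule (simplified stand-in for the sqlparser AST):

```rust
// Toy mirror of the implicit-alias rule: a bare column keeps its own name as
// the label; a compound expression stays unnamed unless an explicit AS is given.
enum ToyExpr {
    Identifier(String),
    Binary(Box<ToyExpr>, Box<ToyExpr>),
}

fn implicit_alias(expr: &ToyExpr) -> Option<String> {
    if let ToyExpr::Identifier(name) = expr {
        Some(name.clone()) // SELECT a     -> labelled "a"
    } else {
        None // SELECT a + b -> labelled later as "unnamed_{index}"
    }
}

fn main() {
    let bare = ToyExpr::Identifier("a".into());
    let sum = ToyExpr::Binary(
        Box::new(ToyExpr::Identifier("a".into())),
        Box::new(ToyExpr::Identifier("b".into())),
    );
    assert_eq!(implicit_alias(&bare), Some("a".into()));
    assert_eq!(implicit_alias(&sum), None);
}
```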
diff --git a/src/executor/query/select/mod.rs b/src/executor/query/select/mod.rs
new file mode 100644
index 00000000..af0e8e22
--- /dev/null
+++ b/src/executor/query/select/mod.rs
@@ -0,0 +1,185 @@
+pub mod join;
+mod manual;
+mod order;
+mod plan;
+
+use {
+	crate::{
+		executor::{
+			types::{LabelsAndRows, Row},
+			PlannedRecipe,
+		},
+		macros::try_option,
+		Glue, RecipeUtilities, Result, Value,
+	},
+	futures::stream::{self, StreamExt, TryStreamExt},
+	rayon::prelude::*,
+	serde::Serialize,
+	sqlparser::ast::{OrderByExpr, Select},
+	thiserror::Error as ThisError,
+};
+pub use {
+	manual::{Manual, ManualError, SelectItem},
+	order::Order,
+	plan::{Plan, PlanError},
+};
+
+#[derive(ThisError, Serialize, Debug, PartialEq)]
+pub enum SelectError {
+	#[error("groupers may not contain aggregates")]
+	GrouperMayNotContainAggregate,
+
+	#[error("an aggregate was likely used where aggregates are not allowed")]
+	FinalSolveFailure,
+
+	#[error("HAVING does not yet support aggregates")]
+	UnimplementedAggregateHaving,
+
+	#[error("this should be impossible, please report")]
+	UnreachableFinalSolveFailure,
+	#[error("this should be impossible, please report")]
+	Unreachable,
+}
+
+impl Glue {
+	pub async fn select(&mut self, plan: Plan) -> Result<LabelsAndRows> {
+		let Plan {
+			joins,
+			select_items,
+			constraint,
+			group_constraint,
+			groups,
+			order_by,
+			labels,
+		} = plan;
+		let rows = stream::iter(joins)
+			.map(Ok)
+			.try_fold(vec![], |rows, join| async {
+				join.execute(self, rows).await
+			})
+			.await?;
+
+		let rows = order_by.execute(rows)?; // TODO: This should be done after filtering
+
+		let selected_rows =
+			rows.into_par_iter()
+				.filter_map(|row| match constraint.confirm_constraint(&row) {
+					Ok(true) => Some(
+						select_items
+							.clone()
+							.into_iter()
+							.map(|selection| selection.simplify_by_row(&row))
+							.collect::<Result<Vec<PlannedRecipe>>>()
+							.map(|selection| (selection, row)),
+					),
+					Ok(false) => None,
+					Err(error) => Some(Err(error)),
+				});
+		let do_group = !groups.is_empty()
+			|| select_items
+				.iter()
+				.any(|select_item| !select_item.aggregates.is_empty());
+
+		let final_rows = if do_group {
+			let groups = if groups.is_empty() {
+				vec![PlannedRecipe::TRUE]
+			} else {
+				groups
+			};
+
+			let accumulations: Vec<(Vec<Value>, Option<PlannedRecipe>, Vec<PlannedRecipe>)> =
+				selected_rows
+					.filter_map(|selection| {
+						let (selected_row, row) = try_option!(selection);
+						let group_constraint =
+							try_option!(group_constraint.clone().simplify_by_row(&row));
+						let group_constraint = match group_constraint.as_solution() {
+							Some(Value::Bool(true)) => None,
+							Some(Value::Bool(false)) => return None,
+							Some(_) => unreachable!(), // TODO: Handle
+							None => Some(group_constraint),
+						};
+						let groupers = try_option!(groups
+							.iter()
+							.map(|group| {
+								group.clone().simplify_by_row(&row)?.confirm_or_err(
+									SelectError::GrouperMayNotContainAggregate.into(),
+								)
+							})
+							.collect::<Result<Vec<Value>>>());
+						Some(Ok((groupers, group_constraint, selected_row)))
+					})
+					.map::<_, Result<_>>(|acc| acc.map(|acc| vec![acc]))
+					.try_reduce_with(accumulate)
+					.unwrap_or(Ok(vec![]))?; // TODO: Improve
+
+			accumulations
+				.into_par_iter()
+				.map(|(_grouper, _group_constraint, vals)| {
+					vals.into_iter()
+						.map(|val| val.finalise_accumulation())
+						.collect::<Result<Vec<Value>>>()
+				})
+				.collect::<Result<Vec<Vec<Value>>>>()?
+			// TODO: Manage grouper and constraint
+		} else {
+			selected_rows
+				.map(|selection| {
+					selection.and_then(|(selection, _)| {
+						selection
+							.into_iter()
+							.map(|selected| selected.confirm())
+							.collect::<Result<Row>>()
+					})
+				})
+				.collect::<Result<Vec<Row>>>()?
+		};
+
+		Ok((labels, final_rows))
+	}
+	pub async fn select_query(
+		&mut self,
+		query: Select,
+		order_by: Vec<OrderByExpr>,
+	) -> Result<LabelsAndRows> {
+		let plan = Plan::new(self, query, order_by).await?;
+		self.select(plan).await
+	}
+}
+
+#[allow(clippy::type_complexity)] // TODO
+fn accumulate(
+	mut rows_l: Vec<(Vec<Value>, Option<PlannedRecipe>, Vec<PlannedRecipe>)>,
+	rows_r: Vec<(Vec<Value>, Option<PlannedRecipe>, Vec<PlannedRecipe>)>,
+) -> Result<Vec<(Vec<Value>, Option<PlannedRecipe>, Vec<PlannedRecipe>)>> {
+	rows_r.into_iter().try_for_each::<_, Result<_>>(|row_r| {
+		let (grouper, group_constraint, vals) = row_r;
+		let group_index = rows_l.iter().position(|(group, _, _)| group == &grouper);
+		let new_group = if let Some(group_index) = group_index {
+			let (group_grouper, group_group_constraint, group_vals) =
+				rows_l.swap_remove(group_index);
+			/*rows_l[group_index].1.map(|constraint| {
+				if let Some(group_constraint) = group_constraint {
+					constraint.accumulate(group_constraint).unwrap() // TODO: Handle
+				};
+			});*/
+			// TODO
+
+			let group_vals = group_vals
+				.into_iter()
+				.zip(vals.into_iter())
+				.map(|(mut col, val)| {
+					col.accumulate(val)?;
+					Ok(col)
+				})
+				.collect::<Result<Vec<PlannedRecipe>>>()?;
+			(group_grouper, group_group_constraint, group_vals)
+		} else {
+			(grouper, group_constraint, vals)
+		};
+		rows_l.push(new_group);
+		Ok(())
+	})?;
+
+	Ok(rows_l)
+}
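`accumulate` merges the partial `GROUP BY` state of two row chunks: rows with an already-seen grouper key fold into the existing group, new keys append. A toy sketch with plain string keys and running sums in place of multisql's recipes:

```rust
// Toy version of `accumulate`: merge partial GROUP BY states from two chunks.
// Keys are grouper values; the payload here is a running SUM per group.
fn accumulate(mut left: Vec<(String, i64)>, right: Vec<(String, i64)>) -> Vec<(String, i64)> {
    for (key, partial) in right {
        match left.iter_mut().find(|(k, _)| *k == key) {
            Some((_, sum)) => *sum += partial, // same group seen before: combine states
            None => left.push((key, partial)), // new group: adopt the partial state
        }
    }
    left
}

fn main() {
    let a = vec![("red".into(), 3), ("blue".into(), 1)];
    let b = vec![("blue".into(), 4), ("green".into(), 2)];
    let merged = accumulate(a, b);
    assert_eq!(
        merged,
        vec![("red".into(), 3), ("blue".into(), 5), ("green".into(), 2)]
    );
}
```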
diff --git a/src/executor/query/select/order.rs b/src/executor/query/select/order.rs
new file mode 100644
index 00000000..bd8e005d
--- /dev/null
+++ b/src/executor/query/select/order.rs
@@ -0,0 +1,118 @@
+use {
+	crate::{
+		executor::types::{ColumnInfo, Row},
+		MetaRecipe, PlannedRecipe, RecipeUtilities, Result, Value,
+	},
+	rayon::prelude::*,
+	sqlparser::ast::OrderByExpr,
+	std::cmp::Ordering,
+};
+
+pub struct Order(Vec<PlannedOrderItem>);
+impl Order {
+	pub fn new(order_by: Vec<OrderByExpr>, columns: &[ColumnInfo]) -> Result<Self> {
+		let order_items = order_by
+			.into_iter()
+			.map(|order_by_item| PlannedOrderItem::new(order_by_item, columns))
+			.collect::<Result<Vec<PlannedOrderItem>>>()?;
+		Ok(Order(order_items))
+	}
+	pub fn execute(self, rows: Vec<Row>) -> Result<Vec<Row>> {
+		// TODO: Optimise
+		if self.0.is_empty() {
+			return Ok(rows);
+		}
+
+		let (order_terms, order_item_recipes): (Vec<OrderTerm>, Vec<PlannedRecipe>) = self
+			.0
+			.into_iter()
+			.map(|planned_order_item| {
+				let PlannedOrderItem(order_term, recipe) = planned_order_item;
+				(order_term, recipe)
+			})
+			.unzip();
+		let order_terms = OrderTerms(order_terms);
+
+		let mut order_rows = rows
+			.into_par_iter()
+			.map(|row| {
+				let order_row = order_item_recipes
+					.clone()
+					.into_iter()
+					.map(|recipe| recipe.simplify_by_row(&row)?.confirm())
+					.collect::<Result<Vec<Value>>>();
+				order_row.map(|order_row| (row, order_row))
+			})
+			.collect::<Result<Vec<(Row, Vec<Value>)>>>()?;
+
+		order_rows.par_sort_unstable_by(|(_, order_row_a), (_, order_row_b)| {
+			order_terms.sort(order_row_a, order_row_b)
+		});
+		Ok(order_rows.into_iter().map(|(row, _)| row).collect())
+	}
+}
+
+struct PlannedOrderItem(OrderTerm, PlannedRecipe);
+impl PlannedOrderItem {
+	pub fn new(order_by_item: OrderByExpr, columns: &[ColumnInfo]) -> Result<Self> {
+		let OrderByExpr {
+			expr,
+			asc,
+			nulls_first,
+		} = order_by_item;
+		let recipe = PlannedRecipe::new(MetaRecipe::new(expr)?.simplify_by_basic()?, columns)?;
+		let is_asc = asc.unwrap_or(true);
+		let prefer_nulls = nulls_first.unwrap_or(false);
+
+		Ok(PlannedOrderItem(
+			OrderTerm {
+				is_asc,
+				prefer_nulls,
+			},
+			recipe,
+		))
+	}
+}
+
+#[derive(Clone)]
+struct OrderTerm {
+	pub is_asc: bool,
+	pub prefer_nulls: bool,
+}
+impl OrderTerm {
+	pub fn sort(&self, order_item_a: &Value, order_item_b: &Value) -> Option<Ordering> {
+		let order = match (order_item_a, order_item_b) {
+			(Value::Null, Value::Null) => Ordering::Equal,
+			// Order nulls consistently from whichever side they appear on:
+			// with NULLS FIRST a null compares as less, otherwise as greater.
+			(Value::Null, _) => {
+				if self.prefer_nulls {
+					Ordering::Less
+				} else {
+					Ordering::Greater
+				}
+			}
+			(_, Value::Null) => {
+				if self.prefer_nulls {
+					Ordering::Greater
+				} else {
+					Ordering::Less
+				}
+			}
+			(other_a, other_b) => other_a.partial_cmp(other_b).unwrap_or(Ordering::Equal),
+		};
+		if order == Ordering::Equal {
+			None
+		} else if self.is_asc {
+			Some(order)
+		} else {
+			Some(order.reverse())
+		}
+	}
+}
+
+struct OrderTerms(Vec<OrderTerm>);
+
+impl OrderTerms {
+	pub fn sort(&self, order_items_a: &[Value], order_items_b: &[Value]) -> Ordering {
+		order_items_a
+			.iter()
+			.zip(order_items_b)
+			.zip(self.0.clone())
+			.find_map(|((order_item_a, order_item_b), order_term)| {
+				order_term.sort(order_item_a, order_item_b)
+			})
+			.unwrap_or(Ordering::Equal)
+	}
+}
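`OrderTerms::sort` walks the order terms and lets the first non-equal comparison decide, reversing for `DESC`. The same idea as a self-contained sketch over integer keys (toy types, not the crate's `Value`):

```rust
use std::cmp::Ordering;

// Toy multi-key ORDER BY comparator in the style of OrderTerms::sort: walk the
// (value, ascending) pairs and let the first non-equal comparison decide.
fn multi_key_cmp(a: &[i64], b: &[i64], ascending: &[bool]) -> Ordering {
    a.iter()
        .zip(b)
        .zip(ascending)
        .find_map(|((x, y), &asc)| {
            let ord = x.cmp(y);
            match (ord, asc) {
                (Ordering::Equal, _) => None, // tie: defer to the next key
                (ord, true) => Some(ord),
                (ord, false) => Some(ord.reverse()), // DESC flips the comparison
            }
        })
        .unwrap_or(Ordering::Equal)
}

fn main() {
    let mut rows = vec![[1, 9], [0, 3], [1, 2]];
    // ORDER BY col0 ASC, col1 DESC
    rows.sort_by(|a, b| multi_key_cmp(a, b, &[true, false]));
    assert_eq!(rows, vec![[0, 3], [1, 9], [1, 2]]);
}
```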
diff --git a/src/executor/query/select/plan/mod.rs b/src/executor/query/select/plan/mod.rs
new file mode 100644
index 00000000..d77c5aae
--- /dev/null
+++ b/src/executor/query/select/plan/mod.rs
@@ -0,0 +1,206 @@
+use {
+	super::{
+		join::{JoinExecute, JoinPlan},
+		Manual, Order, SelectItem,
+	},
+	crate::{
+		executor::{types::ColumnInfo, PlannedRecipe},
+		Glue, Result,
+	},
+	futures::future::join_all,
+	serde::Serialize,
+	sqlparser::ast::{OrderByExpr, Select},
+	thiserror::Error as ThisError,
+};
+
+pub struct Plan {
+	pub joins: Vec<JoinExecute>,
+	pub select_items: Vec<PlannedRecipe>,
+	pub constraint: PlannedRecipe,
+	pub groups: Vec<PlannedRecipe>,
+	pub group_constraint: PlannedRecipe,
+	pub order_by: Order,
+	pub labels: Vec<String>,
+}
+
+#[derive(ThisError, Serialize, Debug, PartialEq)]
+pub enum PlanError {
+	#[error("this should be impossible, please report")]
+	UnreachableNoColumns,
+	#[error("this should be impossible, please report")]
+	UnreachableNoSelectItems,
+	#[error("this should be impossible, please report")]
+	Unreachable,
+}
+
+impl Plan {
+	pub async fn new(glue: &Glue, select: Select, order_by: Vec<OrderByExpr>) -> Result<Plan> {
+		let Manual {
+			joins,
+			select_items,
+			constraint,
+			group_constraint,
+			groups,
+		} = Manual::new(select, &*glue.get_context()?)?;
+
+		let mut joins: Vec<JoinPlan> = join_all(
+			joins
+				.into_iter()
+				.map(|join| JoinPlan::new(join, glue))
+				.collect::<Vec<_>>(),
+		)
+		.await
+		.into_iter()
+		.collect::<Result<Vec<JoinPlan>>>()?;
+
+		joins.sort_unstable();
+		let table_columns = joins
+			.iter()
+			.map(|join| join.columns.clone())
+			.collect::<Vec<Vec<ColumnInfo>>>();
+		let joins = joins
+			.into_iter()
+			.map(|mut join| {
+				join.calculate_needed_tables(&table_columns);
+				join
+			})
+			.enumerate()
+			.collect();
+
+		let mut needed_joins: Vec<(usize, JoinPlan)> = joins;
+		let mut requested_joins: Vec<(usize, JoinPlan)> = vec![];
+		let mut len_last: usize;
+		let mut len = 0;
+		loop {
+			len_last = len;
+			len = needed_joins.len();
+			if needed_joins.is_empty() {
+				break;
+			}
+			let needed_joins_iter = needed_joins.into_iter();
+			needed_joins = vec![];
+			needed_joins_iter.for_each(|(needed_index, join)| {
+				if !join.needed_tables.iter().any(|needed_table_index| {
+					!(&needed_index == needed_table_index
+						|| requested_joins
+							.iter()
+							.any(|(requested_index, _)| needed_table_index == requested_index))
+				}) {
+					requested_joins.push((needed_index, join))
+				} else {
+					if len == len_last {
+						// TODO
+						panic!(
+							"Impossible join; a table is not present or tables require each other: {:?}",
+							join
+						)
+						// TODO: Handle
+					}
+					needed_joins.push((needed_index, join))
+				}
+			});
+		}
+		let columns = requested_joins
+			.iter()
+			.fold(vec![], |mut columns, (index, _)| {
+				columns.extend(
+					table_columns
+						.get(*index)
+						.expect("Something went very wrong")
+						.clone(),
+				);
+				columns
+			});
+
+		let (constraint, mut index_filters) = PlannedRecipe::new_constraint(constraint, &columns)?;
+
+		let mut joins = requested_joins
+			.into_iter()
+			.map(|(_, join)| {
+				let index_filter = index_filters.remove(&join.table);
+				JoinExecute::new(join, &columns, index_filter)
+			})
+			.collect::<Result<Vec<JoinExecute>>>()?;
+
+		if let Some(first) = joins.first_mut() {
+			first.set_first_table()
+		}
+
+		let include_table = joins.len() != 1;
+		let select_items = select_items
+			.into_iter()
+			.enumerate()
+			.map(|(index, select_item)| {
+				Ok(match select_item {
+					SelectItem::Recipe(meta_recipe, alias) => {
+						let recipe = PlannedRecipe::new(meta_recipe, &columns)?;
+						let label = alias
+							.unwrap_or_else(|| recipe.get_label(index, include_table, &columns));
+						vec![(recipe, label)]
+					}
+					SelectItem::Wildcard(specifier) => {
+						let specified_table =
+							specifier.and_then(|specifier| specifier.get(0).cloned());
+						let matches_table = |column: &ColumnInfo| {
+							specified_table
+								.clone()
+								.map(|specified_table| {
+									column.table.name == specified_table
+										|| column
+											.table
+											.alias
+											.clone()
+											.map(|alias| alias == specified_table)
+											.unwrap_or(false)
+								})
+								.unwrap_or(true)
+						};
+						columns
+							.iter()
+							.enumerate()
+							.filter_map(|(index, column)| {
+								if matches_table(column) {
+									Some((
+										PlannedRecipe::of_index(index),
+										if include_table {
+											format!("{}.{}", column.table.name, column.name)
+										} else {
+											column.name.clone()
+										},
+									))
+								} else {
+									None
+								}
+							})
+							.collect()
+					}
+				})
+			})
+			.collect::<Result<Vec<Vec<(PlannedRecipe, String)>>>>()?
+			// TODO: Don't do this
+			.into_iter()
+			.reduce(|mut select_items, select_item_set| {
+				select_items.extend(select_item_set);
+				select_items
+			})
+			.ok_or(PlanError::UnreachableNoSelectItems)?;
+
+		let (select_items, labels) = select_items.into_iter().unzip();
+
+		let group_constraint = PlannedRecipe::new(group_constraint, &columns)?;
+		let groups = groups
+			.into_iter()
+			.map(|group| PlannedRecipe::new(group, &columns))
+			.collect::<Result<Vec<PlannedRecipe>>>()?;
+		let order_by = Order::new(order_by, &columns)?;
+
+		Ok(Plan {
+			joins,
+			select_items,
+			constraint,
+			groups,
+			group_constraint,
+			order_by,
+			labels,
+		})
+	}
+}
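The `loop` in `Plan::new` above is a fixed-point pass: each round admits every join whose needed tables are already available, and a round that makes no progress means the remaining joins depend on each other. A toy sketch of that resolution (hypothetical `resolve` helper, not part of multisql):

```rust
// Toy version of Plan::new's join-ordering fixed point: admit an item once all
// its dependencies are admitted; if a full pass admits nothing, the remainder
// is cyclic (the real code currently panics there).
fn resolve(mut pending: Vec<(usize, Vec<usize>)>) -> Result<Vec<usize>, Vec<usize>> {
    let mut admitted: Vec<usize> = vec![];
    while !pending.is_empty() {
        let before = pending.len();
        let (ready, blocked): (Vec<_>, Vec<_>) = pending
            .into_iter()
            .partition(|(id, deps)| deps.iter().all(|d| d == id || admitted.contains(d)));
        admitted.extend(ready.into_iter().map(|(id, _)| id));
        pending = blocked;
        if pending.len() == before {
            // No progress: mutually dependent joins
            return Err(pending.into_iter().map(|(id, _)| id).collect());
        }
    }
    Ok(admitted)
}

fn main() {
    // join 0 needs nothing, join 1 needs table 0, join 2 needs tables 0 and 1
    let order = resolve(vec![(2, vec![0, 1]), (0, vec![]), (1, vec![0])]);
    assert_eq!(order, Ok(vec![0, 1, 2]));
}
```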
diff --git a/src/executor/query/set_expr.rs b/src/executor/query/set_expr.rs
new file mode 100644
index 00000000..737a2117
--- /dev/null
+++ b/src/executor/query/set_expr.rs
@@ -0,0 +1,96 @@
+use {
+	super::QueryError,
+	crate::{
+		executor::types::LabelsAndRows, macros::warning, result::Result, Glue, MetaRecipe, Payload,
+		RecipeUtilities, Value,
+	},
+	async_recursion::async_recursion,
+	sqlparser::ast::{OrderByExpr, SetExpr, SetOperator, Statement},
+};
+
+impl Glue {
+	#[async_recursion(?Send)]
+	pub async fn from_body(
+		&mut self,
+		body: SetExpr,
+		order_by: Vec<OrderByExpr>,
+	) -> Result<LabelsAndRows> {
+		match body {
+			SetExpr::Select(query) => {
+				let (labels, rows) = self.select_query(*query, order_by).await?;
+				Ok((labels, rows))
+			}
+			SetExpr::Values(values) => {
+				if !order_by.is_empty() {
+					warning!("VALUES does not currently support ordering");
+				}
+				let values = values.0;
+				values
+					.into_iter()
+					.map(|values_row| {
+						values_row
+							.into_iter()
+							.map(|cell| {
+								MetaRecipe::new(cell)?
+									.simplify_by_context(&*self.get_context()?)?
+									.confirm_or_err(QueryError::MissingComponentsForValues.into())
+							})
+							.collect::<Result<Vec<Value>>>()
+					})
+					.collect::<Result<Vec<Vec<Value>>>>()
+					.map(|values| {
+						(
+							(0..values.get(0).map(|first_row| first_row.len()).unwrap_or(0))
								.map(|index| format!("unnamed_{}", index))
+								.collect(),
+							values,
+						)
+					})
+			}
+			SetExpr::SetOperation {
+				op,
+				all,
+				left,
+				right,
+			} => {
+				use SetOperator::*;
+				if !order_by.is_empty() {
+					warning!(
+						"set operations (UNION, EXCEPT & INTERSECT) do not currently support ordering"
+					);
+				}
+				let (left_labels, left) = self.from_body(*left, vec![]).await?;
+				let (right_labels, right) = self.from_body(*right, vec![]).await?;
+				if left_labels.len() != right_labels.len() {
+					return Err(QueryError::OperationColumnsMisaligned.into());
+				}
+				let mut rows = match op {
+					Union => [left, right].concat(),
+					Except => left
+						.into_iter()
+						.filter(|row| !right.contains(row))
+						.collect(),
+					Intersect => left.into_iter().filter(|row| right.contains(row)).collect(),
+				};
+				if !all {
+					// Note: `dedup` only removes *adjacent* duplicates; rows are not sorted here
+					rows.dedup();
+				}
+				Ok((left_labels, rows))
+			}
+			SetExpr::Insert(Statement::Insert {
+				table_name,
+				columns,
+				source,
+				..
+			}) => {
+				let inserted = self.insert(&table_name, &columns, &source, true).await?;
+				if let Payload::Select { labels, rows } = inserted {
+					Ok((labels, rows.into_iter().map(|row| row.0).collect()))
+				} else {
+					unreachable!(); // TODO: Handle
+				}
+			}
+			_ => Err(QueryError::QueryNotSupported.into()), // TODO: Other queries
+		}
+	}
+}
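The `SetOperator` arm above reduces UNION/EXCEPT/INTERSECT to concatenation and membership filters. The same semantics as a toy sketch over plain integers (illustrative only):

```rust
// Toy mirror of the SetOperator match in from_body, over plain i32 "rows".
fn union(l: Vec<i32>, r: Vec<i32>) -> Vec<i32> {
    [l, r].concat()
}
fn except(l: Vec<i32>, r: Vec<i32>) -> Vec<i32> {
    l.into_iter().filter(|x| !r.contains(x)).collect()
}
fn intersect(l: Vec<i32>, r: Vec<i32>) -> Vec<i32> {
    l.into_iter().filter(|x| r.contains(x)).collect()
}

fn main() {
    assert_eq!(union(vec![1, 2], vec![2, 3]), vec![1, 2, 2, 3]); // UNION ALL keeps dups
    assert_eq!(except(vec![1, 2, 3], vec![2]), vec![1, 3]);
    assert_eq!(intersect(vec![1, 2, 3], vec![2, 3, 4]), vec![2, 3]);
}
```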
diff --git a/src/executor/recipe/from.rs b/src/executor/recipe/from.rs
new file mode 100644
index 00000000..70308f35
--- /dev/null
+++ b/src/executor/recipe/from.rs
@@ -0,0 +1,111 @@
+use {
+	super::{AggregateOperator, BinaryOperator, FunctionOperator, RecipeError, UnaryOperator},
+	crate::{Result, Value},
+	sqlparser::ast::{BinaryOperator as AstBinaryOperator, UnaryOperator as AstUnaryOperator},
+};
+
+pub trait TryIntoMethod<MethodType> {
+	fn into_method(self) -> Result<MethodType>;
+}
+
+impl TryIntoMethod<FunctionOperator> for String {
+	fn into_method(self) -> Result<FunctionOperator> {
+		match self.to_uppercase().as_str() {
+			"CONVERT" => Ok(Value::function_convert),
+			"TRY_CONVERT" => Ok(Value::function_try_convert),
+
+			"UPPER" => Ok(Value::function_to_uppercase),
+			"LOWER" => Ok(Value::function_to_lowercase),
+
+			"LEFT" => Ok(Value::function_left),
+			"RIGHT" => Ok(Value::function_right),
+
+			"LEN" => Ok(Value::function_length),
+			"CONCAT" => Ok(Value::function_concat),
+			"REPLACE" => Ok(Value::function_replace),
+
+			"NOW" => Ok(Value::function_now),
+
+			"YEAR" => Ok(Value::function_year),
+			"MONTH" => Ok(Value::function_month),
+			"DAY" => Ok(Value::function_day),
+			"HOUR" => Ok(Value::function_hour),
+			"MINUTE" => Ok(Value::function_minute),
+			"SECOND" => Ok(Value::function_second),
+
+			"DATEADD" => Ok(Value::function_timestamp_add),
+			"DATEFROMPARTS" => Ok(Value::function_timestamp_from_parts),
+
+			"ROUND" => Ok(Value::function_round),
+			"POW" => Ok(Value::function_pow),
+
+			"RAND" => Ok(Value::function_rand),
+			"UUID" => Ok(Value::function_rand),
+
+			"IIF" => Ok(Value::function_iif),
+			"IFNULL" => Ok(Value::function_if_null),
+			"NULLIF" => Ok(Value::function_null_if),
+
+			unimplemented => {
+				Err(RecipeError::UnimplementedMethod(String::from(unimplemented)).into())
+			}
+		}
+	}
+}
+
+impl TryIntoMethod<AggregateOperator> for String {
+	fn into_method(self) -> Result<AggregateOperator> {
+		match self.to_uppercase().as_str() {
+			"COUNT" => Ok(Value::aggregate_count),
+			"MIN" => Ok(Value::aggregate_min),
+			"MAX" => Ok(Value::aggregate_max),
+			"SUM" => Ok(Value::aggregate_sum),
+
+			unimplemented => {
+				Err(RecipeError::UnimplementedMethod(String::from(unimplemented)).into())
+			}
+		}
+	}
+}
+
+impl TryIntoMethod<UnaryOperator> for AstUnaryOperator {
+	fn into_method(self) -> Result<UnaryOperator> {
+		match self {
+			AstUnaryOperator::Plus => Ok(Value::generic_unary_plus),
+			AstUnaryOperator::Minus => Ok(Value::generic_unary_minus),
+			AstUnaryOperator::Not => Ok(Value::not),
+
+			unimplemented => {
+				Err(RecipeError::UnimplementedMethod(format!("{:?}", unimplemented)).into())
+			}
+		}
+	}
+}
+
+impl TryIntoMethod<BinaryOperator> for AstBinaryOperator {
+	fn into_method(self) -> Result<BinaryOperator> {
+		match self {
+			AstBinaryOperator::Plus => Ok(Value::generic_add),
+			AstBinaryOperator::Minus => Ok(Value::generic_subtract),
+			AstBinaryOperator::Multiply => Ok(Value::generic_multiply),
+			AstBinaryOperator::Divide => Ok(Value::generic_divide),
+			AstBinaryOperator::Modulo => Ok(Value::generic_modulus),
+
+			AstBinaryOperator::And => Ok(Value::and),
+			AstBinaryOperator::Or => Ok(Value::or),
+
+			AstBinaryOperator::Eq => Ok(Value::eq),
+			AstBinaryOperator::NotEq => Ok(Value::not_eq),
+			AstBinaryOperator::Gt => Ok(Value::gt),
+			AstBinaryOperator::GtEq => Ok(Value::gt_eq),
+			AstBinaryOperator::Lt => Ok(Value::lt),
+			AstBinaryOperator::LtEq => Ok(Value::lt_eq),
+
+			AstBinaryOperator::StringConcat => Ok(Value::string_concat),
+
+			unimplemented => {
+				Err(RecipeError::UnimplementedMethod(format!("{:?}", unimplemented)).into())
+			}
+		}
+	}
+}
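`TryIntoMethod` resolves a SQL function name to a plain function pointer once, at recipe-build time, so per-row evaluation is just a call through the pointer. A minimal sketch of that dispatch pattern (hypothetical names, not multisql's `Value` methods):

```rust
// Minimal sketch of the name -> fn-pointer dispatch used by TryIntoMethod:
// resolve the name once at plan time, then call the returned pointer per row.
type UnaryFn = fn(i64) -> i64;

fn into_method(name: &str) -> Result<UnaryFn, String> {
    match name.to_uppercase().as_str() {
        "ABS" => Ok(i64::abs as UnaryFn),
        "NEG" => Ok(|v| -v),
        other => Err(format!("unimplemented method: {}", other)),
    }
}

fn main() {
    let f = into_method("abs").unwrap(); // resolved once at "plan" time
    assert_eq!(f(-5), 5); // then invoked per row without re-matching the name
}
```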
diff --git a/src/executor/recipe/mod.rs b/src/executor/recipe/mod.rs
new file mode 100644
index 00000000..a23ba379
--- /dev/null
+++ b/src/executor/recipe/mod.rs
@@ -0,0 +1,157 @@
+mod from;
+mod new;
+mod planned;
+mod resolve;
+
+use {
+	crate::{executor::types::ObjectName, Error, Result, Value},
+	serde::Serialize,
+	sqlparser::ast::{DataType, Expr},
+	std::fmt::Debug,
+	thiserror::Error as ThisError,
+};
+pub use {
+	from::TryIntoMethod,
+	new::MetaRecipe,
+	planned::PlannedRecipe,
+	resolve::{Resolve, SimplifyBy},
+};
+
+#[derive(ThisError, Serialize, Debug, PartialEq)]
+pub enum RecipeError {
+	#[error("recipe missing components")]
+	MissingComponents,
+
+	#[error("{0} is either invalid or unimplemented")]
+	InvalidQuery(String),
+	#[error("{0} is invalid or unimplemented")]
+	InvalidExpression(Expr),
+	#[error("a function is either invalid or unimplemented")]
+	InvalidFunction,
+
+	#[error("column '{0:?}' could not be found")]
+	MissingColumn(ObjectName),
+	#[error("column '{0:?}' could refer to multiple columns; qualify it as (table).(column)")]
+	AmbiguousColumn(ObjectName),
+
+	#[error("{0} is either invalid or unimplemented")]
+	UnimplementedQuery(String),
+	#[error("{0} is either invalid or unimplemented")]
+	UnimplementedMethod(String),
+	#[error("{0} is unimplemented")]
+	UnimplementedExpression(Expr),
+	#[error("something is unimplemented")]
+	Unimplemented,
+
+	#[error("other failure occurred: {0}")]
+	Failed(String),
+
+	#[error("this should be impossible, please report")]
+	UnreachableAggregationFailed,
+	#[error("this should be impossible, please report")]
+	UnreachableAggregateFailed,
+	#[error("this should be impossible, please report, failure: {0}")]
+	UnreachableNotMethod(String),
+	#[error("this should be impossible, please report, failure: {0}")]
+	UnreachableNotAggregate(String),
+	#[error("this should be impossible, please report")]
+	UnreachableNoRow,
+	#[error("this should be impossible, please report")]
+	Unreachable,
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum Recipe {
+	Ingredient(Ingredient),
+	Method(Box<Method>),
+}
+
+impl Default for Recipe {
+	fn default() -> Self {
+		Self::NULL
+	}
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum Ingredient {
+	Value(Value),
+	Column(usize),
+	Aggregate(usize),
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum Method {
+	Value(Value), // Only occurs backwards for eval! Should never be returned outside of a recursive simplification!
+	Aggregate(AggregateOperator, Recipe), // Only occurs inside Ingredient::Aggregate. Perhaps this should not be a Method.
+
+	UnaryOperation(UnaryOperator, Recipe),
+	BinaryOperation(BinaryOperator, Recipe, Recipe),
+	Function(FunctionOperator, Vec<Recipe>),
+
+	Cast(DataType, Recipe),
+
+	Case {
+		operand: Option<Recipe>,
+		cases: Vec<(Recipe, Recipe)>,
+		else_result: Option<Recipe>,
+	},
+}
+
+// Cannot derive Debug for references. Perhaps these shouldn't consume their operators. TODO.
+pub type UnaryOperator = fn(Value) -> Result<Value>;
+pub type BinaryOperator = fn(Value, Value) -> Result<Value>;
+pub type FunctionOperator = fn(Vec<Value>) -> Result<Value>;
+pub type AggregateOperator = fn(Value, Value) -> Result<Value>;
+
+pub trait RecipeUtilities
+where
+	Self: Sized,
+{
+	fn as_solution(&self) -> Option<Value>;
+
+	fn confirm_or_err(self, error: Error) -> Result<Value> {
+		self.as_solution().ok_or(error)
+	}
+
+	fn confirm(self) -> Result<Value> {
+		self.confirm_or_err(RecipeError::MissingComponents.into())
+	}
+
+	fn simplify_by_basic(self) -> Result<Self>;
+}
+impl RecipeUtilities for Recipe {
+	fn as_solution(&self) -> Option<Value> {
+		if let Recipe::Ingredient(Ingredient::Value(value)) = self {
+			Some(value.clone())
+		} else {
+			None
+		}
+	}
+	fn simplify_by_basic(self) -> Result<Self> {
+		self.simplify(SimplifyBy::Basic)
+	}
+}
+impl RecipeUtilities for MetaRecipe {
+	fn as_solution(&self) -> Option<Value> {
+		self.recipe.as_solution()
+	}
+	fn simplify_by_basic(mut self) -> Result<Self> {
+		self.recipe = self.recipe.simplify(SimplifyBy::Basic)?;
+		Ok(self)
+	}
+}
+impl RecipeUtilities for PlannedRecipe {
+	fn as_solution(&self) -> Option<Value> {
+		self.recipe.as_solution()
+	}
+	fn simplify_by_basic(mut self) -> Result<Self> {
+		self.recipe = self.recipe.simplify(SimplifyBy::Basic)?;
+		Ok(self)
+	}
+}
+
+impl Recipe {
+	pub const NULL: Recipe = Recipe::Ingredient(Ingredient::Value(Value::Null));
+	pub const TRUE: Recipe = Recipe::Ingredient(Ingredient::Value(Value::Bool(true)));
+	pub const SINGLE_COLUMN: Recipe = Recipe::Ingredient(Ingredient::Column(0));
+}
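A `Recipe` is an expression tree: `Ingredient`s are the leaves (constants, column slots, aggregate slots) and `Method`s the operator nodes, and simplifying against a row folds the tree toward a single value. A self-contained mirror of that shape (toy `i64` values instead of the crate's `Value`):

```rust
// Self-contained mirror of the Recipe tree (the real one wraps multisql's Value
// and fn-pointer operator aliases): leaves are values or column slots, nodes apply
// an operator, and simplifying against a row folds the tree to a constant.
#[derive(Clone, Debug, PartialEq)]
enum Recipe {
    Value(i64),
    Column(usize),
    Binary(fn(i64, i64) -> i64, Box<Recipe>, Box<Recipe>),
}

fn simplify(recipe: Recipe, row: &[i64]) -> i64 {
    match recipe {
        Recipe::Value(v) => v,
        Recipe::Column(index) => row[index],
        Recipe::Binary(op, l, r) => op(simplify(*l, row), simplify(*r, row)),
    }
}

fn main() {
    // 1 + col0, as the recipe builder would construct it from the AST
    let recipe = Recipe::Binary(
        |l, r| l + r,
        Box::new(Recipe::Value(1)),
        Box::new(Recipe::Column(0)),
    );
    assert_eq!(simplify(recipe, &[41]), 42);
}
```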
diff --git a/src/executor/recipe/new.rs b/src/executor/recipe/new.rs
new file mode 100644
index 00000000..af3d4537
--- /dev/null
+++ b/src/executor/recipe/new.rs
@@ -0,0 +1,325 @@
+use {
+	super::{Ingredient, Method, Recipe, RecipeError, TryIntoMethod},
+	crate::{
+		executor::{query::JoinManual, types::ObjectName},
+		Context, Resolve, Result, SimplifyBy, Value,
+	},
+	sqlparser::ast::{Expr, FunctionArg, FunctionArgExpr, Ident},
+	std::convert::TryFrom,
+};
+// TODO: #50 - imports: JoinType, SelectItem, SetExpr, ComplexTableName
+
+#[derive(Debug, Clone)]
+pub struct MetaRecipe {
+	pub recipe: Recipe,
+	pub meta: RecipeMeta,
+}
+impl MetaRecipe {
+	pub fn new(expression: Expr) -> Result<Self> {
+		let (recipe, meta) = Recipe::new_with_meta(expression)?;
+		Ok(Self { recipe, meta })
+	}
+	pub fn simplify_by_context(self, context: &Context) -> Result<Self> {
+		let meta_objects = self.meta.objects.clone();
+		let (meta_objects, row) = meta_objects
+			.into_iter()
+			.map(|object_name| {
+				object_name
+					.clone()
+					.and_then(|object_name| {
+						if object_name.len() == 1 {
+							context.variables.get(&object_name[0]).map(Clone::clone)
+						} else {
+							None
+						}
+					})
+					.map(|value| (None, Some(value)))
+					.unwrap_or((object_name, None))
+			})
+			.unzip();
+		let mut meta = self.meta;
+		meta.objects = meta_objects;
+		let recipe = self.recipe.simplify(SimplifyBy::OptRow(&row))?;
+		Ok(Self { recipe, meta })
+	}
+}
+impl MetaRecipe {
+	pub const NULL: Self = MetaRecipe {
+		recipe: Recipe::Ingredient(Ingredient::Value(Value::Null)),
+		meta: RecipeMeta::NEW,
+	};
+	pub const TRUE: Self = MetaRecipe {
+		recipe: Recipe::Ingredient(Ingredient::Value(Value::Bool(true))),
+		meta: RecipeMeta::NEW,
+	};
+}
+
+#[derive(Debug, Clone)]
+pub struct RecipeMeta {
+	pub objects: Vec<Option<ObjectName>>,
+	pub aggregates: Vec<Recipe>,
+	pub subqueries: Vec<JoinManual>,
+}
+impl RecipeMeta {
+	pub const NEW: Self = Self {
+		objects: vec![],
+		aggregates: vec![],
+		subqueries: vec![],
+	};
+	fn append_column(&mut self, column: ObjectName) {
+		self.objects.push(Some(column));
+	}
+	fn append_aggregate(&mut self, aggregate: Recipe) {
+		self.aggregates.push(aggregate);
+	}
+	/* TODO: #50
+	fn append_subquery(&mut self, subquery: JoinManual) {
+		self.subqueries.push(subquery);
+	}*/
+	fn find_column(&self, column: &ObjectName) -> Option<usize> {
+		self.objects.iter().position(|search_column| {
+			search_column
+				.as_ref()
+				.map(|search_column| column == search_column)
+				.unwrap_or(false)
+		})
+	}
+	pub fn find_or_append_column(&mut self, column: ObjectName) -> usize {
+		// `unwrap_or` would evaluate (and append) eagerly even when the column
+		// is already known; the lazy closure appends only on a miss.
+		self.find_column(&column).unwrap_or_else(|| {
+			self.append_column(column);
+			self.objects.len() - 1
+		})
+	}
+	pub fn aggregate(&mut self, aggregate: Recipe) -> Recipe {
+		self.append_aggregate(aggregate);
+		let index = self.aggregates.len() - 1;
+		Recipe::Ingredient(Ingredient::Aggregate(index))
+	}
+	/* TODO: #50
+	pub fn subquery(&mut self, subquery: Subquery) -> Result<Recipe> {
+		let result = subquery.column;
+		let table = subquery.table;
+		let join_type = JoinType::Left;
+		let constraint = subquery
+			.constraint
+			.map(MetaRecipe::new)
+			.unwrap_or(Ok(MetaRecipe::NULL))?;
+		let subquery = JoinManual {
+			table,
+			join_type,
+			constraint,
+		};
+		self.append_subquery(subquery);
+		Ok(result)
+	}*/
+	pub fn aggregate_average(&mut self, argument: Recipe) -> Recipe {
+		Recipe::Method(Box::new(Method::BinaryOperation(
+			Value::generic_divide,
+			self.aggregate(Recipe::Method(Box::new(Method::Aggregate(
+				Value::aggregate_sum,
+				argument.clone(),
+			)))),
+			self.aggregate(Recipe::Method(Box::new(Method::Aggregate(
+				Value::aggregate_count,
+				argument,
+			)))),
+		)))
+	}
+}
+
+/* TODO: #50
+pub struct Subquery {
+	pub table: ComplexTableName,
+	pub column: Recipe,
+	pub constraint: Option<Expr>,
+}*/
+
+impl Recipe {
+	pub fn new_without_meta(expression: Expr) -> Result<Self> {
+		Self::new_with_meta(expression).map(|(new, _)| new)
+	}
+	fn new_with_meta(expression: Expr) -> Result<(Self, RecipeMeta)> {
+		let mut meta = RecipeMeta::NEW;
+		Ok((Self::with_meta(expression, &mut meta)?, meta))
+	}
+	fn with_meta(expression: Expr, meta: &mut RecipeMeta) -> Result<Self> {
+		let error_expression_clone = expression.clone();
+		match expression {
+			Expr::Identifier(identifier) => Ok(Self::from_column(
+				identifier_into_object_name(vec![identifier]),
+				meta,
+			)),
+			Expr::CompoundIdentifier(identifier) => Ok(Self::from_column(
+				identifier_into_object_name(identifier),
+				meta,
+			)),
+			Expr::Value(value) => Ok(Recipe::Ingredient(Ingredient::Value(Value::try_from(
+				&value,
+			)?))),
+			Expr::IsNull(expression) => Ok(Recipe::Method(Box::new(Method::UnaryOperation(
+				Value::is_null,
+				Self::with_meta(*expression, meta)?,
+			)))),
+			Expr::IsNotNull(expression) => Ok(Recipe::Method(Box::new(Method::UnaryOperation(
+				Value::not,
+				Recipe::Method(Box::new(Method::UnaryOperation(
+					Value::is_null,
+					Self::with_meta(*expression, meta)?,
+				))),
+			)))),
+			Expr::UnaryOp { op, expr } => Ok(Recipe::Method(Box::new(Method::UnaryOperation(
+				op.into_method()?,
+				Self::with_meta(*expr, meta)?,
+			)))),
+			Expr::BinaryOp { op, left, right } => {
+				Ok(Recipe::Method(Box::new(Method::BinaryOperation(
+					op.into_method()?,
+					Self::with_meta(*left, meta)?,
+					Self::with_meta(*right, meta)?,
+				))))
+			}
+			Expr::Function(function) => {
+				let name = function.name.0[0].value.clone();
+				if name == "AVG" {
+					let argument = function
+						.args
+						.get(0)
+						.ok_or(RecipeError::InvalidExpression(error_expression_clone))?
+						.clone();
+					let argument = Recipe::from_argument(argument, meta)?;
+
+					Ok(meta.aggregate_average(argument))
+				} else if let Ok(function_operator) = name.clone().into_method() {
+					let arguments = function
+						.args
+						.into_iter()
+						.map(|argument| Recipe::from_argument(argument, meta))
+						.collect::<Result<Vec<Recipe>>>()?;
+					Ok(Recipe::Method(Box::new(Method::Function(
+						function_operator,
+						arguments,
+					))))
+				} else {
+					let argument = function
+						.args
+						.get(0)
+						.ok_or(RecipeError::InvalidExpression(error_expression_clone))?
+						.clone();
+					let argument = Recipe::from_argument(argument, meta)?;
+
+					Ok(meta.aggregate(Recipe::Method(Box::new(Method::Aggregate(
+						name.into_method()?,
+						argument,
+					)))))
+				}
+			}
+			Expr::Case {
+				operand,
+				conditions,
+				results,
+				else_result,
+			} => Ok(Recipe::Method(Box::new(Method::Case {
+				operand: operand
+					.map(|operand| Self::with_meta(*operand, meta))
+					.transpose()?,
+				cases: conditions
+					.into_iter()
+					.zip(results)
+					.map(|(condition, result)| {
+						Ok((
+							Self::with_meta(condition, meta)?,
+							Self::with_meta(result, meta)?,
+						))
+					})
+					.collect::<Result<Vec<(Recipe, Recipe)>>>()?,
+				else_result: else_result
+					.map(|else_result| Self::with_meta(*else_result, meta))
+					.transpose()?,
+			}))),
+			Expr::Cast { data_type, expr } => Ok(Recipe::Method(Box::new(Method::Cast(
+				data_type,
+				Self::with_meta(*expr, meta)?,
+			)))),
+			Expr::Between {
+				negated,
+				expr,
+				low,
+				high,
+			} => {
+				let body = Method::BinaryOperation(
+					Value::and,
+					Recipe::Method(Box::new(Method::BinaryOperation(
+						Value::gt_eq,
+						Self::with_meta(*expr.clone(), meta)?,
+						Self::with_meta(*low, meta)?,
+					))),
+					Recipe::Method(Box::new(Method::BinaryOperation(
+						Value::lt_eq,
+						Self::with_meta(*expr, meta)?,
+						Self::with_meta(*high, meta)?,
+					))),
+				);
+				let body = if negated {
+					Method::UnaryOperation(Value::not, Recipe::Method(Box::new(body)))
+				} else {
+					body
+				};
+				Ok(Recipe::Method(Box::new(body)))
+			}
+			/* TODO: #50 Expr::Subquery(query) => {
+				if let SetExpr::Select(statement) = query.body {
+					let table = statement
+						.from
+						.get(0)
+						.ok_or(RecipeError::InvalidQuery(format!("{:?}", statement)))?
+						.relation
+						.clone();
+					let table = JoinManual::table_identity(table)?;
+
+					let column = statement
+						.projection
+						.get(0)
+						.map(|item| {
+							if let SelectItem::UnnamedExpr(expression) = item {
+								Some(Self::with_meta(expression.clone(), meta))
+							} else {
+								None
+							}
+						})
+						.flatten()
+						.ok_or(RecipeError::InvalidQuery(format!("{:?}", statement)))??;
+
+					let constraint = statement.selection;
+
+					Ok(meta.subquery(Subquery {
+						table,
+						column,
+						constraint,
+					})?)
+				} else {
+					Err(RecipeError::UnimplementedQuery(format!("{:?}", query)).into())
+				}
+			}*/
+			Expr::Nested(expression) => Self::with_meta(*expression, meta),
+			unimplemented => Err(RecipeError::UnimplementedExpression(unimplemented).into()),
+		}
+	}
+	fn from_argument(argument: FunctionArg, meta: &mut RecipeMeta) -> Result<Self> {
+		match argument {
+			FunctionArg::Named { arg, .. } | FunctionArg::Unnamed(arg) => match arg {
+				FunctionArgExpr::Expr(arg) => Self::with_meta(arg, meta),
+				_ => Err(RecipeError::Unimplemented.into()),
+			},
+		}
+	}
+	fn from_column(column: ObjectName, meta: &mut RecipeMeta) -> Recipe {
+		Recipe::Ingredient(Ingredient::Column(meta.find_or_append_column(column)))
+	}
+}
+
+fn identifier_into_object_name(identifier: Vec<Ident>) -> ObjectName {
+	identifier
+		.into_iter()
+		.map(|identifier| identifier.value)
+		.collect()
+}
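`aggregate_average` means `AVG` never reaches the executor: it is rewritten at build time into `SUM(x) / COUNT(x)`, so only primitive accumulators need aggregate state. A trivial check of that identity:

```rust
// AVG(x) is desugared to SUM(x) / COUNT(x) by aggregate_average, so only two
// primitive accumulators are needed. Toy demonstration of the same identity:
fn main() {
    let column = [3.0_f64, 5.0, 10.0];
    let sum: f64 = column.iter().sum();
    let count = column.len() as f64;
    assert_eq!(sum / count, 6.0); // identical to AVG(column)
}
```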
diff --git a/src/executor/recipe/planned.rs b/src/executor/recipe/planned.rs
new file mode 100644
index 00000000..ab429322
--- /dev/null
+++ b/src/executor/recipe/planned.rs
@@ -0,0 +1,274 @@
+use {
+	super::{
+		Ingredient, MetaRecipe, Method, Recipe, RecipeError, RecipeUtilities, Resolve, SimplifyBy,
+	},
+	crate::{
+		executor::types::{ColumnInfo, Row},
+		IndexFilter, Result, Value,
+	},
+	fstrings::*,
+	std::collections::HashMap,
+};
+
+#[derive(Debug, Clone, Default)]
+pub struct PlannedRecipe {
+	pub recipe: Recipe,
+	pub needed_column_indexes: Vec<Option<usize>>,
+	pub aggregates: Vec<Recipe>,
+}
+
+impl PlannedRecipe {
+	pub const TRUE: Self = Self {
+		recipe: Recipe::TRUE,
+		needed_column_indexes: vec![],
+		aggregates: vec![],
+	};
+	pub fn new(meta_recipe: MetaRecipe, columns: &[ColumnInfo]) -> Result<Self> {
+		let MetaRecipe { recipe, meta } = meta_recipe;
+		let aggregates = meta.aggregates;
+		let needed_column_indexes = meta
+			.objects
+			.into_iter()
+			.map(|needed_column| {
+				needed_column
+					.map(|needed_column| {
+						let needed_column_index_options: Vec<usize> = columns
+							.iter()
+							.enumerate()
+							.filter_map(|(index, column)| {
+								if column == &needed_column {
+									Some(index)
+								} else {
+									None
+								}
+							})
+							.collect();
+						match needed_column_index_options.len() {
+							0 => Err(RecipeError::MissingColumn(needed_column).into()),
+							1 => Ok(Some(needed_column_index_options[0])),
+							_ => Err(RecipeError::AmbiguousColumn(needed_column).into()),
+						}
+					})
+					.unwrap_or(Ok(None))
+			})
+			.collect::<Result<Vec<Option<usize>>>>()?;
+
+		Ok(Self {
+			recipe,
+			needed_column_indexes,
+			aggregates,
+		})
+	}
+	pub fn new_constraint(
+		meta_recipe: MetaRecipe,
+		columns: &[ColumnInfo],
+	) -> Result<(Self, HashMap<String, IndexFilter>)> {
+		let mut new = Self::new(meta_recipe, columns)?;
+		let indexed_table_columns = columns.iter().cloned().enumerate().fold(
+			HashMap::new(),
+			|mut tables: HashMap<String, Vec<(usize, String)>>, (index, column)| {
+				if let Some(index_name) = new
+					.needed_column_indexes
+					.iter()
+					.find(|need_index| need_index == &&Some(index))
+					.and_then(|_| column.index.clone())
+				{
+					let col_table = &column.table.name;
+					if let Some(table) = tables.get_mut(col_table) {
+						table.push((index, index_name));
+					} else {
+						tables.insert(col_table.clone(), vec![(index, index_name)]);
+					}
+				}
+				tables
+			},
+		);
+
+		let indexed_column_tables = indexed_table_columns.into_iter().fold(
+			HashMap::new(),
+			|mut indexed_columns, (table, columns)| {
+				columns.into_iter().for_each(|(column, index_name)| {
+					indexed_columns.insert(column, (table.clone(), index_name));
+				});
+				indexed_columns
+			},
+		);
+
+		let result = new.recipe.reduce_by_index_filter(indexed_column_tables);
+		new.recipe = result.0;
+		let index_filters = result.1.unwrap_or_default();
+
+		Ok((new, index_filters))
+	}
+	pub fn of_index(index: usize) -> Self {
+		Self {
+			recipe: Recipe::SINGLE_COLUMN,
+			needed_column_indexes: vec![Some(index)],
+			aggregates: vec![],
+		}
+	}
+	pub fn confirm_join_constraint(&self, plane_row: &Row, self_row: &Row) -> Result<bool> {
+		// Very important that this is performant; it needs *a lot* of optimisation.
+		// This is currently not good enough.
+		// For a join such as:
+		/*
+			SELECT
+				*
+			FROM
+				big_table
+				LEFT JOIN bigger_table
+					ON big_table.a = LEFT(bigger_table.b, 3)
+				LEFT JOIN biggest_table
+					ON big_table.c = (biggest_table.d + 1)
+		*/
+		/*
+			Where:
+				(a) big_table rows     = 1 000,
+				(b) bigger_table rows  = 10 000,
+				(c) biggest_table rows = 100 000,
+		*/
+		// This will run a * b * c times (1 000 000 000 000 (1e+12, one trillion) times).
+		// This isn't a particularly unusual query for a big database to run.
+		// Note that the number of times this runs can, will and should be optimised by reducing the number of rows that need to be compared with good planning scenarios.
+		// All of the above (obviously) applies to all functions used in this function.
+		let mut plane_row = plane_row.clone();
+		plane_row.extend(self_row.clone());
+
+		self.confirm_constraint(&plane_row)
+	}
+	pub fn confirm_constraint(&self, row: &Row) -> Result<bool> {
+		let solution = self
+			.clone()
+			.simplify_by_row_simple(row)?
+			.confirm_or_err(RecipeError::MissingComponents.into())?;
+		Ok(matches!(solution, Value::Bool(true)))
+	}
+	fn simplify_by_row_simple(self, row: &Row) -> Result<Recipe> {
+		let row = self.condense_row(row)?;
+		self.recipe.simplify(SimplifyBy::Row(&row))
+	}
+	fn condense_row(&self, row: &Row) -> Result<Row> {
+		self.needed_column_indexes
+			.iter()
+			.map(|index| {
+				index
+					.map(|index| {
+						Ok(row
+							.get(index)
+							.ok_or_else(|| {
+								RecipeError::MissingColumn(vec![
+									String::from("Unreachable"),
+									f!("{row_len=:?} {index=:?}", row_len = row.len()),
+								])
+							})?
+							.clone())
+					})
+					.unwrap_or(Ok(Value::Null))
+			})
+			.collect::<Result<Vec<Value>>>()
+	}
+	pub fn simplify_by_row(self, row: &Row) -> Result<Self> {
+		let row = self.condense_row(row)?;
+		let recipe = self.recipe.simplify(SimplifyBy::Row(&row))?;
+		let aggregates = self
+			.aggregates
+			.into_iter()
+			.map(|aggregate| aggregate.simplify(SimplifyBy::Row(&row)))
+			.collect::<Result<Vec<Recipe>>>()?;
+		let needed_column_indexes = self.needed_column_indexes;
+		Ok(Self {
+			recipe,
+			aggregates,
+			needed_column_indexes,
+		})
+	}
+	pub fn accumulate(&mut self, other: Self) -> Result<()> {
+		self.aggregates = self
+			.aggregates
+			.clone() // TODO: Don't clone
+			.into_iter()
+			.zip(other.aggregates)
+			.map(|(self_agg, other_agg)| {
+				let (operator, self_val) = if let Recipe::Method(self_agg) = self_agg {
+					if let Method::Aggregate(operator, recipe) = *self_agg {
+						let value = recipe
+							.confirm_or_err(RecipeError::UnreachableAggregationFailed.into())?;
+						(operator, value)
+					} else {
+						return Err(RecipeError::UnreachableNotAggregate(format!(
+							"{:?}",
+							self_agg
+						))
+						.into());
+					}
+				} else {
+					return Err(RecipeError::UnreachableNotMethod(format!("{:?}", self_agg)).into());
+				};
+
+				let other_val = if let Recipe::Method(other_agg) = other_agg {
+					if let Method::Aggregate(_, recipe) = *other_agg {
+						recipe.confirm_or_err(RecipeError::UnreachableAggregationFailed.into())?
+					} else {
+						return Err(RecipeError::UnreachableNotAggregate(format!(
+							"{:?}",
+							other_agg
+						))
+						.into());
+					}
+				} else {
+					return Err(
+						RecipeError::UnreachableNotMethod(format!("{:?}", other_agg)).into(),
+					);
+				};
+				let value = Recipe::Ingredient(Ingredient::Value(operator(self_val, other_val)?));
+				Ok(Recipe::Method(Box::new(Method::Aggregate(operator, value))))
+			})
+			.collect::<Result<Vec<Recipe>>>()?;
+		Ok(())
+	}
+	pub fn finalise_accumulation(self) -> Result<Value> {
+		let accumulated = self
+			.aggregates
+			.into_iter()
+			.map(|agg| {
+				if let Recipe::Method(method) = agg {
+					if let Method::Aggregate(_, Recipe::Ingredient(Ingredient::Value(value))) =
+						*method
+					{
+						return Ok(if let Value::Internal(value) = value {
+							Value::I64(value)
+						} else {
+							value
+						});
+					}
+				}
+				Err(RecipeError::UnreachableAggregateFailed.into())
+			})
+			.collect::<Result<Vec<Value>>>()?;
+		self.recipe
+			.simplify(SimplifyBy::CompletedAggregate(accumulated))?
+			.confirm_or_err(RecipeError::UnreachableAggregateFailed.into())
+	}
+	pub fn get_label(
+		&self,
+		selection_index: usize,
+		include_table: bool,
+		columns: &[ColumnInfo],
+	) -> String {
+		if let Recipe::Ingredient(Ingredient::Column(_)) = self.recipe {
+			self.needed_column_indexes
+				.get(0)
+				.and_then(|index| index.and_then(|index| columns.get(index)))
+				.map(|column| {
+					if include_table {
+						format!("{}.{}", column.table.name, column.name)
+					} else {
+						column.name.clone()
+					}
+				})
+		} else {
+			None
+		}
+		.unwrap_or(format!("unnamed_{}", selection_index))
+	}
+}
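`PlannedRecipe::accumulate` combines two partial aggregate states with the aggregate's own operator (SUM adds, MAX keeps the larger), which is what lets groups be folded chunk by chunk. A toy sketch for a single SUM (assumed types, not the crate's):

```rust
// Toy mirror of PlannedRecipe::accumulate for a single SUM aggregate: each chunk
// produces a partial state, and states combine with the aggregate's operator.
type AggregateOperator = fn(i64, i64) -> i64;

fn accumulate(op: AggregateOperator, state: i64, incoming: i64) -> i64 {
    op(state, incoming)
}

fn main() {
    let sum: AggregateOperator = |l, r| l + r;
    let chunk_states = [3, 5, 10]; // partial SUMs from three row chunks
    let total = chunk_states.into_iter().reduce(|l, r| accumulate(sum, l, r));
    assert_eq!(total, Some(18));
}
```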
diff --git a/src/executor/recipe/resolve.rs b/src/executor/recipe/resolve.rs
new file mode 100644
index 00000000..db430e8a
--- /dev/null
+++ b/src/executor/recipe/resolve.rs
@@ -0,0 +1,220 @@
+use {
+	super::{Ingredient, Method, Recipe, RecipeError, RecipeUtilities},
+	crate::{executor::types::Row, Result, Value},
+};
+
+#[derive(Clone)]
+pub enum SimplifyBy<'a> {
+	Basic,
+	OptRow(&'a Vec<Option<Value>>),
+	Row(&'a Row),
+	CompletedAggregate(Vec<Value>),
+}
+
+pub trait Resolve
+where
+	Self: Sized,
+{
+	fn simplify(self, component: SimplifyBy) -> Result<Self>;
+}
+
+impl Resolve for Recipe {
+	fn simplify(self, component: SimplifyBy) -> Result<Self> {
+		match self {
+			Recipe::Ingredient(ingredient) => {
+				ingredient.simplify(component).map(Recipe::Ingredient)
+			}
+			Recipe::Method(method) => method.simplify(component).map(|method| {
+				if let Method::Value(value) = method {
+					Recipe::Ingredient(Ingredient::Value(value))
+				} else {
+					Recipe::Method(Box::new(method))
+				}
+			}),
+		}
+	}
+}
+
+impl Resolve for Ingredient {
+	fn simplify(self, component: SimplifyBy) -> Result<Self> {
+		Ok(match self {
+			Ingredient::Column(index) => {
+				if let SimplifyBy::Row(row) = component {
+					Ingredient::Value(row.get(index).ok_or(RecipeError::UnreachableNoRow)?.clone())
+				} else if let SimplifyBy::OptRow(row) = component {
+					row.get(index)
+						.and_then(Clone::clone)
+						.map(Ingredient::Value)
+						.unwrap_or(self)
+				} else {
+					self
+				}
+			}
+			Ingredient::Aggregate(index) => {
+				if let SimplifyBy::CompletedAggregate(values) = component {
+					Ingredient::Value(values.get(index).ok_or(RecipeError::Unreachable)?.clone())
+				} else {
+					self
+				}
+			}
+			Ingredient::Value(..) => self, // Already simple!
+		})
+	}
+}
+
+#[allow(clippy::if_same_then_else)] // No idea what Clippy is trying to say here
+#[allow(clippy::collapsible_else_if)] // Intentional for clarity
+impl Resolve for Method {
+	fn simplify(self, component: SimplifyBy) -> Result<Self> {
+		Ok(match self {
+			Method::UnaryOperation(operator, recipe) => {
+				let recipe = recipe.simplify(component)?;
+				if let Some(value) = recipe.as_solution() {
+					Method::Value(operator(value)?)
+				} else {
+					Method::UnaryOperation(operator, recipe)
+				}
+			}
+			Method::BinaryOperation(operator, left, right) => {
+				let left = left.simplify(component.clone())?;
+
+				if let Some(Value::Bool(value)) = left.as_solution() {
+					// Optimisation -- is this a good idea?
+					if !value {
+						// Clippy didn't like this without "as usize"
+						if operator as usize == Value::and as usize {
+							return Ok(Method::Value(Value::Bool(false)));
+						}
+					} else {
+						if operator as usize == Value::or as usize {
+							return Ok(Method::Value(Value::Bool(true)));
+						}
+					}
+				}
+
+				let right = right.simplify(component)?;
+
+				if let (Some(left), Some(right)) = (left.as_solution(), right.as_solution()) {
+					Method::Value(operator(left, right)?)
+				} else {
+					Method::BinaryOperation(operator, left, right)
+				}
+			}
+			Method::Function(function, arguments) => {
+				let arguments = arguments
+					.into_iter()
+					.map(|argument| argument.simplify(component.clone()))
+					.collect::<Result<Vec<Recipe>>>()?;
+				if let Some(arguments) = arguments
+					.iter()
+					.map(|argument| argument.as_solution())
+					.collect::<Option<Vec<Value>>>()
+				{
+					Method::Value(function(arguments)?)
+				} else {
+					Method::Function(function, arguments)
+				}
+			}
+			Method::Cast(data_type, recipe) => {
+				let recipe = recipe.simplify(component)?;
+				if let Some(value) = recipe.as_solution() {
+					Method::Value(value.cast_datatype(&data_type)?)
+				} else {
+					Method::Cast(data_type, recipe)
+				}
+			}
+
+			Method::Case {
+				operand,
+				cases,
+				else_result,
+			} => {
+				let operand = operand
+					.map(|operand| operand.simplify(component.clone()))
+					.transpose()?;
+				let else_result = else_result
+					.map(|else_result| else_result.simplify(component.clone()))
+					.transpose()?;
+				let cases = cases
+					.into_iter()
+					.map(|(condition, result)| {
+						Ok((
+							condition.simplify(component.clone())?,
+							result.simplify(component.clone())?,
+						))
+					})
+					.collect::<Result<Vec<(Recipe, Recipe)>>>()?;
+
+				if let Some(None) = operand.clone().map(|operand| operand.as_solution()) {
+					Method::Case {
+						operand,
+						cases,
+						else_result,
+					}
+				} else if let Some(None) = else_result
+					.clone()
+					.map(|else_result| else_result.as_solution())
+				{
+					Method::Case {
+						operand,
+						cases,
+						else_result,
+					}
+				} else if let Some(cases) = cases
+					.iter()
+					.map(|(condition, result)| {
+						Some((condition.as_solution()?, result.as_solution()?))
+					})
+					.collect::<Option<Vec<(Value, Value)>>>()
+				{
+					let operand = operand.map(|operand| operand.as_solution());
+					let else_result = else_result
+						.map(|else_result| else_result.as_solution())
+						.unwrap_or(Some(Value::Null))
+						.unwrap();
+					if let Some(operand) = operand {
+						let operand = operand.unwrap();
+						Method::Value(
+							cases
+								.into_iter()
+								.find_map(|(condition, result)| {
+									if operand == condition {
+										Some(result)
+									} else {
+										None
+									}
+								})
+								.unwrap_or(else_result),
+						)
+					} else {
+						Method::Value(
+							cases
+								.into_iter()
+								.find_map(|(condition, result)| {
+									if matches!(condition, Value::Bool(true)) {
+										Some(result)
+									} else {
+										None
+									}
+								})
+								.unwrap_or(else_result),
+						)
+					}
+				} else {
+					Method::Case {
+						operand,
+						cases,
+						else_result,
+					}
+				}
+			}
+
+			Method::Value(..) => return Err(RecipeError::Unreachable.into()),
+
+			// This will only occur for a special aggregate simplify
+			Method::Aggregate(operator, recipe) => {
+				Method::Aggregate(operator, recipe.simplify(component)?)
+			}
+		})
+	}
+}
diff --git a/src/executor/set_variable.rs b/src/executor/set_variable.rs
new file mode 100644
index 00000000..b852e16c
--- /dev/null
+++ b/src/executor/set_variable.rs
@@ -0,0 +1,21 @@
+use {
+	crate::{ExecuteError, Glue, Result, Value},
+	sqlparser::ast::{Ident, SetVariableValue},
+};
+
+impl Glue {
+	pub async fn set_variable(
+		&mut self,
+		variable: &Ident,
+		value: &[SetVariableValue],
+	) -> Result<()> {
+		let first_value = value.get(0).ok_or(ExecuteError::MissingComponentsForSet)?;
+		let value: Value = match first_value {
+			SetVariableValue::Ident(..) => unimplemented!(),
+			SetVariableValue::Literal(literal) => literal.try_into()?,
+		};
+		let name = variable.value.clone();
+		self.get_mut_context()?.set_variable(name, value);
+		Ok(())
+	}
+}
diff --git a/src/executor/types.rs b/src/executor/types.rs
new file mode 100644
index 00000000..c4e9e794
--- /dev/null
+++ b/src/executor/types.rs
@@ -0,0 +1,117 @@
+use {
+	crate::{ExecuteError, JoinError, Result, Value},
+	serde::Serialize,
+	sqlparser::ast::{ObjectName as AstObjectName, TableFactor},
+	std::fmt::Debug,
+};
+
+pub type Alias = Option<String>;
+pub type Label = String;
+pub type Row = Vec<Value>;
+pub type LabelsAndRows = (Vec