From e9507e671740abefe8aca44348f9a07cf59cd594 Mon Sep 17 00:00:00 2001 From: Frank Jimenez Date: Sat, 13 Jul 2024 00:24:48 +0100 Subject: [PATCH] bump v0.3.2 --- Cargo.toml | 2 +- README.md | 58 +++++++++++++++++++++++------- justfile | 7 ++++ src/file_types/common.rs | 14 +++++++- src/file_types/geo.rs | 71 +++++++++++++++++++++++++++++++++++++ src/file_types/geojson.rs | 37 +++++++++++++++++++ src/file_types/shapefile.rs | 26 ++++++++++++++ src/pg/crud.rs | 3 +- src/utils/cli.rs | 17 +++++++-- src/utils/validate.rs | 38 ++++++++++++++++++++ 10 files changed, 255 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 53049f0..5062598 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "popgis" -version = "0.3.1" +version = "0.3.2" edition = "2021" license = "MIT OR Apache-2.0" homepage = "http://jjcfrancis.co" diff --git a/README.md b/README.md index 7685517..438eb78 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,26 @@ # popgis -A blazing fast way to insert GeoJSON & ShapeFile into a PostGIS database. +A blazing fast way to insert large GeoJSON & ShapeFile into a PostGIS database. + +## Why? + +Loading large datasets into a PostGIS database can take a long time and reducing the completion time of such jobs was the main aim of this project. `popgis` can be **x2 faster than ogr2ogr** and it's most noticeable when the input file is very large (with small datasets the performance increase is not as obvious) but also when working against non-local databases. ## Installation +You can install `popgis` via `Cargo` or `Homebrew`. Choose one option from below: + +### Cargo +```bash +cargo install popgis +``` + +### Homebrew ```bash brew tap jjcfrancisco/popgis brew install popgis ``` ## Usage -`input`, `uri` & `table` are required; `schema` is optional. +`input`, `uri` & `table` are required; `schema` & `srid` are optional. 
**Flags** @@ -16,34 +28,54 @@ brew install popgis `uri`: the PostGIS database where you'd like to insert the input data. -`schema`: where you would like the specified table. **This is optional**. *Default is public.* +`schema`: where you would like the specified table. **Optional**. *Default is public.* `table`: choose the name of the resulting table. +`srid`: choose either 4326 (WGS84) or 3857 (Web Mercator). **Optional**. *Default is 4326.* + **Examples** ```bash ## GeoJSON -> PostGIS ## -popgis --input spain.geojson \ - --uri postgresql://my_username:my_password@localhost:5432/my_database \ - --schema osm \ - --table waters +popgis -i spain.geojson \ + -u postgresql://my_username:my_password@localhost:5432/my_database \ + -s osm \ + -t waters \ + --srid 3857 ## ShapeFile -> PostGIS ## -popgis --input water_polygons.shp \ - --uri postgresql://my_username:my_password@localhost:5432/my_database \ - --schema osm \ - --table waters +popgis -i water_polygons.shp \ + -u postgresql://my_username:my_password@localhost:5432/my_database \ + -s osm \ + -t waters ``` ## Benchmarks -*Upcoming...* + +### ShapeFile + +| file size | `popgis` took | `ogr2ogr` took | environment | +|-----------|----------------|----------------|-------------| +| 1.2GB | **36sec** | 1min 15sec | local [PostGIS](https://hub.docker.com/r/kartoza/postgis/) | + +> The file used for this test can be found [here](https://osmdata.openstreetmap.de/data/water-polygons.html). + +### GeoJSON + +| file size | `popgis` took | `ogr2ogr` took | environment | +|-----------|----------------|----------------|-------------| +| 103.9MB | **2sec** | 5sec | local [PostGIS](https://hub.docker.com/r/kartoza/postgis/) | + +> The file used for this test can be found [here](https://data.cityofnewyork.us/City-Government/NYC-Street-Centerline-CSCL-/exjm-f27b). ## Future implementation The list below contains the upcoming implementations. To do: +* Allow nested GeoJSON properties. +* Improve stdout. * Allow GeoJSON as input. 
## License -See [`LICENSE`](./LICENSE) \ No newline at end of file +See [`LICENSE`](./LICENSE) diff --git a/justfile b/justfile index ea538a0..aa33b34 100644 --- a/justfile +++ b/justfile @@ -11,3 +11,10 @@ --uri postgresql://pio:password@localhost:25432/popgis \ --schema geojson \ --table spain + +@try-more: + cargo build --release + cd ./target/release/ && ./popgis -i ~/Downloads/street.geojson \ + -u postgresql://pio:password@localhost:25432/popgis \ + -s osm \ + -t street --srid 3857 diff --git a/src/file_types/common.rs b/src/file_types/common.rs index 8059455..222f08c 100644 --- a/src/file_types/common.rs +++ b/src/file_types/common.rs @@ -51,7 +51,7 @@ pub enum AcceptedTypes { } // Create enum of supported file types -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum FileType { Shapefile, GeoJson, @@ -71,3 +71,15 @@ pub fn determine_file_type(input_file: &str) -> Result { } } +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_determine_file_type() { + let shapefile = "examples/shapefile/andalucia.shp"; + let geojson = "examples/geojson/spain.geojson"; + assert_eq!(determine_file_type(shapefile).unwrap(), FileType::Shapefile); + assert_eq!(determine_file_type(geojson).unwrap(), FileType::GeoJson); + } +} diff --git a/src/file_types/geo.rs b/src/file_types/geo.rs index a9d102c..97bb72a 100644 --- a/src/file_types/geo.rs +++ b/src/file_types/geo.rs @@ -46,3 +46,74 @@ pub fn to_geo(shape: &Shape) -> Result> { _ => Err("Unsupported shape type".into()), } } + +#[cfg(test)] +mod tests { + use super::*; + use shapefile::{Point, Polyline}; + + #[test] + fn test_to_geo_point() { + let shape = Shape::Point(shapefile::Point::new(1.0, 2.0)); + let geo = to_geo(&shape).unwrap(); + assert_eq!(geo, geo::Geometry::Point(geo::Point::new(1.0, 2.0))); + } + + #[test] + fn test_to_geo_line() { + let first_part = vec![ + Point::new(1.0, 1.0), + Point::new(2.0, 2.0), + ]; + + let second_part = vec![ + Point::new(3.0, 1.0), + Point::new(5.0, 6.0), + ]; + + let 
poly = Polyline::with_parts(vec![first_part, second_part]); + let shape = Shape::Polyline(poly); + let geo = to_geo(&shape).unwrap(); + let expected = geo::Geometry::LineString(geo::LineString::from(vec![ + Coord::from((1.0, 1.0)), + Coord::from((2.0, 2.0)), + Coord::from((3.0, 1.0)), + Coord::from((5.0, 6.0)), + ])); + assert_eq!(geo, expected); + } + + #[test] + fn test_to_geo_poly() { + + let first_part = vec![ + Point::new(1.0, 1.0), + Point::new(2.0, 2.0), + ]; + + let second_part = vec![ + Point::new(3.0, 1.0), + Point::new(5.0, 6.0), + ]; + + let third_part = vec![ + Point::new(17.0, 15.0), + Point::new(18.0, 19.0), + Point::new(20.0, 19.0), + ]; + let poly = Polyline::with_parts(vec![first_part, second_part, third_part]); + let shape = Shape::Polyline(poly); + let geo = to_geo(&shape).unwrap(); + let expected = geo::Geometry::LineString(geo::LineString::from(vec![ + Coord::from((1.0, 1.0)), + Coord::from((2.0, 2.0)), + Coord::from((3.0, 1.0)), + Coord::from((5.0, 6.0)), + Coord::from((17.0, 15.0)), + Coord::from((18.0, 19.0)), + Coord::from((20.0, 19.0)), + ])); + assert_eq!(geo, expected); + + } +} diff --git a/src/file_types/geojson.rs b/src/file_types/geojson.rs index f22496b..6d366c8 100644 --- a/src/file_types/geojson.rs +++ b/src/file_types/geojson.rs @@ -49,6 +49,13 @@ pub fn determine_data_types(file_path: &str) -> Result> { data_type: Type::BOOL, }); } + // If null + serde_json::Value::Null => { + table_config.push(NewTableTypes { + column_name: key, + data_type: Type::TEXT, + }); + } _ => println!("Type currently not supported"), } } @@ -91,6 +98,9 @@ pub fn read_geojson(file_path: &str) -> Result { serde_json::Value::Bool(boolean) => { row.add(AcceptedTypes::Bool(Some(boolean))); } + serde_json::Value::Null => { + row.add(AcceptedTypes::Text(None)); + } _ => println!("Type currently not supported"), } } @@ -110,3 +120,30 @@ pub fn read_geojson(file_path: &str) -> Result { Ok(rows) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn 
test_determine_data_types() { + let file_path = "examples/geojson/spain.geojson"; + let data_types = determine_data_types(file_path).unwrap(); + assert_eq!(data_types.len(), 3); + for data_type in data_types { + match data_type.column_name.as_str() { + "source" => assert_eq!(data_type.data_type, Type::TEXT), + "id" => assert_eq!(data_type.data_type, Type::TEXT), + "name" => assert_eq!(data_type.data_type, Type::TEXT), + _ => (), + } + } + } + + #[test] + fn test_read_geojson() { + let file_path = "examples/geojson/spain.geojson"; + let rows = read_geojson(file_path).unwrap(); + assert_eq!(rows.row.len(), 19); + } +} diff --git a/src/file_types/shapefile.rs b/src/file_types/shapefile.rs index 367d797..43f631a 100644 --- a/src/file_types/shapefile.rs +++ b/src/file_types/shapefile.rs @@ -96,3 +96,29 @@ pub fn read_shapefile(file_path: &str) -> Result { Ok(rows) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_determine_data_types() { + let file_path = "examples/shapefile/andalucia.shp"; + let data_types = determine_data_types(file_path).unwrap(); + assert_eq!(data_types.len(), 2); + for data_type in data_types { + if data_type.column_name == "x" { + assert_eq!(data_type.data_type, Type::FLOAT8); + } else if data_type.column_name == "y" { + assert_eq!(data_type.data_type, Type::FLOAT8); + } + } + } + + #[test] + fn test_read_shapefile() { + let file_path = "examples/shapefile/andalucia.shp"; + let rows = read_shapefile(file_path).unwrap(); + assert_eq!(rows.row.len(), 36); + } +} diff --git a/src/pg/crud.rs b/src/pg/crud.rs index 6f698b3..9c1e95a 100644 --- a/src/pg/crud.rs +++ b/src/pg/crud.rs @@ -22,6 +22,7 @@ pub fn create_table( schema_name: &Option, config: &Vec, uri: &str, + srid: i32, ) -> Result { let mut query = String::from("CREATE TABLE IF NOT EXISTS "); if let Some(schema) = schema_name { @@ -47,7 +48,7 @@ pub fn create_table( _ => println!("Type currently not supported"), } } - query.push_str("geom Geometry(Geometry, 4326)"); + 
query.push_str(&format!("geom Geometry(Geometry, {})", srid)); query.push_str(");"); println!("{}", query); diff --git a/src/utils/cli.rs b/src/utils/cli.rs index e65026c..162c200 100644 --- a/src/utils/cli.rs +++ b/src/utils/cli.rs @@ -28,12 +28,21 @@ pub struct Cli { /// Schema name to create table in. Optional. #[arg(short, long)] pub schema: Option, + + /// Srid, if not provided, will default to 4326 + #[arg(long)] + pub srid: Option, } pub fn run() -> Result<()> { - let args = Cli::parse(); + let mut args = Cli::parse(); validate_args(&args)?; + // If not provided srid will default to 4326 + if args.srid.is_none() { + args.srid.get_or_insert(4326); + } + let file_type = determine_file_type(&args.input)?; let (rows, config) = match file_type { FileType::Shapefile => { @@ -47,7 +56,11 @@ pub fn run() -> Result<()> { if let Some(schema) = &args.schema { create_schema(&schema, &args.uri)?; } - let stmt = create_table(&args.table, &args.schema, &config, &args.uri)?; + let stmt = if let Some(srid) = args.srid { + create_table(&args.table, &args.schema, &config, &args.uri, srid)? + } else { + create_table(&args.table, &args.schema, &config, &args.uri, 4326)? 
+ }; let geom_type = infer_geom_type(stmt)?; insert_rows(&rows, &config, geom_type, &args.uri, &args.schema, &args.table)?; diff --git a/src/utils/validate.rs b/src/utils/validate.rs index c59cada..624ed9d 100644 --- a/src/utils/validate.rs +++ b/src/utils/validate.rs @@ -20,6 +20,44 @@ pub fn validate_args(args: &Cli) -> Result<()> { return Err("Table is empty".into()); } + // Check if srid is 4326 or 3857 + if let Some(srid) = args.srid { + if srid != 4326 && srid != 3857 { + return Err("SRID must be 4326 or 3857".into()); + } + } + Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + + // Without schema + #[test] + fn test_validate_args() { + let args = Cli { + input: "examples/geojson/spain.geojson".to_string(), + uri: "postgresql://localhost:5432/postgis".to_string(), + table: "points".to_string(), + schema: None, + srid: None, + }; + assert!(validate_args(&args).is_ok()); + } + + // With schema + #[test] + fn test_validate_args_with_schema() { + let args = Cli { + input: "examples/shapefile/andalucia.shp".to_string(), + uri: "postgresql://localhost:5432/postgis".to_string(), + table: "points".to_string(), + schema: Some("gis".to_string()), + srid: Some(4326) + }; + assert!(validate_args(&args).is_ok()); + } +}