diff --git a/Cargo.toml b/Cargo.toml index 0611516..3fe2d47 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,7 @@ license = "MIT OR Apache-2.0" homepage = "http://jjcfrancis.co" readme = "README.md" repository = "https://github.com/jjcfrancisco/popgis" -description = "A blazing fast way to insert GeoJSON & ShapeFile into a PostGIS database." +description = "A blazing fast way to insert GeoJSON, ShapeFile & OsmPBF into a PostGIS database." categories = ["science::geo"] [dependencies] diff --git a/README.md b/README.md index 38b3076..5f1b9c3 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # PopGIS -A blazing fast way to insert large GeoJSON & ShapeFile into a PostGIS database. +A blazing fast way to insert large GeoJSON, ShapeFile & OsmPBF into a PostGIS database. ## Why? Importing large datasets into a PostGIS database can take a long time and the aim of PopGIS is to optimize the performance of such operations. **PopGIS is 2x faster than ogr2ogr**, particularly with very large input files against remote databases. Although the performance improvement for smaller datasets may be minimal, the efficiency gains for larger datasets are considerable. For more details, go to the [benchmarks](#benchmarks) section. @@ -58,6 +58,11 @@ popgis --input spain.geojson \ --srid 4326 \ --reproject 3857 +popgis --input andalucia-latest.osm.pbf \ + --uri postgresql://my_username:my_password@localhost:5432/my_database \ + --schema osm \ + --table andalucia + ``` #### Modes @@ -84,15 +89,21 @@ Although non extensive, the benchmarking shows **PopGIS is twice faster than ogr > The file used for this test can be found [here](https://data.cityofnewyork.us/City-Government/NYC-Street-Centerline-CSCL-/exjm-f27b). +### OsmPBF +Coming soon. + ## Future implementations +* Allow filtering based on a key value pair. * Add GeoParquet support. * From PostGIS to GeoJSON/ShapeFile. * Reintroduce the append mode (temporarily removed in `v0.4.0` due to inconsistent results). 
+* Examples of piping the standard output of `what-osm-pbf` into `PopGIS` as input. ## Limitations * PopGIS does not currently support nested GeoJSON properties. +* When using `osm.pbf`, use the smallest Geofabrik areas to get the best performance - try using it in conjunction with the [`what-osm-pbf`](https://crates.io/crates/what-osm-pbf) CLI. ## License See [`LICENSE`](./LICENSE) diff --git a/src/format/osmpbf.rs b/src/format/osmpbf.rs index 4ac0873..f908447 100644 --- a/src/format/osmpbf.rs +++ b/src/format/osmpbf.rs @@ -113,10 +113,10 @@ fn build_polys_and_lines(file_path: &str, nodes: &HashMap) -> Resul Ok(all) } -fn osmpbf_to_rows(all: &Vec) -> Result { +fn osmpbf_to_rows(all: &[OsmPbf]) -> Result { let mut rows = Rows::new(); - for item in all.into_iter() { + for item in all.iter() { let mut row = Row::new(); // Tags row.add(AcceptedTypes::Array(Some(item.tags.clone()))); @@ -131,11 +131,10 @@ fn osmpbf_to_rows(all: &Vec) -> Result { } pub fn determine_data_types() -> Result> { - let mut data_types: Vec = Vec::new(); - data_types.push(NewTableTypes { + let data_types: Vec = vec![NewTableTypes { column_name: "tags".to_string(), data_type: Type::TEXT_ARRAY, - }); + }]; Ok(data_types) } diff --git a/src/utils/cli.rs b/src/utils/cli.rs index 09f66af..af77038 100644 --- a/src/utils/cli.rs +++ b/src/utils/cli.rs @@ -8,7 +8,7 @@ use crate::{Error, Result}; use clap::Parser; -/// A blazing fast way to insert GeoJSON & ShapeFiles into a PostGIS database +/// A blazing fast way to insert GeoJSON, ShapeFiles & OsmPBF into a PostGIS database #[derive(Parser, Debug)] #[command(about, version)] pub struct Cli { @@ -60,7 +60,10 @@ pub fn run() -> Result<()> { geojson::read_geojson(&args)?, geojson::determine_data_types(&args.input)?, ), - FileType::Osmpbf => (osmpbf::read_osmpbf(&args)?, osmpbf::determine_data_types()?), + FileType::Osmpbf => { + args.srid = Some(4326); // OsmPbf files are always in 4326 + (osmpbf::read_osmpbf(&args)?, osmpbf::determine_data_types()?) 
+ } }; // If mode not present, check if table exists diff --git a/src/utils/validate.rs b/src/utils/validate.rs index ce5ca50..3a05d06 100644 --- a/src/utils/validate.rs +++ b/src/utils/validate.rs @@ -1,13 +1,14 @@ -use crate::{Result, Error}; use super::cli::Cli; +use crate::{Error, Result}; use std::path::Path; // Validate the file path pub fn validate_args(args: &Cli) -> Result<()> { - // Check input file exists if !Path::new(&args.input).exists() { - return Err(Error::FailedValidation("❌ Input file does not exist".into())); + return Err(Error::FailedValidation( + "❌ Input file does not exist".into(), + )); } // Check URL is not empty @@ -23,12 +24,13 @@ pub fn validate_args(args: &Cli) -> Result<()> { // Check if srid is 4326 or 3857 if let Some(srid) = args.srid { if srid != 4326 && srid != 3857 { - return Err(Error::FailedValidation("❌ SRID must be 4326 or 3857".into())); + return Err(Error::FailedValidation( + "❌ SRID must be 4326 or 3857".into(), + )); } } Ok(()) - } #[cfg(test)] @@ -45,7 +47,7 @@ mod tests { schema: None, srid: None, mode: None, - reproject: None + reproject: None, }; assert!(validate_args(&args).is_ok()); } @@ -60,7 +62,7 @@ mod tests { schema: Some("gis".to_string()), srid: Some(4326), mode: None, - reproject: None + reproject: None, }; assert!(validate_args(&args).is_ok()); }