Skip to content

Commit

Permalink
bump v0.3.2
Browse files Browse the repository at this point in the history
  • Loading branch information
jjcfrancisco committed Jul 12, 2024
1 parent 07f46d5 commit e9507e6
Show file tree
Hide file tree
Showing 10 changed files with 255 additions and 18 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "popgis"
version = "0.3.1"
version = "0.3.2"
edition = "2021"
license = "MIT OR Apache-2.0"
homepage = "http://jjcfrancis.co"
Expand Down
58 changes: 45 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,49 +1,81 @@
# popgis
A blazing fast way to insert GeoJSON & ShapeFile into a PostGIS database.
A blazing fast way to insert large GeoJSON & ShapeFile into a PostGIS database.

## Why?

Loading large datasets into a PostGIS database can take a long time, and reducing the completion time of such jobs was the main aim of this project. `popgis` can be **2x faster than ogr2ogr**; the gain is most noticeable when the input file is very large (with small datasets the performance increase is less obvious) and when working against non-local databases.

## Installation
You can install `popgis` via `Cargo` or `Homebrew`. Choose one option from below:

### Cargo
```bash
cargo install popgis
```

### Homebrew
```bash
brew tap jjcfrancisco/popgis
brew install popgis
```

## Usage
`input`, `uri` & `table` are required; `schema` is optional.
`input`, `uri` & `table` are required; `schema` & `srid` are optional.

**Flags**

`input`: choose the *geojson* or *shapefile* file to insert into a PostGIS database.

`uri`: the PostGIS database where you'd like to insert the input data.

`schema`: where you would like the specified table. **This is optional**. *Default is public.*
`schema`: the schema in which the specified table should be created. **Optional**. *Default is public.*

`table`: choose the name of the resulting table.

`srid`: choose either 4326 (WGS84) or 3857 (Web Mercator). **Optional**. *Default is 4326.*

**Examples**
```bash
## GeoJSON -> PostGIS ##
popgis --input spain.geojson \
--uri postgresql://my_username:my_password@localhost:5432/my_database \
--schema osm \
--table waters
popgis -i spain.geojson \
-u postgresql://my_username:my_password@localhost:5432/my_database \
-s osm \
-t waters \
--srid 3857

## ShapeFile -> PostGIS ##
popgis --input water_polygons.shp \
--uri postgresql://my_username:my_password@localhost:5432/my_database \
--schema osm \
--table waters
popgis -i water_polygons.shp \
-u postgresql://my_username:my_password@localhost:5432/my_database \
-s osm \
-t waters
```

## Benchmarks
*Upcoming...*

### ShapeFile

| file size | `popgis` took | `ogr2ogr` took | environment |
|-----------|----------------|----------------|-------------|
| 1.2GB | **36sec** | 1min 15sec | local [PostGIS](https://hub.docker.com/r/kartoza/postgis/) |

> The file used for this test can be found [here](https://osmdata.openstreetmap.de/data/water-polygons.html).
### GeoJSON

| file size | `popgis` took | `ogr2ogr` took | environment |
|-----------|----------------|----------------|-------------|
| 103.9MB | **2sec** | 5sec | local [PostGIS](https://hub.docker.com/r/kartoza/postgis/) |

> The file used for this test can be found [here](https://data.cityofnewyork.us/City-Government/NYC-Street-Centerline-CSCL-/exjm-f27b).
## Future implementation
The list below contains the upcoming implementations.

To do:

* Allow nested GeoJSON properties.
* Improve stdout.
* <del>Allow GeoJSON as input.</del>

## License
See [`LICENSE`](./LICENSE)
See [`LICENSE`](./LICENSE)
7 changes: 7 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,10 @@
--uri postgresql://pio:password@localhost:25432/popgis \
--schema geojson \
--table spain

# Build the release binary, then load ~/Downloads/street.geojson into the
# `street` table of the `osm` schema with SRID 3857 (short-flag form of the CLI).
@try-more:
cargo build --release
cd ./target/release/ && ./popgis -i ~/Downloads/street.geojson \
-u postgresql://pio:password@localhost:25432/popgis \
-s osm \
-t street --srid 3857
14 changes: 13 additions & 1 deletion src/file_types/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ pub enum AcceptedTypes {
}

// Create enum of supported file types
#[derive(Debug)]
#[derive(Debug, PartialEq)]
pub enum FileType {
Shapefile,
GeoJson,
Expand All @@ -71,3 +71,15 @@ pub fn determine_file_type(input_file: &str) -> Result<FileType> {
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The example shapefile and GeoJSON paths must each be classified as the
    /// corresponding `FileType` variant.
    #[test]
    fn test_determine_file_type() {
        // (input path, expected classification)
        let cases = [
            ("examples/shapefile/andalucia.shp", FileType::Shapefile),
            ("examples/geojson/spain.geojson", FileType::GeoJson),
        ];

        for (path, expected) in cases {
            assert_eq!(determine_file_type(path).unwrap(), expected);
        }
    }
}
71 changes: 71 additions & 0 deletions src/file_types/geo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,74 @@ pub fn to_geo(shape: &Shape) -> Result<geo::Geometry<f64>> {
_ => Err("Unsupported shape type".into()),
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use shapefile::{Point, Polyline};

    /// Builds a `Shape::Polyline` whose parts are given as lists of `(x, y)` pairs.
    fn polyline_shape(parts: Vec<Vec<(f64, f64)>>) -> Shape {
        let parts: Vec<Vec<Point>> = parts
            .into_iter()
            .map(|part| part.into_iter().map(|(x, y)| Point::new(x, y)).collect())
            .collect();
        Shape::Polyline(Polyline::with_parts(parts))
    }

    /// Builds the `geo` line string geometry made of the given `(x, y)` pairs, in order.
    fn line_string(coords: &[(f64, f64)]) -> geo::Geometry<f64> {
        let coords: Vec<_> = coords.iter().map(|&xy| Coord::from(xy)).collect();
        geo::Geometry::LineString(geo::LineString::from(coords))
    }

    /// A shapefile point converts to the `geo` point with the same coordinates.
    #[test]
    fn test_to_geo_point() {
        let converted = to_geo(&Shape::Point(Point::new(1.0, 2.0))).unwrap();
        assert_eq!(converted, geo::Geometry::Point(geo::Point::new(1.0, 2.0)));
    }

    /// A two-part polyline converts to one line string holding every point in order.
    #[test]
    fn test_to_geo_line() {
        let shape = polyline_shape(vec![
            vec![(1.0, 1.0), (2.0, 2.0)],
            vec![(3.0, 1.0), (5.0, 6.0)],
        ]);

        let converted = to_geo(&shape).unwrap();

        let expected = line_string(&[(1.0, 1.0), (2.0, 2.0), (3.0, 1.0), (5.0, 6.0)]);
        assert_eq!(converted, expected);
    }

    /// A three-part polyline converts to one line string holding every point in order.
    // NOTE(review): despite the "poly" name, this test builds a `Shape::Polyline`,
    // not a polygon shape.
    #[test]
    fn test_to_geo_poly() {
        let shape = polyline_shape(vec![
            vec![(1.0, 1.0), (2.0, 2.0)],
            vec![(3.0, 1.0), (5.0, 6.0)],
            vec![(17.0, 15.0), (18.0, 19.0), (20.0, 19.0)],
        ]);

        let converted = to_geo(&shape).unwrap();

        let expected = line_string(&[
            (1.0, 1.0),
            (2.0, 2.0),
            (3.0, 1.0),
            (5.0, 6.0),
            (17.0, 15.0),
            (18.0, 19.0),
            (20.0, 19.0),
        ]);
        assert_eq!(converted, expected);
    }
}
37 changes: 37 additions & 0 deletions src/file_types/geojson.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ pub fn determine_data_types(file_path: &str) -> Result<Vec<NewTableTypes>> {
data_type: Type::BOOL,
});
}
// If null
serde_json::Value::Null => {
table_config.push(NewTableTypes {
column_name: key,
data_type: Type::TEXT,
});
}
_ => println!("Type currently not supported"),
}
}
Expand Down Expand Up @@ -91,6 +98,9 @@ pub fn read_geojson(file_path: &str) -> Result<Rows> {
serde_json::Value::Bool(boolean) => {
row.add(AcceptedTypes::Bool(Some(boolean)));
}
serde_json::Value::Null => {
row.add(AcceptedTypes::Text(None));
}
_ => println!("Type currently not supported"),
}
}
Expand All @@ -110,3 +120,30 @@ pub fn read_geojson(file_path: &str) -> Result<Rows> {

Ok(rows)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Path of the GeoJSON example used by every test in this module.
    const SPAIN_GEOJSON: &str = "examples/geojson/spain.geojson";

    /// The example file must yield three columns; `source`, `id` and `name`
    /// are each expected to be typed as TEXT.
    #[test]
    fn test_determine_data_types() {
        let columns = determine_data_types(SPAIN_GEOJSON).unwrap();
        assert_eq!(columns.len(), 3);

        // Columns with any other name are not checked.
        for column in columns
            .iter()
            .filter(|c| matches!(c.column_name.as_str(), "source" | "id" | "name"))
        {
            assert_eq!(column.data_type, Type::TEXT);
        }
    }

    /// Reading the example file must produce 19 rows.
    #[test]
    fn test_read_geojson() {
        let row_count = read_geojson(SPAIN_GEOJSON).unwrap().row.len();
        assert_eq!(row_count, 19);
    }
}
26 changes: 26 additions & 0 deletions src/file_types/shapefile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,29 @@ pub fn read_shapefile(file_path: &str) -> Result<Rows> {

Ok(rows)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Path of the shapefile example used by every test in this module.
    const ANDALUCIA_SHP: &str = "examples/shapefile/andalucia.shp";

    /// The example file must yield two columns; `x` and `y` are each expected
    /// to be typed as FLOAT8.
    #[test]
    fn test_determine_data_types() {
        let columns = determine_data_types(ANDALUCIA_SHP).unwrap();
        assert_eq!(columns.len(), 2);

        // Columns with any other name are not checked.
        for column in columns
            .iter()
            .filter(|c| c.column_name == "x" || c.column_name == "y")
        {
            assert_eq!(column.data_type, Type::FLOAT8);
        }
    }

    /// Reading the example file must produce 36 rows.
    #[test]
    fn test_read_shapefile() {
        let row_count = read_shapefile(ANDALUCIA_SHP).unwrap().row.len();
        assert_eq!(row_count, 36);
    }
}
3 changes: 2 additions & 1 deletion src/pg/crud.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ pub fn create_table(
schema_name: &Option<String>,
config: &Vec<NewTableTypes>,
uri: &str,
srid: i32,
) -> Result<Statement> {
let mut query = String::from("CREATE TABLE IF NOT EXISTS ");
if let Some(schema) = schema_name {
Expand All @@ -47,7 +48,7 @@ pub fn create_table(
_ => println!("Type currently not supported"),
}
}
query.push_str("geom Geometry(Geometry, 4326)");
query.push_str(&format!("geom Geometry(Geometry, {})", srid));
query.push_str(");");
println!("{}", query);

Expand Down
17 changes: 15 additions & 2 deletions src/utils/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,21 @@ pub struct Cli {
/// Schema name to create table in. Optional.
#[arg(short, long)]
pub schema: Option<String>,

/// Srid, if not provided, will default to 4326
#[arg(long)]
pub srid: Option<i32>,
}

pub fn run() -> Result<()> {
let args = Cli::parse();
let mut args = Cli::parse();
validate_args(&args)?;

// If not provided srid will default to 4326
if args.srid.is_none() {
args.srid.get_or_insert(4326);
}

let file_type = determine_file_type(&args.input)?;
let (rows, config) = match file_type {
FileType::Shapefile => {
Expand All @@ -47,7 +56,11 @@ pub fn run() -> Result<()> {
if let Some(schema) = &args.schema {
create_schema(&schema, &args.uri)?;
}
let stmt = create_table(&args.table, &args.schema, &config, &args.uri)?;
let stmt = if let Some(srid) = args.srid {
create_table(&args.table, &args.schema, &config, &args.uri, srid)?
} else {
create_table(&args.table, &args.schema, &config, &args.uri, 4326)?
};
let geom_type = infer_geom_type(stmt)?;
insert_rows(&rows, &config, geom_type, &args.uri, &args.schema, &args.table)?;

Expand Down
38 changes: 38 additions & 0 deletions src/utils/validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,44 @@ pub fn validate_args(args: &Cli) -> Result<()> {
return Err("Table is empty".into());
}

// Check if srid is 4326 or 3857
if let Some(srid) = args.srid {
if srid != 4326 && srid != 3857 {
return Err("SRID must be 4326 or 3857".into());
}
}

Ok(())

}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Cli` with the URI and table name shared by every test, varying
    /// only the input file, the optional schema and the optional SRID.
    fn cli(input: &str, schema: Option<&str>, srid: Option<i32>) -> Cli {
        Cli {
            input: input.to_string(),
            uri: "postgresql://localhost:5432/postgis".to_string(),
            table: "points".to_string(),
            schema: schema.map(str::to_string),
            srid,
        }
    }

    // Without schema
    #[test]
    fn test_validate_args() {
        let args = cli("examples/geojson/spain.geojson", None, None);
        assert!(validate_args(&args).is_ok());
    }

    // With schema
    #[test]
    fn test_validate_args_with_schema() {
        let args = cli("examples/shapefile/andalucia.shp", Some("gis"), Some(4326));
        assert!(validate_args(&args).is_ok());
    }

    // 3857 is the second of the two SRIDs accepted by the validator.
    #[test]
    fn test_validate_args_with_web_mercator_srid() {
        let args = cli("examples/geojson/spain.geojson", None, Some(3857));
        assert!(validate_args(&args).is_ok());
    }

    // Any SRID other than 4326 or 3857 must be rejected. The remaining
    // arguments match `test_validate_args`, which is expected to pass.
    #[test]
    fn test_validate_args_rejects_unsupported_srid() {
        let args = cli("examples/geojson/spain.geojson", None, Some(27700));
        assert!(validate_args(&args).is_err());
    }
}

0 comments on commit e9507e6

Please sign in to comment.