
Commit

Clean up scripts
DimitrisMantas committed Nov 3, 2024
1 parent c16b608 commit 9b18005
Showing 8 changed files with 278 additions and 573 deletions.
3 changes: 0 additions & 3 deletions .gitignore
@@ -12,6 +12,3 @@
/logs/
/temp/
/roofsense/temp/

# Under Construction
/tools/
191 changes: 191 additions & 0 deletions roofsense/bag3d.py
@@ -0,0 +1,191 @@
import gzip
import hashlib
import os
import re
import shutil
import tempfile
import urllib.parse
from dataclasses import dataclass
from enum import UNIQUE, StrEnum, auto, verify
from typing import Final

import geopandas as gpd
import requests

from roofsense.utils.file import confirm_write_op, get_default_data_dir


@dataclass
class BAG3DTileAssetInfo:
    tid: list[str]
    url: list[str]


@dataclass
class BAG3DTileAssetManifest:
    img: BAG3DTileAssetInfo
    lidr: BAG3DTileAssetInfo

    def save(self, filename: str, overwrite: bool = False) -> None:
        confirm_write_op(filename, type="file", overwrite=overwrite)
        raise NotImplementedError


@verify(UNIQUE)
class LevelOfDetail(StrEnum):
    LoD12 = auto()
    LoD13 = auto()
    LoD22 = auto()


class BAG3DTileStore:
    _BASE_URL: Final[str] = "https://data.3dbag.nl/"
    # TODO: Narrow this down.
    _TILE_FMT: Final[re.Pattern[str]] = re.compile(r"^\d{1,2}-\d{3,4}-\d{2,4}$")

    def __init__(self, version: str = "2024.02.28") -> None:
        self._dir = get_default_data_dir(version)
        self._ver = version
        self._init_version()

    @property
    def index(self) -> gpd.GeoDataFrame:
        return self._index

    @property
    def version(self) -> str:
        return self._ver

    def download_index(self, overwrite: bool = False, **kwargs) -> None:
        filename = os.path.join(self._dir, "tile_index.fgb")
        confirm_write_op(filename, type="file", overwrite=overwrite)
        self._index.to_file(filename, **kwargs)

    # TODO: Make downloads optional if possible.
    def download_tile(
        self,
        tile_id: str,
        checksum: bool = True,
        overwrite: bool = False,
        **kwargs,
    ) -> None:
        valid_fmt = self._validate_tile_id(tile_id)

        filename = os.path.join(self._dir, f"{tile_id}.gpkg")
        confirm_write_op(filename, type="file", overwrite=overwrite)

        # TODO: Check that this works.
        match = self._index.loc[self._index.tile_id == valid_fmt]

        url: str = match["gpkg_download"].item()
        if checksum:
            true_sha: str = match["gpkg_sha256"].item()
            curr_sha = hashlib.sha256()

        # Stream the response so the file can be written as it arrives.
        with requests.get(url=url, stream=True, **kwargs) as r:
            r.raise_for_status()
            with tempfile.NamedTemporaryFile(
                mode="wb",
                # https://stackoverflow.com/questions/23212435/permission-denied-to-write-to-my-temporary-file
                delete=False,
            ) as temp:
                for chunk in r.iter_content(chunk_size=None):
                    temp.write(chunk)
                    if checksum:
                        curr_sha.update(chunk)
        if checksum and curr_sha.hexdigest() != true_sha:
            raise RuntimeError(
                f"Failed to verify the integrity of tile: {tile_id}. "
                f"Try downloading again with the checksum option disabled."
            )
        # The tile is served gzip-compressed; decompress it into the target GeoPackage.
        with gzip.open(temp.name, mode="rb") as src, open(filename, mode="wb") as dst:
            shutil.copyfileobj(src, dst)
        os.unlink(temp.name)

    def read_tile(self, tile_id: str, lod: LevelOfDetail) -> gpd.GeoDataFrame:
        filename = os.path.join(self._dir, f"{tile_id}.gpkg")
        return gpd.read_file(filename, layer=f"{lod}_2d", force_2d=True)

    def sample_tile(self) -> gpd.GeoSeries:
        raise NotImplementedError

    def asset_manifest(
        self, tile_id: str, image_index: gpd.GeoDataFrame, lidar_index: gpd.GeoDataFrame
    ) -> BAG3DTileAssetManifest:
        surfs = self.read_tile(tile_id, lod=LevelOfDetail.LoD22)

        # TODO: Do not rebuild the manifest if it already exists.

        image_matches = image_index.overlay(surfs)
        lidar_matches = lidar_index.overlay(surfs)

        return BAG3DTileAssetManifest(
            img=BAG3DTileAssetInfo(
                tid=image_matches.tid.unique(), url=image_matches.url.unique()
            ),
            lidr=BAG3DTileAssetInfo(
                tid=lidar_matches.tid.unique(), url=lidar_matches.url.unique()
            ),
        )

    def _init_version(self) -> None:
        url = urllib.parse.urljoin(
            self._BASE_URL, f"v{self._ver.replace('.', '')}/tile_index.fgb"
        )
        self._index = gpd.read_file(url)

    def _validate_tile_id(self, tile_id: str) -> str:
        if not self._TILE_FMT.fullmatch(tile_id):
            raise ValueError(
                f"Invalid tile ID: {tile_id}. Make sure to delimit digit groups with dashes."
            )
        valid_fmt = tile_id.replace("-", "/")
        if valid_fmt not in self._index.tile_id.values:
            raise ValueError(f"Failed to locate tile with ID: {tile_id} in index.")
        return valid_fmt


if __name__ == "__main__":
    store = BAG3DTileStore()
    store.download_tile("9-284-556")
    store.asset_manifest(
        "9-284-556", image_index=gpd.GeoDataFrame(), lidar_index=gpd.GeoDataFrame()
    ).save(
        # TODO: Resolve the filename automatically.
    )
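
Taken together, the new module exposes a small tile-store API. The following is a minimal usage sketch, not part of the commit, assuming the same 9-284-556 tile used in the __main__ block and that the package is importable as roofsense.bag3d:

    from roofsense.bag3d import BAG3DTileStore, LevelOfDetail

    store = BAG3DTileStore()              # reads the remote tile index for v2024.02.28
    store.download_index(overwrite=True)  # cache the index locally as tile_index.fgb
    store.download_tile("9-284-556")      # download, verify, and decompress one tile
    # StrEnum with auto() lower-cases the member name, so LoD22 reads the "lod22_2d" layer.
    surfaces = store.read_tile("9-284-556", lod=LevelOfDetail.LoD22)
    print(len(surfaces))

All artifacts end up under ~/.roofsense/<version>/ via get_default_data_dir, so repeated runs reuse the same cache directory.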
56 changes: 33 additions & 23 deletions roofsense/utils/file.py
@@ -21,8 +21,18 @@
_HookCallback = typing.Callable[[requests.Response], typing.Any]


def get_default_data_dir(version: str) -> str:
    path = (pathlib.Path.home() / ".roofsense" / version).as_posix()
    os.makedirs(path, exist_ok=True)
    return path


def confirm_write_op(
    path: str, type: typing.Literal["dir", "file"], overwrite: bool = False
) -> bool:
    """Determine whether an upcoming write-to-disk operation should be performed.
@@ -73,14 +83,14 @@ def confirm_write_op(

class FileDownloader(abc.ABC):
    def __init__(
        self,
        session: requests.Session,
        overwrite: bool = False,
        timeout: _Timeout | None = (3.05, None),
        callbacks: _HookCallback | typing.Collection[_HookCallback] | None = None,
        # FIXME - Find a way to turn progress reporting on and off automatically
        # instead of leaving it up to the user to decide.
        report_progress: bool = True,
    ) -> None:
        self._session = session
        # FIXME - Expose these options to the user.
@@ -106,7 +116,7 @@ def _fetch(self, url: str, filename: str) -> None:
            return

        with self._session.get(
            url, timeout=self._timeout, hooks=self._callbacks, stream=True, verify=False
        ) as response:
            self._handle(response)

@@ -143,12 +153,12 @@ def _write(self, response: requests.Response, file: typing.BinaryIO) -> None:
        content_length = response.headers.get("content-length")
        # FIXME - Color the entire progress bar white.
        with tqdm.tqdm(
            desc="Download Progress",
            total=int(content_length) if content_length is not None else None,
            disable=not self._preport,
            unit="iB",
            unit_scale=True,
            unit_divisor=1024,
        ) as progress_bar:
            # NOTE - Write the response as it arrives instead of splitting it into possibly
            # smaller-than-received chunks resulting in additional I/O operations.
@@ -170,11 +180,11 @@ def download(self) -> None:

class ThreadedFileDownloader(FileDownloader):
    def __init__(
        self,
        urls: typing.Collection[str],
        filenames: typing.Collection[str | bytes | PathLike],
        max_conns: int | None = None,
        **kwargs,
    ) -> None:
        super().__init__(report_progress=False, **kwargs)
@@ -185,15 +195,15 @@ def __init__(

    def download(self) -> None:
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=self._max_conns
        ) as executor:
            futures = {
                executor.submit(self._fetch, url, filename): (url, filename)
                for url, filename in zip(self._urls, self._filenames)
            }
            # FIXME - Color the entire progress bar white.
            with tqdm.tqdm(
                desc="Download Progress", total=len(futures), unit="File"
            ) as progress_bar:
                for task in concurrent.futures.as_completed(futures):
                    address, filename = futures[task]
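
For orientation, a sketch of how the downloader utilities might be driven; the URLs and output paths below are placeholders, and the sketch assumes ThreadedFileDownloader is concrete (the elided parts of the class hierarchy are not visible in this diff):

    import requests

    from roofsense.utils.file import ThreadedFileDownloader, get_default_data_dir

    data_dir = get_default_data_dir("2024.02.28")  # creates ~/.roofsense/2024.02.28 if missing
    with requests.Session() as session:
        ThreadedFileDownloader(
            urls=["https://example.com/a.tif", "https://example.com/b.tif"],  # placeholders
            filenames=[f"{data_dir}/a.tif", f"{data_dir}/b.tif"],
            max_conns=2,
            session=session,
        ).download()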
54 changes: 54 additions & 0 deletions scripts/seeds.py
@@ -0,0 +1,54 @@
import geopandas as gpd
import geopy.geocoders
import numpy as np
import pyproj

cities = [
"Amsterdam",
"Rotterdam",
"The Hague",
"Utrecht",
"Eindhoven",
"Groningen",
"Tilburg",
"Almere",
"Breda",
"Nijmegen",
"Apeldoorn",
"Arnhem",
"Haarlem",
"Haarlemmermeer",
"Amersfoort",
"Zaanstad",
"Enschede",
"Den Bosch",
"Zwolle",
"Leiden",
"Zoetermeer",
"Leeuwarden",
"Ede",
"Maastricht",
"Dordrecht",
"Westland",
"Alphen aan den Rijn",
"Alkmaar",
"Emmen",
"Delft",
"Venlo",
"Deventer",
]


def get_seeds():
    geocoder = geopy.geocoders.GoogleV3(api_key="", user_agent="RoofSense")
    transform = pyproj.Transformer.from_crs("EPSG:4326", "EPSG:28992")

    coords = []
    for city in cities:
        loc = geocoder.geocode(city + ", The Netherlands")
        coords.append(transform.transform(loc.latitude, loc.longitude))

    a = np.array(coords)

    pts = gpd.points_from_xy(a[:, 0], a[:, 1], crs="EPSG:28992")
    gpd.GeoDataFrame(data={"id": cities}, geometry=pts).to_file("cities.gpkg")
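
Note that the script defines get_seeds() but never calls it, so it has to be invoked explicitly (for example from an if __name__ == "__main__" guard). Once cities.gpkg exists, one illustrative follow-up, not part of this commit, is to match the seed points against the new 3DBAG tile index; this assumes the index geometries are also served in EPSG:28992 (RD New) and that the roofsense package is importable:

    import geopandas as gpd

    from roofsense.bag3d import BAG3DTileStore

    seeds = gpd.read_file("cities.gpkg")   # written by get_seeds() above
    tiles = BAG3DTileStore().index         # 3DBAG tile index as a GeoDataFrame
    matches = gpd.sjoin(seeds, tiles[["tile_id", "geometry"]], predicate="within")
    print(matches[["id", "tile_id"]])      # seed city -> containing tile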
Binary file removed tools/bag3d.fgb
