advanced use case using vsimem #443
-
Hello @vincentsarago I'd like to ask/learn about a very specific use case for titiler. Long story short, I am passing multiple COGs to the cog router through url parameter. My plan is to create a band separated VRT on the server side (managed successfully) and use the expression param to perform simple map algebra on the VRT and return a new layer. I managed to create a multi-band VRT and I believe this is doable(tried statistics and info endpoints successfully). Performance could be an issue but I will handle that later. I tried to use VSIMEM to store the VRT in RAM but titiler complains it can not find the VRT file. Is there a way where I could simply turn this off?. When I use a file system based VRT everything works well. Thanks so much for titiler and your time |
Beta Was this translation helpful? Give feedback.
Replies: 2 comments 11 replies
-
I've never use |
Beta Was this translation helpful? Give feedback.
-
MultiBandReaderfrom typing import Any, Dict, List, Type
import attr
from morecantile import TileMatrixSet
from rasterio.path import parse_path
from rio_tiler.constants import WEB_MERCATOR_TMS
from rio_tiler.errors import InvalidBandName
from rio_tiler.io import BaseReader, COGReader, MultiBandReader
from tilebench import profile
files = [
"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_34SGA_20200318_0_L2A/B04.tif",
"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_34SGA_20200318_0_L2A/B03.tif",
"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_34SGA_20200318_0_L2A/B02.tif",
]
@attr.s
class MultiFilesBandsReader(MultiBandReader):
"""Multiple Files as Bands."""
input: List[str] = attr.ib()
tms: TileMatrixSet = attr.ib(default=WEB_MERCATOR_TMS)
reader_options: Dict = attr.ib(factory=dict)
reader: Type[BaseReader] = attr.ib(default=COGReader)
def __attrs_post_init__(self):
"""Fetch Reference band to get the bounds."""
self.bands = [f"b{ix + 1}" for ix in range(len(self.input))]
# We assume the files are similar so we use the first one to
# get the bounds/crs/min,maxzoom
# Note: you can skip that and hard code values
with self.reader(self.input[0], tms=self.tms, **self.reader_options) as cog:
self.bounds = cog.bounds
self.crs = cog.crs
self.minzoom = cog.minzoom
self.maxzoom = cog.maxzoom
def _get_band_url(self, band: str) -> str:
"""Validate band's name and return band's url."""
if band not in self.bands:
raise InvalidBandName(f"{band} is not valid")
index = self.bands.index(band)
return self.input[index]
@profile(add_to_return=True)
def _read_tile(src_path: List[str], x: int, y: int, z: int,):
with MultiFilesBandsReader(src_path) as src:
return src.tile(x, y, z, bands=src.bands, threads=0) # we set threads to 0 because there is no evidence that it will speed up things for only 3 files
(data, mask), stats = _read_tile(
files,
2314,
1667,
12,
)
print(stats)
>> {
"LIST": {
"count": 0
},
"HEAD": {
"count": 3
},
"GET": {
"count": 9,
"bytes": 4669440,
"ranges": [
"0-32767",
"33521664-34242559",
"36880384-37650431",
"0-32767",
"33505280-34242559",
"36864000-37650431",
"0-32767",
"33816576-34570239",
"37208064-38010879"
]
},
"Timing": 4.864678859710693
} VRTfrom rio_tiler.io import COGReader
from tilebench import profile
from osgeo import gdal
import uuid
files = [
"/vsicurl/https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_34SGA_20200318_0_L2A/B04.tif",
"/vsicurl/https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_34SGA_20200318_0_L2A/B03.tif",
"/vsicurl/https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/2020/S2A_34SGA_20200318_0_L2A/B02.tif",
]
@profile(add_to_return=True)
def _read_tile(src_path, x, y, z):
vrt_name = uuid.uuid4()
#VSIMEM files do NOT exist on disk and Titiler complains about it so we use a regular file
#vrt_path = f'/vsimem/{vrt_name}.vrt'
vrt_path = f'/vsimem/{vrt_name}.vrt'
# options and resolutions could be suplied by the client??? perhaps
vrt_options = gdal.BuildVRTOptions(resampleAlg='nearest', separate=True, resolution="lowest")
my_vrt = gdal.BuildVRT(vrt_path, files, options=vrt_options)
# this might be necessary only for disk based VRT files I suspect
my_vrt.FlushCache()
with COGReader(vrt_path) as src:
return src.tile(x, y, z)
(data, mask), stats = _read_tile(
files,
2314,
1667,
12,
)
print(stats)
>> {
"LIST": {
"count": 0
},
"HEAD": {
"count": 3
},
"GET": {
"count": 9,
"bytes": 4669440,
"ranges": [
"0-32767",
"0-32767",
"0-32767",
"33521664-34242559",
"33505280-34242559",
"33816576-34570239",
"36880384-37650431",
"36864000-37650431",
"37208064-38010879"
]
},
"Timing": 4.775151014328003
} the Tilebench logs show the difference I was mentioning about the VRT doing the first GET request (0-32767, to get the header) for all the files before we do the tile reading. while for MultiBandReader we iterate over each file. |
Beta Was this translation helpful? Give feedback.
MultiBandReader