Skip to content

Commit

Permalink
task/WG-96: support questionnaires with assets (#131)
Browse files Browse the repository at this point in the history
* Revert "hotfix/disable questionnaire (#127)"

This reverts commit 698da33.

* Add importing of qeustionnaire assets

* Improve importing of assets

Ensure file name is correct and a preview image is created. Also, store info on assets'
geolocation

* Sort the metadata of assets based on filename

* Change logging level for image orientations

* Fix flake8 errors

* Remove unused fixture

* Allow preflight requests

* Allow preflight requests to /

* Add preflighted requests to /assets

* Fix merge issues

* Rename fixture

* Rename fixture and test

* Improve test name

* Improve test name

* Improve rq related comments

* Improve use of quotes
  • Loading branch information
nathanfranklin authored Oct 16, 2023
1 parent a5b3cf7 commit e3ffcae
Show file tree
Hide file tree
Showing 11 changed files with 463 additions and 70 deletions.
55 changes: 46 additions & 9 deletions geoapi/services/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import tempfile
import configparser
import re
from typing import List, IO, Dict

from geoapi.services.videos import VideoService
Expand Down Expand Up @@ -60,12 +61,10 @@ class FeaturesService:
)

ALLOWED_GEOSPATIAL_EXTENSIONS = IMAGE_FILE_EXTENSIONS + GPX_FILE_EXTENSIONS + GEOJSON_FILE_EXTENSIONS\
+ SHAPEFILE_FILE_EXTENSIONS
# RAPP_FILE_EXTENSIONS to be added in https://jira.tacc.utexas.edu/browse/DES-2462
+ SHAPEFILE_FILE_EXTENSIONS + RAPP_FILE_EXTENSIONS

ALLOWED_EXTENSIONS = IMAGE_FILE_EXTENSIONS + VIDEO_FILE_EXTENSIONS + AUDIO_FILE_EXTENSIONS + GPX_FILE_EXTENSIONS\
+ GEOJSON_FILE_EXTENSIONS + SHAPEFILE_FILE_EXTENSIONS + INI_FILE_EXTENSIONS
# RAPP_FILE_EXTENSIONS to be added in https://jira.tacc.utexas.edu/browse/DES-2462
+ GEOJSON_FILE_EXTENSIONS + SHAPEFILE_FILE_EXTENSIONS + INI_FILE_EXTENSIONS + RAPP_FILE_EXTENSIONS

@staticmethod
def get(database_session, featureId: int) -> Feature:
Expand Down Expand Up @@ -240,15 +239,22 @@ def fromShapefile(database_session, projectId: int, fileObj: IO, metadata: Dict,
return features

@staticmethod
def fromRAPP(database_session, projectId: int, fileObj: IO, metadata: Dict, original_path: str = None) -> Feature:
def from_rapp_questionnaire(database_session, projectId: int, fileObj: IO,
additional_files: List[IO], original_path: str = None) -> Feature:
"""
Import RAPP questionnaire
RAPP questionnaire is imported along with any asset images that it
refers to. The asset images are assumed to reside in the same directory
as the questionnaire .rq file.
:param projectId: int
:param fileObj: file descriptor
:param metadata: Dict of <key, val> pairs
:param fileObj: questionnaire rq file
:param additional_files: list of file objs
:param original_path: str path of original file location
:return: Feature
"""
logger.info(f"Processing f{original_path}")
data = json.loads(fileObj.read())

lng = data.get('geolocation')[0].get('longitude')
Expand All @@ -264,9 +270,40 @@ def fromRAPP(database_session, projectId: int, fileObj: IO, metadata: Dict, orig
pathlib.Path(questionnaire_path).mkdir(parents=True, exist_ok=True)
asset_path = os.path.join(questionnaire_path, 'questionnaire.rq')

# write questionnaire rq file
with open(asset_path, 'w') as tmp:
tmp.write(json.dumps(data))

additional_files_properties = []

# write all asset files (i.e jpgs)
if additional_files is not None:
logger.info(f"Processing {len(additional_files)} assets for {original_path}")
for asset_file_obj in additional_files:
base_filename = os.path.basename(asset_file_obj.filename)
image_asset_path = os.path.join(questionnaire_path, base_filename)

# save original jpg (i.e. Q1-Photo-001.jpg)
with open(image_asset_path, 'wb') as image_asset:
image_asset.write(asset_file_obj.read())

# create preview image (i.e. Q1-Photo-001.preview.jpg)
processed_asset_image = ImageService.processImage(asset_file_obj)
path = pathlib.Path(image_asset_path)
processed_asset_image.resized.save(path.with_suffix('.preview' + path.suffix), "JPEG")

# gather coordinates information for this asset
logger.debug(f"{asset_file_obj.filename} has the geospatial coordinates of {processed_asset_image.coordinates}")
additional_files_properties.append({"filename": base_filename,
"coordinates": processed_asset_image.coordinates})
asset_file_obj.close()

if additional_files_properties:
# Sort the list of dictionaries based on 'QX' value and then 'PhotoX' value
additional_files_properties.sort(key=lambda x: tuple(map(int, re.findall(r'\d+', x['filename']))))
# add info about assets to properties (i.e. coordinates of asset) for quick retrieval
feat.properties = {"_hazmapper": {"questionnaire": {"assets": additional_files_properties}}}

fa = FeatureAsset(
uuid=asset_uuid,
asset_type="questionnaire",
Expand Down Expand Up @@ -344,8 +381,8 @@ def fromFileObj(database_session, projectId: int, fileObj: IO,
return FeaturesService.fromShapefile(database_session, projectId, fileObj, {}, additional_files, original_path)
elif ext in FeaturesService.INI_FILE_EXTENSIONS:
return FeaturesService.fromINI(database_session, projectId, fileObj, {}, original_path)
elif False and ext in FeaturesService.RAPP_FILE_EXTENSIONS: # Activate for https://jira.tacc.utexas.edu/browse/DES-2462
return FeaturesService.fromRAPP(database_session, projectId, fileObj, {}, original_path)
elif ext in FeaturesService.RAPP_FILE_EXTENSIONS:
return FeaturesService.from_rapp_questionnaire(database_session, projectId, fileObj, additional_files, original_path)
else:
raise ApiException("Filetype not supported for direct upload. Create a feature and attach as an asset?")

Expand Down
15 changes: 5 additions & 10 deletions geoapi/services/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,9 @@ def _fix_orientation(fileObj: IO) -> PILImage:
# from https://github.com/ianare/exif-py#usage-example
im = Image.open(fileObj)
tags = exifread.process_file(fileObj, details=False)
if "Image Orientation" in tags.keys():
logger.info("yes Image Orientation")
else:
logger.info("no Image Orientation")

if "Image Orientation" in tags.keys():
orientation = tags["Image Orientation"]
logger.info("Orientation: %s (%s)", orientation, orientation.values)
logger.debug("image orientation: %s (%s)", orientation, orientation.values)
val = orientation.values
if 2 in val:
val += [4, 3]
Expand All @@ -95,16 +90,16 @@ def _fix_orientation(fileObj: IO) -> PILImage:
if 7 in val:
val += [4, 8]
if 3 in val:
logger.info("Rotating by 180 degrees.")
logger.debug("Rotating by 180 degrees.")
im = im.transpose(Image.ROTATE_180)
if 4 in val:
logger.info("Mirroring horizontally.")
logger.debug("Mirroring horizontally.")
im = im.transpose(Image.FLIP_TOP_BOTTOM)
if 6 in val:
logger.info("Rotating by 270 degrees.")
logger.debug("Rotating by 270 degrees.")
im = im.transpose(Image.ROTATE_270)
if 8 in val:
logger.info("Rotating by 90 degrees.")
logger.debug("Rotating by 90 degrees.")
im = im.transpose(Image.ROTATE_90)
return im

Expand Down
104 changes: 68 additions & 36 deletions geoapi/tasks/external_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import time
import datetime
from celery import uuid as celery_uuid
import json

from geoapi.celery_app import app
from geoapi.exceptions import InvalidCoordinateReferenceSystem, MissingServiceAccount
Expand All @@ -21,6 +22,7 @@
from geoapi.db import create_task_session
from geoapi.services.notifications import NotificationsService
from geoapi.services.users import UserService
from dataclasses import dataclass


class ImportState(Enum):
Expand All @@ -29,6 +31,13 @@ class ImportState(Enum):
RETRYABLE_FAILURE = 3


@dataclass
class AdditionalFile:
"""Represents an additional file with its path and and if its required (i.e. not optional)."""
path: str
required: bool


def _parse_rapid_geolocation(loc):
coords = loc[0]
lat = coords["latitude"]
Expand Down Expand Up @@ -57,47 +66,71 @@ def get_file(client, system_id, path, required):
return system_id, path, required, result_file, error


def get_additional_files(systemId: str, path: str, client, available_files=None):
def get_additional_files(current_file, system_id: str, path: str, client, available_files=None):
"""
Get any additional files needed for processing
:param systemId: str
:param path: str
:param client
Get any additional files needed for processing the current file being imported
Note `available_files` is optional. if provided, then it can be used to fail early if it is known
that a required file is missing
:param str current_file: active file that is being imported
:param str system_id: system of active file
:param path: path of active file
:param client:
:param available_files: list of files that exist (optional)
:return: list of additional files
"""
path = Path(path)
if path.suffix.lower().lstrip('.') == "shp":
paths_to_get = []
additional_files_to_get = []

current_file_path = Path(path)
file_suffix = current_file_path.suffix.lower().lstrip('.')
if file_suffix == "shp":
logger.info(f"Determining which shapefile-related files need to be downloaded for file {current_file.filename}")
for extension, required in SHAPEFILE_FILE_ADDITIONAL_FILES.items():
additional_file_path = path.with_suffix(extension)
additional_file_path = current_file_path.with_suffix(extension)
if available_files and str(additional_file_path) not in available_files:
if required:
logger.error("Could not import required shapefile-related file: "
"agave: {} :: {}".format(systemId, additional_file_path))
raise Exception("Required file ({}) missing".format(additional_file_path))
logger.error(f"Could not import required shapefile-related file: agave: {system_id}/{additional_file_path}")
raise Exception(f"Required file ({system_id}/{additional_file_path}) missing")
else:
continue
paths_to_get.append(additional_file_path)

additional_files = []
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
getting_files_futures = [executor.submit(get_file, client, systemId, additional_file_path, required)
for additional_file_path in paths_to_get]
for future in concurrent.futures.as_completed(getting_files_futures):
_, additional_file_path, required, result_file, error = future.result()
if not result_file and required:
logger.error("Could not import a required shapefile-related file: "
"agave: {} :: {} ---- error: {}".format(systemId, additional_file_path, error))
if not result_file:
logger.debug("Unable to get non-required shapefile-related file: "
"agave: {} :: {}".format(systemId, additional_file_path))
continue
result_file.filename = Path(additional_file_path).name
additional_files.append(result_file)
additional_files_to_get.append(AdditionalFile(path=additional_file_path, required=required))
elif file_suffix == "rq":
logger.info(f"Parsing rq file {current_file.filename} to see what assets need to be downloaded ")
data = json.load(current_file)
for section in data["sections"]:
for question in section["questions"]:
for asset in question.get("assets", []):
# determine full path for this asset and add to list
additional_file_path = current_file_path.with_name(asset["filename"])
additional_files_to_get.append(AdditionalFile(path=additional_file_path, required=True))
logger.info(f"{len(additional_files_to_get)} assets were found for rq file {current_file.filename}")

# Seek back to start of file
current_file.seek(0)
else:
additional_files = None
return additional_files
return None

# Try to get all additional files.
additional_files_result = []
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
getting_files_futures = [executor.submit(get_file, client, system_id, additional_file.path, additional_file.required)
for additional_file in additional_files_to_get]
for future in concurrent.futures.as_completed(getting_files_futures):
_, additional_file_path, required, result_file, error = future.result()
if not result_file and required:
logger.error(f"Could not import a required {file_suffix}-related file: "
f"agave: {system_id} :: {additional_file_path} ---- error: {error}")
raise Exception(f"Required file ({system_id}/{additional_file_path}) missing")
if not result_file:
logger.error(f"Unable to get non-required {file_suffix}-related file: "
f"agave: {system_id} :: {additional_file_path} ---- error: {error}")

continue
logger.debug(f"Finished getting {file_suffix}-related file: ({system_id}/{additional_file_path}")
result_file.filename = Path(additional_file_path).name
additional_files_result.append(result_file)
return additional_files_result


@app.task(rate_limit="10/s")
Expand All @@ -111,10 +144,9 @@ def import_file_from_agave(userId: int, systemId: str, path: str, projectId: int
try:
user = session.query(User).get(userId)
client = AgaveUtils(user.jwt)

temp_file = client.getFile(systemId, path)
temp_file.filename = Path(path).name
additional_files = get_additional_files(systemId, path, client)
additional_files = get_additional_files(temp_file, systemId, path, client)
FeaturesService.fromFileObj(session, projectId, temp_file, {},
original_path=path, additional_files=additional_files)
NotificationsService.create(session, user, "success", "Imported {f}".format(f=path))
Expand Down Expand Up @@ -185,12 +217,12 @@ def import_point_clouds_from_agave(userId: int, files, pointCloudId: int):
except InvalidCoordinateReferenceSystem:
logger.error("Could not import point cloud file due to missing"
" coordinate reference system: {}:{}".format(system_id, path))
failed_message = 'Error importing {}: missing coordinate reference system'.format(path)
failed_message = "Error importing {}: missing coordinate reference system".format(path)
except Exception as e:
logger.error("Could not import point cloud file for user:{} from tapis: {}/{} : {}".format(user.username,
system_id,
path, e))
failed_message = 'Unknown error importing {}:{}'.format(system_id, path)
failed_message = "Unknown error importing {}:{}".format(system_id, path)

if failed_message:
for file_path in new_asset_files:
Expand Down Expand Up @@ -331,7 +363,7 @@ def import_from_files_from_path(session, tenant_id: str, userId: int, systemId:
logger.info("importing:{} for user:{}".format(item_system_path, user.username))
tmp_file = client.getFile(systemId, item.path)
tmp_file.filename = Path(item.path).name
additional_files = get_additional_files(systemId, item.path, client, filenames_in_directory)
additional_files = get_additional_files(tmp_file, systemId, item.path, client, available_files=filenames_in_directory)
FeaturesService.fromFileObj(session, projectId, tmp_file, {},
original_path=item_system_path, additional_files=additional_files)
NotificationsService.create(session, user, "success", "Imported {f}".format(f=item_system_path))
Expand Down
24 changes: 21 additions & 3 deletions geoapi/tests/api_tests/test_feature_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,29 @@ def test_create_tile_server_from_file(projects_fixture, tile_server_ini_file_fix
assert tile_server.attribution == "OpenStreetMap contributorshttps://www.openstreetmap.org/copyright"


def test_create_questionnaire_feature(projects_fixture, questionnaire_file_fixture):
feature = FeaturesService.fromRAPP(db_session, projects_fixture.id, questionnaire_file_fixture, metadata={})
def test_create_questionnaire_feature(projects_fixture, questionnaire_file_without_assets_fixture):
feature = FeaturesService.from_rapp_questionnaire(db_session, projects_fixture.id,
questionnaire_file_without_assets_fixture,
additional_files=None)
assert feature.project_id == projects_fixture.id
assert len(feature.assets) == 1
assert db_session.query(Feature).count() == 1
assert db_session.query(FeatureAsset).count() == 1
assert len(os.listdir(get_project_asset_dir(feature.project_id))) == 1
assert os.path.isfile(os.path.join(get_project_asset_dir(projects_fixture.id), str(feature.assets[0].uuid) + "/questionnaire.rq"))
assert len(os.listdir(get_asset_path(feature.assets[0].path))) == 1
assert os.path.isfile(get_asset_path(feature.assets[0].path, "questionnaire.rq"))


def test_create_questionnaire_feature_with_assets(projects_fixture, questionnaire_file_with_assets_fixture, image_file_fixture):
assets = [image_file_fixture]
feature = FeaturesService.from_rapp_questionnaire(db_session, projects_fixture.id,
questionnaire_file_with_assets_fixture, additional_files=assets)
assert feature.project_id == projects_fixture.id
assert len(feature.assets) == 1
assert db_session.query(Feature).count() == 1
assert db_session.query(FeatureAsset).count() == 1
assert len(os.listdir(get_project_asset_dir(feature.project_id))) == 1
assert len(os.listdir(get_asset_path(feature.assets[0].path))) == 3
assert os.path.isfile(get_asset_path(feature.assets[0].path, "questionnaire.rq"))
assert os.path.isfile(get_asset_path(feature.assets[0].path, "image.preview.jpg"))
assert os.path.isfile(get_asset_path(feature.assets[0].path, "image.jpg"))
16 changes: 14 additions & 2 deletions geoapi/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def gpx_file_fixture():
def image_file_fixture():
home = os.path.dirname(__file__)
with open(os.path.join(home, 'fixtures/image.jpg'), 'rb') as f:
f.filename = 'image.jpg'
yield f


Expand Down Expand Up @@ -473,7 +474,18 @@ def tile_server_ini_file_fixture():


@pytest.fixture(scope="function")
def questionnaire_file_fixture():
def questionnaire_file_without_assets_fixture():
home = os.path.dirname(__file__)
with open(os.path.join(home, 'fixtures/questionnaire.rq'), 'rb') as f:
filename = 'fixtures/questionnaire_without_assets.rq'
with open(os.path.join(home, filename), 'rb') as f:
f.filename = filename
yield f


@pytest.fixture(scope="function")
def questionnaire_file_with_assets_fixture():
home = os.path.dirname(__file__)
filename = 'fixtures/questionnaire_with_assets.rqa/questionnaire_with_assets.rq'
with open(os.path.join(home, filename), 'rb') as f:
f.filename = filename
yield f
Loading

0 comments on commit e3ffcae

Please sign in to comment.