Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

task/WG-96: support questionnaires with assets #131

Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
ebcb2bf
Revert "hotfix/disable questionnaire (#127)"
nathanfranklin Jun 8, 2023
14a93fd
Add importing of qeustionnaire assets
nathanfranklin Jun 29, 2023
74e2aed
Improve importing of assets
nathanfranklin Jun 29, 2023
9e1c60d
Sort the metadata of assets based on filename
nathanfranklin Jun 29, 2023
5868c95
Change logging level for image orientations
nathanfranklin Jun 29, 2023
27c8de8
Fix flake8 errors
nathanfranklin Jun 29, 2023
1b82fae
Merge branch 'master' into task/WG-96-support-questionnaires-with-assets
nathanfranklin Jun 29, 2023
6e7c9cb
Remove unused fixture
nathanfranklin Jun 29, 2023
69d6a6c
Allow preflight requests
nathanfranklin Jul 4, 2023
dfde965
Allow preflight requests to /
nathanfranklin Jul 5, 2023
192a5f7
Add preflighted requests to /assets
nathanfranklin Jul 5, 2023
a3c162a
Merge branch 'master' into task/WG-96-support-questionnaires-with-assets
nathanfranklin Aug 15, 2023
a586d70
Merge branch 'master' into task/WG-96-support-questionnaires-with-assets
nathanfranklin Aug 25, 2023
2974ef5
Merge branch 'master' into task/WG-96-support-questionnaires-with-assets
nathanfranklin Sep 28, 2023
6e2b2f0
Fix merge issues
nathanfranklin Sep 28, 2023
945559c
Merge branch 'master' into task/WG-96-support-questionnaires-with-assets
nathanfranklin Sep 29, 2023
b740fb8
Merge branch 'master' into task/WG-96-support-questionnaires-with-assets
nathanfranklin Oct 5, 2023
df2217e
Rename fixture
nathanfranklin Oct 13, 2023
587f212
Rename fixture and test
nathanfranklin Oct 13, 2023
c527415
Improve test name
nathanfranklin Oct 13, 2023
117abfa
Improve test name
nathanfranklin Oct 13, 2023
1772da6
Improve rq related comments
nathanfranklin Oct 13, 2023
65eeef8
Improve use of quotes
nathanfranklin Oct 13, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 45 additions & 9 deletions geoapi/services/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import tempfile
import configparser
import re
from typing import List, IO, Dict

from geoapi.services.videos import VideoService
Expand Down Expand Up @@ -61,12 +62,10 @@ class FeaturesService:
)

ALLOWED_GEOSPATIAL_EXTENSIONS = IMAGE_FILE_EXTENSIONS + GPX_FILE_EXTENSIONS + GEOJSON_FILE_EXTENSIONS\
+ SHAPEFILE_FILE_EXTENSIONS
# RAPP_FILE_EXTENSIONS to be added in https://jira.tacc.utexas.edu/browse/DES-2462
+ SHAPEFILE_FILE_EXTENSIONS + RAPP_FILE_EXTENSIONS

ALLOWED_EXTENSIONS = IMAGE_FILE_EXTENSIONS + VIDEO_FILE_EXTENSIONS + AUDIO_FILE_EXTENSIONS + GPX_FILE_EXTENSIONS\
+ GEOJSON_FILE_EXTENSIONS + SHAPEFILE_FILE_EXTENSIONS + INI_FILE_EXTENSIONS
# RAPP_FILE_EXTENSIONS to be added in https://jira.tacc.utexas.edu/browse/DES-2462
+ GEOJSON_FILE_EXTENSIONS + SHAPEFILE_FILE_EXTENSIONS + INI_FILE_EXTENSIONS + RAPP_FILE_EXTENSIONS

@staticmethod
def get(featureId: int) -> Feature:
Expand Down Expand Up @@ -241,15 +240,21 @@ def fromShapefile(projectId: int, fileObj: IO, metadata: Dict, additional_files:
return features

@staticmethod
def fromRAPP(projectId: int, fileObj: IO, metadata: Dict, original_path: str = None) -> Feature:
def from_rapp_questionnaire(projectId: int, fileObj: IO, additional_files: List[IO], original_path: str = None) -> Feature:
"""
Import RAPP questionnaire

RAPP questionnaire is imported along with any asset images that it
refers to. The asset images are assumed to reside in the same directory
as the questionnaire .rq file.

:param projectId: int
:param fileObj: file descriptor
:param metadata: Dict of <key, val> pairs
:param fileObj: questionnaire file
:param additional_files: list of file objs
:param original_path: str path of original file location
:return: Feature
"""
logger.info(f"Processing f{original_path}")
data = json.loads(fileObj.read())

lng = data.get('geolocation')[0].get('longitude')
Expand All @@ -265,9 +270,40 @@ def fromRAPP(projectId: int, fileObj: IO, metadata: Dict, original_path: str = N
pathlib.Path(questionnaire_path).mkdir(parents=True, exist_ok=True)
asset_path = os.path.join(questionnaire_path, 'questionnaire.rq')

# write questionnaire file (rq)
taoteg marked this conversation as resolved.
Show resolved Hide resolved
with open(asset_path, 'w') as tmp:
tmp.write(json.dumps(data))

additional_files_properties = []

# write all asset files (i.e jpgs)
if additional_files is not None:
logger.info(f"Processing {len(additional_files)} assets for {original_path}")
for asset_file_obj in additional_files:
base_filename = os.path.basename(asset_file_obj.filename)
image_asset_path = os.path.join(questionnaire_path, base_filename)

# save original jpg (i.e. Q1-Photo-001.jpg)
with open(image_asset_path, 'wb') as image_asset:
image_asset.write(asset_file_obj.read())

# create preview image (i.e. Q1-Photo-001.preview.jpg)
processed_asset_image = ImageService.processImage(asset_file_obj)
path = pathlib.Path(image_asset_path)
processed_asset_image.resized.save(path.with_suffix('.preview' + path.suffix), "JPEG")

# gather coordinates information for this asset
logger.debug(f"{asset_file_obj.filename} has the geospatial coordinates of {processed_asset_image.coordinates}")
additional_files_properties.append({"filename": base_filename,
"coordinates": processed_asset_image.coordinates})
asset_file_obj.close()

if additional_files_properties:
# Sort the list of dictionaries based on 'QX' value and then 'PhotoX' value
additional_files_properties.sort(key=lambda x: tuple(map(int, re.findall(r'\d+', x['filename']))))
# add info about assets to properties (i.e. coordinates of asset) for quick retrieval
feat.properties = {"_hazmapper": {"questionnaire": {"assets": additional_files_properties}}}

fa = FeatureAsset(
uuid=asset_uuid,
asset_type="questionnaire",
Expand Down Expand Up @@ -344,8 +380,8 @@ def fromFileObj(projectId: int, fileObj: IO, metadata: Dict, original_path: str
return FeaturesService.fromShapefile(projectId, fileObj, {}, additional_files, original_path)
elif ext in FeaturesService.INI_FILE_EXTENSIONS:
return FeaturesService.fromINI(projectId, fileObj, {}, original_path)
elif False and ext in FeaturesService.RAPP_FILE_EXTENSIONS: # Activate for https://jira.tacc.utexas.edu/browse/DES-2462
return FeaturesService.fromRAPP(projectId, fileObj, {}, original_path)
elif ext in FeaturesService.RAPP_FILE_EXTENSIONS:
return FeaturesService.from_rapp_questionnaire(projectId, fileObj, additional_files, original_path)
else:
raise ApiException("Filetype not supported for direct upload. Create a feature and attach as an asset?")

Expand Down
15 changes: 5 additions & 10 deletions geoapi/services/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,9 @@ def _fix_orientation(fileObj: IO) -> PILImage:
# from https://github.com/ianare/exif-py#usage-example
nathanfranklin marked this conversation as resolved.
Show resolved Hide resolved
im = Image.open(fileObj)
tags = exifread.process_file(fileObj, details=False)
if "Image Orientation" in tags.keys():
logger.info("yes Image Orientation")
else:
logger.info("no Image Orientation")

if "Image Orientation" in tags.keys():
orientation = tags["Image Orientation"]
logger.info("Orientation: %s (%s)", orientation, orientation.values)
logger.debug("image orientation: %s (%s)", orientation, orientation.values)
val = orientation.values
if 2 in val:
val += [4, 3]
Expand All @@ -95,16 +90,16 @@ def _fix_orientation(fileObj: IO) -> PILImage:
if 7 in val:
val += [4, 8]
if 3 in val:
logger.info("Rotating by 180 degrees.")
logger.debug("Rotating by 180 degrees.")
im = im.transpose(Image.ROTATE_180)
if 4 in val:
logger.info("Mirroring horizontally.")
logger.debug("Mirroring horizontally.")
im = im.transpose(Image.FLIP_TOP_BOTTOM)
if 6 in val:
logger.info("Rotating by 270 degrees.")
logger.debug("Rotating by 270 degrees.")
im = im.transpose(Image.ROTATE_270)
if 8 in val:
logger.info("Rotating by 90 degrees.")
logger.debug("Rotating by 90 degrees.")
im = im.transpose(Image.ROTATE_90)
return im

Expand Down
98 changes: 65 additions & 33 deletions geoapi/tasks/external_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from geoapi.db import db_session
from geoapi.services.notifications import NotificationsService
from geoapi.services.users import UserService
from dataclasses import dataclass


class ImportState(Enum):
Expand All @@ -30,6 +31,13 @@ class ImportState(Enum):
RETRYABLE_FAILURE = 3


@dataclass
class AdditionalFile:
"""Represents an additional file with its path and and if its required (i.e. not optional)."""
path: str
required: bool


def _parse_rapid_geolocation(loc):
coords = loc[0]
lat = coords["latitude"]
Expand Down Expand Up @@ -58,47 +66,71 @@ def get_file(client, system_id, path, required):
return system_id, path, required, result_file, error


def get_additional_files(systemId: str, path: str, client, available_files=None):
def get_additional_files(current_file, system_id: str, path: str, client, available_files=None):
"""
Get any additional files needed for processing
:param systemId: str
:param path: str
:param client
Get any additional files needed for processing the current file being imported

Note `available_files` is optional. if provided, then it can be used to fail early if it is known
that a required file is missing

:param str current_file: active file that is being imported
:param str system_id: system of active file
:param path: path of active file
:param client:
:param available_files: list of files that exist (optional)
:return: list of additional files
"""
path = Path(path)
if path.suffix.lower().lstrip('.') == "shp":
paths_to_get = []
additional_files_to_get = []

current_file_path = Path(path)
file_suffix = current_file_path.suffix.lower().lstrip('.')
if file_suffix == "shp":
logger.info(f'Determining which shapefile-related files need to be downloaded for file {current_file.filename}')
for extension, required in SHAPEFILE_FILE_ADDITIONAL_FILES.items():
additional_file_path = path.with_suffix(extension)
additional_file_path = current_file_path.with_suffix(extension)
if available_files and str(additional_file_path) not in available_files:
if required:
logger.error("Could not import required shapefile-related file: "
"agave: {} :: {}".format(systemId, additional_file_path))
raise Exception("Required file ({}) missing".format(additional_file_path))
logger.error(f"Could not import required shapefile-related file: agave: {system_id}/{additional_file_path}")
raise Exception(f'Required file ({system_id}/{additional_file_path}) missing')
else:
continue
paths_to_get.append(additional_file_path)

additional_files = []
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
getting_files_futures = [executor.submit(get_file, client, systemId, additional_file_path, required)
for additional_file_path in paths_to_get]
for future in concurrent.futures.as_completed(getting_files_futures):
_, additional_file_path, required, result_file, error = future.result()
if not result_file and required:
logger.error("Could not import a required shapefile-related file: "
"agave: {} :: {} ---- error: {}".format(systemId, additional_file_path, error))
if not result_file:
logger.debug("Unable to get non-required shapefile-related file: "
"agave: {} :: {}".format(systemId, additional_file_path))
continue
result_file.filename = Path(additional_file_path).name
additional_files.append(result_file)
additional_files_to_get.append(AdditionalFile(path=additional_file_path, required=required))
elif file_suffix == "rq":
logger.info(f'Parsing rq file {current_file.filename} to see what assets need to be downloaded ')
data = json.load(current_file)
for section in data["sections"]:
for question in section["questions"]:
for asset in question.get("assets", []):
# determine full path for this asset and add to list
additional_file_path = current_file_path.with_name(asset["filename"])
additional_files_to_get.append(AdditionalFile(path=additional_file_path, required=True))
logger.info(f'{len(additional_files_to_get)} assets were found for rq file {current_file.filename}')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is an inconsistency with using single and double quotes through this file. Could we normalize that as much as possible to one of the two?
Compare lines 88, 93, 94, 99, 107, 122, etc.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know that TACC WMA has a preference but I tend to prefer this usage:

  • raw string literals for regular expressions
  • single quotes for small symbol-like strings
  • double quotes around strings that are used for interpolation or that are natural language messages (break the rules as needed if the strings contain quotes to escape)
  • triple double quotes for docstrings

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I took a stab at this in 65eeef8. let me know if that matches up with your comment.


# Seek back to start of file
current_file.seek(0)
else:
additional_files = None
return additional_files
return None

# Try to get all additional files.
additional_files_result = []
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
getting_files_futures = [executor.submit(get_file, client, system_id, additional_file.path, additional_file.required)
for additional_file in additional_files_to_get]
for future in concurrent.futures.as_completed(getting_files_futures):
_, additional_file_path, required, result_file, error = future.result()
if not result_file and required:
logger.error(f'Could not import a required {file_suffix}-related file: '
f'agave: {system_id} :: {additional_file_path} ---- error: {error}')
raise Exception(f'Required file ({system_id}/{additional_file_path}) missing')
if not result_file:
logger.error(f'Unable to get non-required {file_suffix}-related file: '
f'agave: {system_id} :: {additional_file_path} ---- error: {error}')

continue
logger.debug(f'Finished getting {file_suffix}-related file: ({system_id}/{additional_file_path}')
result_file.filename = Path(additional_file_path).name
additional_files_result.append(result_file)
return additional_files_result


@app.task(rate_limit="10/s")
Expand All @@ -113,7 +145,7 @@ def import_file_from_agave(userId: int, systemId: str, path: str, projectId: int
try:
tmpFile = client.getFile(systemId, path)
tmpFile.filename = Path(path).name
additional_files = get_additional_files(systemId, path, client)
additional_files = get_additional_files(tmpFile, systemId, path, client)
FeaturesService.fromFileObj(projectId, tmpFile, {}, original_path=path, additional_files=additional_files)
NotificationsService.create(user, "success", "Imported {f}".format(f=path))
tmpFile.close()
Expand Down Expand Up @@ -315,7 +347,7 @@ def import_from_agave(tenant_id: str, userId: int, systemId: str, path: str, pro
logger.info("importing:{} for user:{}".format(item_system_path, user.username))
tmpFile = client.getFile(systemId, item.path)
tmpFile.filename = Path(item.path).name
additional_files = get_additional_files(systemId, item.path, client, filenames_in_directory)
additional_files = get_additional_files(tmpFile, systemId, item.path, client, available_files=filenames_in_directory)
FeaturesService.fromFileObj(projectId, tmpFile, {},
original_path=item_system_path, additional_files=additional_files)
NotificationsService.create(user, "success", "Imported {f}".format(f=item_system_path))
Expand Down
19 changes: 17 additions & 2 deletions geoapi/tests/api_tests/test_feature_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,10 +208,25 @@ def test_create_tile_server_from_file(projects_fixture, tile_server_ini_file_fix


def test_create_questionnaire_feature(projects_fixture, questionnaire_file_fixture):
feature = FeaturesService.fromRAPP(projects_fixture.id, questionnaire_file_fixture, metadata={})
feature = FeaturesService.from_rapp_questionnaire(projects_fixture.id, questionnaire_file_fixture, additional_files=None)
assert feature.project_id == projects_fixture.id
assert len(feature.assets) == 1
assert db_session.query(Feature).count() == 1
assert db_session.query(FeatureAsset).count() == 1
assert len(os.listdir(get_project_asset_dir(feature.project_id))) == 1
assert os.path.isfile(os.path.join(get_project_asset_dir(projects_fixture.id), str(feature.assets[0].uuid) + "/questionnaire.rq"))
assert len(os.listdir(get_asset_path(feature.assets[0].path))) == 1
assert os.path.isfile(get_asset_path(feature.assets[0].path, "questionnaire.rq"))


def test_create_questionnaire_feature_with_assets(projects_fixture, questionnaire_file_with_assets_fixture, image_file_fixture):
assets = [image_file_fixture]
feature = FeaturesService.from_rapp_questionnaire(projects_fixture.id, questionnaire_file_with_assets_fixture, additional_files=assets)
assert feature.project_id == projects_fixture.id
assert len(feature.assets) == 1
assert db_session.query(Feature).count() == 1
assert db_session.query(FeatureAsset).count() == 1
assert len(os.listdir(get_project_asset_dir(feature.project_id))) == 1
assert len(os.listdir(get_asset_path(feature.assets[0].path))) == 3
assert os.path.isfile(get_asset_path(feature.assets[0].path, "questionnaire.rq"))
assert os.path.isfile(get_asset_path(feature.assets[0].path, "image.preview.jpg"))
assert os.path.isfile(get_asset_path(feature.assets[0].path, "image.jpg"))
14 changes: 13 additions & 1 deletion geoapi/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def gpx_file_fixture():
def image_file_fixture():
home = os.path.dirname(__file__)
with open(os.path.join(home, 'fixtures/image.jpg'), 'rb') as f:
f.filename = 'image.jpg'
yield f


Expand Down Expand Up @@ -480,5 +481,16 @@ def tile_server_ini_file_fixture():
@pytest.fixture(scope="function")
def questionnaire_file_fixture():
home = os.path.dirname(__file__)
with open(os.path.join(home, 'fixtures/questionnaire.rq'), 'rb') as f:
filename = 'fixtures/questionnaire.rq'
with open(os.path.join(home, filename), 'rb') as f:
f.filename = filename
yield f


@pytest.fixture(scope="function")
def questionnaire_file_with_assets_fixture():
home = os.path.dirname(__file__)
filename = 'fixtures/questionnaire_with_assets.rqa/questionnaire_with_assets.rq'
with open(os.path.join(home, filename), 'rb') as f:
f.filename = filename
yield f
Loading