From e3ffcaeadbddd89e5e4038686aae5f1b59e1d088 Mon Sep 17 00:00:00 2001 From: Nathan Franklin Date: Mon, 16 Oct 2023 10:29:56 -0500 Subject: [PATCH 1/3] task/WG-96: support questionnaires with assets (#131) * Revert "hotfix/disable questionnaire (#127)" This reverts commit 698da33ccca6327b77162ac77de617d1ffbae397. * Add importing of questionnaire assets * Improve importing of assets Ensure file name is correct and a preview image is created. Also, store info on assets' geolocation * Sort the metadata of assets based on filename * Change logging level for image orientations * Fix flake8 errors * Remove unused fixture * Allow preflight requests * Allow preflight requests to / * Add preflighted requests to /assets * Fix merge issues * Rename fixture * Rename fixture and test * Improve test name * Improve test name * Improve rq related comments * Improve use of quotes --- geoapi/services/features.py | 55 ++++- geoapi/services/images.py | 15 +- geoapi/tasks/external_data.py | 104 ++++++--- .../tests/api_tests/test_feature_service.py | 24 +- geoapi/tests/conftest.py | 16 +- .../external_data_tests/test_external_data.py | 37 ++- geoapi/tests/fixtures/questionnaire.rq | 1 - .../questionnaire_with_assets.rq | 56 +++++ .../fixtures/questionnaire_without_assets.rq | 214 ++++++++++++++++++ geoapi/utils/agave.py | 1 + kube/geoapi.yaml | 10 + 11 files changed, 463 insertions(+), 70 deletions(-) delete mode 100644 geoapi/tests/fixtures/questionnaire.rq create mode 100644 geoapi/tests/fixtures/questionnaire_with_assets.rqa/questionnaire_with_assets.rq create mode 100644 geoapi/tests/fixtures/questionnaire_without_assets.rq diff --git a/geoapi/services/features.py b/geoapi/services/features.py index d45cc77f..3f95b531 100644 --- a/geoapi/services/features.py +++ b/geoapi/services/features.py @@ -4,6 +4,7 @@ import json import tempfile import configparser +import re from typing import List, IO, Dict from geoapi.services.videos import VideoService @@ -60,12 +61,10 @@ class 
FeaturesService: ) ALLOWED_GEOSPATIAL_EXTENSIONS = IMAGE_FILE_EXTENSIONS + GPX_FILE_EXTENSIONS + GEOJSON_FILE_EXTENSIONS\ - + SHAPEFILE_FILE_EXTENSIONS - # RAPP_FILE_EXTENSIONS to be added in https://jira.tacc.utexas.edu/browse/DES-2462 + + SHAPEFILE_FILE_EXTENSIONS + RAPP_FILE_EXTENSIONS ALLOWED_EXTENSIONS = IMAGE_FILE_EXTENSIONS + VIDEO_FILE_EXTENSIONS + AUDIO_FILE_EXTENSIONS + GPX_FILE_EXTENSIONS\ - + GEOJSON_FILE_EXTENSIONS + SHAPEFILE_FILE_EXTENSIONS + INI_FILE_EXTENSIONS - # RAPP_FILE_EXTENSIONS to be added in https://jira.tacc.utexas.edu/browse/DES-2462 + + GEOJSON_FILE_EXTENSIONS + SHAPEFILE_FILE_EXTENSIONS + INI_FILE_EXTENSIONS + RAPP_FILE_EXTENSIONS @staticmethod def get(database_session, featureId: int) -> Feature: @@ -240,15 +239,22 @@ def fromShapefile(database_session, projectId: int, fileObj: IO, metadata: Dict, return features @staticmethod - def fromRAPP(database_session, projectId: int, fileObj: IO, metadata: Dict, original_path: str = None) -> Feature: + def from_rapp_questionnaire(database_session, projectId: int, fileObj: IO, + additional_files: List[IO], original_path: str = None) -> Feature: """ + Import RAPP questionnaire + + RAPP questionnaire is imported along with any asset images that it + refers to. The asset images are assumed to reside in the same directory + as the questionnaire .rq file. 
:param projectId: int - :param fileObj: file descriptor - :param metadata: Dict of pairs + :param fileObj: questionnaire rq file + :param additional_files: list of file objs :param original_path: str path of original file location :return: Feature """ + logger.info(f"Processing f{original_path}") data = json.loads(fileObj.read()) lng = data.get('geolocation')[0].get('longitude') @@ -264,9 +270,40 @@ def fromRAPP(database_session, projectId: int, fileObj: IO, metadata: Dict, orig pathlib.Path(questionnaire_path).mkdir(parents=True, exist_ok=True) asset_path = os.path.join(questionnaire_path, 'questionnaire.rq') + # write questionnaire rq file with open(asset_path, 'w') as tmp: tmp.write(json.dumps(data)) + additional_files_properties = [] + + # write all asset files (i.e jpgs) + if additional_files is not None: + logger.info(f"Processing {len(additional_files)} assets for {original_path}") + for asset_file_obj in additional_files: + base_filename = os.path.basename(asset_file_obj.filename) + image_asset_path = os.path.join(questionnaire_path, base_filename) + + # save original jpg (i.e. Q1-Photo-001.jpg) + with open(image_asset_path, 'wb') as image_asset: + image_asset.write(asset_file_obj.read()) + + # create preview image (i.e. 
Q1-Photo-001.preview.jpg) + processed_asset_image = ImageService.processImage(asset_file_obj) + path = pathlib.Path(image_asset_path) + processed_asset_image.resized.save(path.with_suffix('.preview' + path.suffix), "JPEG") + + # gather coordinates information for this asset + logger.debug(f"{asset_file_obj.filename} has the geospatial coordinates of {processed_asset_image.coordinates}") + additional_files_properties.append({"filename": base_filename, + "coordinates": processed_asset_image.coordinates}) + asset_file_obj.close() + + if additional_files_properties: + # Sort the list of dictionaries based on 'QX' value and then 'PhotoX' value + additional_files_properties.sort(key=lambda x: tuple(map(int, re.findall(r'\d+', x['filename'])))) + # add info about assets to properties (i.e. coordinates of asset) for quick retrieval + feat.properties = {"_hazmapper": {"questionnaire": {"assets": additional_files_properties}}} + fa = FeatureAsset( uuid=asset_uuid, asset_type="questionnaire", @@ -344,8 +381,8 @@ def fromFileObj(database_session, projectId: int, fileObj: IO, return FeaturesService.fromShapefile(database_session, projectId, fileObj, {}, additional_files, original_path) elif ext in FeaturesService.INI_FILE_EXTENSIONS: return FeaturesService.fromINI(database_session, projectId, fileObj, {}, original_path) - elif False and ext in FeaturesService.RAPP_FILE_EXTENSIONS: # Activate for https://jira.tacc.utexas.edu/browse/DES-2462 - return FeaturesService.fromRAPP(database_session, projectId, fileObj, {}, original_path) + elif ext in FeaturesService.RAPP_FILE_EXTENSIONS: + return FeaturesService.from_rapp_questionnaire(database_session, projectId, fileObj, additional_files, original_path) else: raise ApiException("Filetype not supported for direct upload. 
Create a feature and attach as an asset?") diff --git a/geoapi/services/images.py b/geoapi/services/images.py index 0e079b7d..417908b9 100644 --- a/geoapi/services/images.py +++ b/geoapi/services/images.py @@ -79,14 +79,9 @@ def _fix_orientation(fileObj: IO) -> PILImage: # from https://github.com/ianare/exif-py#usage-example im = Image.open(fileObj) tags = exifread.process_file(fileObj, details=False) - if "Image Orientation" in tags.keys(): - logger.info("yes Image Orientation") - else: - logger.info("no Image Orientation") - if "Image Orientation" in tags.keys(): orientation = tags["Image Orientation"] - logger.info("Orientation: %s (%s)", orientation, orientation.values) + logger.debug("image orientation: %s (%s)", orientation, orientation.values) val = orientation.values if 2 in val: val += [4, 3] @@ -95,16 +90,16 @@ def _fix_orientation(fileObj: IO) -> PILImage: if 7 in val: val += [4, 8] if 3 in val: - logger.info("Rotating by 180 degrees.") + logger.debug("Rotating by 180 degrees.") im = im.transpose(Image.ROTATE_180) if 4 in val: - logger.info("Mirroring horizontally.") + logger.debug("Mirroring horizontally.") im = im.transpose(Image.FLIP_TOP_BOTTOM) if 6 in val: - logger.info("Rotating by 270 degrees.") + logger.debug("Rotating by 270 degrees.") im = im.transpose(Image.ROTATE_270) if 8 in val: - logger.info("Rotating by 90 degrees.") + logger.debug("Rotating by 90 degrees.") im = im.transpose(Image.ROTATE_90) return im diff --git a/geoapi/tasks/external_data.py b/geoapi/tasks/external_data.py index 7ba7d4d0..b7d602b0 100644 --- a/geoapi/tasks/external_data.py +++ b/geoapi/tasks/external_data.py @@ -6,6 +6,7 @@ import time import datetime from celery import uuid as celery_uuid +import json from geoapi.celery_app import app from geoapi.exceptions import InvalidCoordinateReferenceSystem, MissingServiceAccount @@ -21,6 +22,7 @@ from geoapi.db import create_task_session from geoapi.services.notifications import NotificationsService from geoapi.services.users 
import UserService +from dataclasses import dataclass class ImportState(Enum): @@ -29,6 +31,13 @@ class ImportState(Enum): RETRYABLE_FAILURE = 3 +@dataclass +class AdditionalFile: + """Represents an additional file with its path and and if its required (i.e. not optional).""" + path: str + required: bool + + def _parse_rapid_geolocation(loc): coords = loc[0] lat = coords["latitude"] @@ -57,47 +66,71 @@ def get_file(client, system_id, path, required): return system_id, path, required, result_file, error -def get_additional_files(systemId: str, path: str, client, available_files=None): +def get_additional_files(current_file, system_id: str, path: str, client, available_files=None): """ - Get any additional files needed for processing - :param systemId: str - :param path: str - :param client + Get any additional files needed for processing the current file being imported + + Note `available_files` is optional. if provided, then it can be used to fail early if it is known + that a required file is missing + + :param str current_file: active file that is being imported + :param str system_id: system of active file + :param path: path of active file + :param client: :param available_files: list of files that exist (optional) :return: list of additional files """ - path = Path(path) - if path.suffix.lower().lstrip('.') == "shp": - paths_to_get = [] + additional_files_to_get = [] + + current_file_path = Path(path) + file_suffix = current_file_path.suffix.lower().lstrip('.') + if file_suffix == "shp": + logger.info(f"Determining which shapefile-related files need to be downloaded for file {current_file.filename}") for extension, required in SHAPEFILE_FILE_ADDITIONAL_FILES.items(): - additional_file_path = path.with_suffix(extension) + additional_file_path = current_file_path.with_suffix(extension) if available_files and str(additional_file_path) not in available_files: if required: - logger.error("Could not import required shapefile-related file: " - "agave: {} :: 
{}".format(systemId, additional_file_path)) - raise Exception("Required file ({}) missing".format(additional_file_path)) + logger.error(f"Could not import required shapefile-related file: agave: {system_id}/{additional_file_path}") + raise Exception(f"Required file ({system_id}/{additional_file_path}) missing") else: continue - paths_to_get.append(additional_file_path) - - additional_files = [] - with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: - getting_files_futures = [executor.submit(get_file, client, systemId, additional_file_path, required) - for additional_file_path in paths_to_get] - for future in concurrent.futures.as_completed(getting_files_futures): - _, additional_file_path, required, result_file, error = future.result() - if not result_file and required: - logger.error("Could not import a required shapefile-related file: " - "agave: {} :: {} ---- error: {}".format(systemId, additional_file_path, error)) - if not result_file: - logger.debug("Unable to get non-required shapefile-related file: " - "agave: {} :: {}".format(systemId, additional_file_path)) - continue - result_file.filename = Path(additional_file_path).name - additional_files.append(result_file) + additional_files_to_get.append(AdditionalFile(path=additional_file_path, required=required)) + elif file_suffix == "rq": + logger.info(f"Parsing rq file {current_file.filename} to see what assets need to be downloaded ") + data = json.load(current_file) + for section in data["sections"]: + for question in section["questions"]: + for asset in question.get("assets", []): + # determine full path for this asset and add to list + additional_file_path = current_file_path.with_name(asset["filename"]) + additional_files_to_get.append(AdditionalFile(path=additional_file_path, required=True)) + logger.info(f"{len(additional_files_to_get)} assets were found for rq file {current_file.filename}") + + # Seek back to start of file + current_file.seek(0) else: - additional_files = None - 
return additional_files + return None + + # Try to get all additional files. + additional_files_result = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: + getting_files_futures = [executor.submit(get_file, client, system_id, additional_file.path, additional_file.required) + for additional_file in additional_files_to_get] + for future in concurrent.futures.as_completed(getting_files_futures): + _, additional_file_path, required, result_file, error = future.result() + if not result_file and required: + logger.error(f"Could not import a required {file_suffix}-related file: " + f"agave: {system_id} :: {additional_file_path} ---- error: {error}") + raise Exception(f"Required file ({system_id}/{additional_file_path}) missing") + if not result_file: + logger.error(f"Unable to get non-required {file_suffix}-related file: " + f"agave: {system_id} :: {additional_file_path} ---- error: {error}") + + continue + logger.debug(f"Finished getting {file_suffix}-related file: ({system_id}/{additional_file_path}") + result_file.filename = Path(additional_file_path).name + additional_files_result.append(result_file) + return additional_files_result @app.task(rate_limit="10/s") @@ -111,10 +144,9 @@ def import_file_from_agave(userId: int, systemId: str, path: str, projectId: int try: user = session.query(User).get(userId) client = AgaveUtils(user.jwt) - temp_file = client.getFile(systemId, path) temp_file.filename = Path(path).name - additional_files = get_additional_files(systemId, path, client) + additional_files = get_additional_files(temp_file, systemId, path, client) FeaturesService.fromFileObj(session, projectId, temp_file, {}, original_path=path, additional_files=additional_files) NotificationsService.create(session, user, "success", "Imported {f}".format(f=path)) @@ -185,12 +217,12 @@ def import_point_clouds_from_agave(userId: int, files, pointCloudId: int): except InvalidCoordinateReferenceSystem: logger.error("Could not import point cloud file due 
to missing" " coordinate reference system: {}:{}".format(system_id, path)) - failed_message = 'Error importing {}: missing coordinate reference system'.format(path) + failed_message = "Error importing {}: missing coordinate reference system".format(path) except Exception as e: logger.error("Could not import point cloud file for user:{} from tapis: {}/{} : {}".format(user.username, system_id, path, e)) - failed_message = 'Unknown error importing {}:{}'.format(system_id, path) + failed_message = "Unknown error importing {}:{}".format(system_id, path) if failed_message: for file_path in new_asset_files: @@ -331,7 +363,7 @@ def import_from_files_from_path(session, tenant_id: str, userId: int, systemId: logger.info("importing:{} for user:{}".format(item_system_path, user.username)) tmp_file = client.getFile(systemId, item.path) tmp_file.filename = Path(item.path).name - additional_files = get_additional_files(systemId, item.path, client, filenames_in_directory) + additional_files = get_additional_files(tmp_file, systemId, item.path, client, available_files=filenames_in_directory) FeaturesService.fromFileObj(session, projectId, tmp_file, {}, original_path=item_system_path, additional_files=additional_files) NotificationsService.create(session, user, "success", "Imported {f}".format(f=item_system_path)) diff --git a/geoapi/tests/api_tests/test_feature_service.py b/geoapi/tests/api_tests/test_feature_service.py index 340c0ffe..86651534 100644 --- a/geoapi/tests/api_tests/test_feature_service.py +++ b/geoapi/tests/api_tests/test_feature_service.py @@ -212,11 +212,29 @@ def test_create_tile_server_from_file(projects_fixture, tile_server_ini_file_fix assert tile_server.attribution == "OpenStreetMap contributorshttps://www.openstreetmap.org/copyright" -def test_create_questionnaire_feature(projects_fixture, questionnaire_file_fixture): - feature = FeaturesService.fromRAPP(db_session, projects_fixture.id, questionnaire_file_fixture, metadata={}) +def 
test_create_questionnaire_feature(projects_fixture, questionnaire_file_without_assets_fixture): + feature = FeaturesService.from_rapp_questionnaire(db_session, projects_fixture.id, + questionnaire_file_without_assets_fixture, + additional_files=None) assert feature.project_id == projects_fixture.id assert len(feature.assets) == 1 assert db_session.query(Feature).count() == 1 assert db_session.query(FeatureAsset).count() == 1 assert len(os.listdir(get_project_asset_dir(feature.project_id))) == 1 - assert os.path.isfile(os.path.join(get_project_asset_dir(projects_fixture.id), str(feature.assets[0].uuid) + "/questionnaire.rq")) + assert len(os.listdir(get_asset_path(feature.assets[0].path))) == 1 + assert os.path.isfile(get_asset_path(feature.assets[0].path, "questionnaire.rq")) + + +def test_create_questionnaire_feature_with_assets(projects_fixture, questionnaire_file_with_assets_fixture, image_file_fixture): + assets = [image_file_fixture] + feature = FeaturesService.from_rapp_questionnaire(db_session, projects_fixture.id, + questionnaire_file_with_assets_fixture, additional_files=assets) + assert feature.project_id == projects_fixture.id + assert len(feature.assets) == 1 + assert db_session.query(Feature).count() == 1 + assert db_session.query(FeatureAsset).count() == 1 + assert len(os.listdir(get_project_asset_dir(feature.project_id))) == 1 + assert len(os.listdir(get_asset_path(feature.assets[0].path))) == 3 + assert os.path.isfile(get_asset_path(feature.assets[0].path, "questionnaire.rq")) + assert os.path.isfile(get_asset_path(feature.assets[0].path, "image.preview.jpg")) + assert os.path.isfile(get_asset_path(feature.assets[0].path, "image.jpg")) diff --git a/geoapi/tests/conftest.py b/geoapi/tests/conftest.py index 5acc390a..19c994ba 100644 --- a/geoapi/tests/conftest.py +++ b/geoapi/tests/conftest.py @@ -162,6 +162,7 @@ def gpx_file_fixture(): def image_file_fixture(): home = os.path.dirname(__file__) with open(os.path.join(home, 'fixtures/image.jpg'), 'rb') 
as f: + f.filename = 'image.jpg' yield f @@ -473,7 +474,18 @@ def tile_server_ini_file_fixture(): @pytest.fixture(scope="function") -def questionnaire_file_fixture(): +def questionnaire_file_without_assets_fixture(): home = os.path.dirname(__file__) - with open(os.path.join(home, 'fixtures/questionnaire.rq'), 'rb') as f: + filename = 'fixtures/questionnaire_without_assets.rq' + with open(os.path.join(home, filename), 'rb') as f: + f.filename = filename + yield f + + +@pytest.fixture(scope="function") +def questionnaire_file_with_assets_fixture(): + home = os.path.dirname(__file__) + filename = 'fixtures/questionnaire_with_assets.rqa/questionnaire_with_assets.rq' + with open(os.path.join(home, filename), 'rb') as f: + f.filename = filename yield f diff --git a/geoapi/tests/external_data_tests/test_external_data.py b/geoapi/tests/external_data_tests/test_external_data.py index dcd02e5b..015e6cc7 100644 --- a/geoapi/tests/external_data_tests/test_external_data.py +++ b/geoapi/tests/external_data_tests/test_external_data.py @@ -449,16 +449,16 @@ def test_is_member_of_rapp_project_folder(): assert not is_member_of_rapp_project_folder("/something/test.jpg") -def test_get_additional_files_none(agave_utils_with_geojson_file): - assert not get_additional_files("testSystem", "/testPath/file.jpg", agave_utils_with_geojson_file) +def test_get_additional_files_none(shapefile_fixture, agave_utils_with_geojson_file): + assert not get_additional_files(shapefile_fixture, "testSystem", "/testPath/file.jpg", agave_utils_with_geojson_file) -def test_get_additional_files(agave_utils_with_geojson_file): - files = get_additional_files("testSystem", "/testPath/file.shp", agave_utils_with_geojson_file) +def test_get_additional_files_shapefiles(shapefile_fixture, agave_utils_with_geojson_file): + files = get_additional_files(shapefile_fixture, "testSystem", "/testPath/file.shp", agave_utils_with_geojson_file) assert len(files) == 14 -def 
test_get_additional_files_with_available_files(agave_utils_with_geojson_file): +def test_get_additional_files_shapefiles_with_available_files(shapefile_fixture, agave_utils_with_geojson_file): available_files = ["/testPath/file.shx", "/testPath/file.dbf", "/testPath/file.sbn", @@ -473,7 +473,8 @@ def test_get_additional_files_with_available_files(agave_utils_with_geojson_file "/testPath/file.prj", "/testPath/file.xml", "/testPath/file.cpg"] - files = get_additional_files("testSystem", + files = get_additional_files(shapefile_fixture, + "testSystem", "/testPath/file.shp", agave_utils_with_geojson_file, available_files=available_files) @@ -482,17 +483,35 @@ def test_get_additional_files_with_available_files(agave_utils_with_geojson_file available_files = ["/testPath/file.shx", "/testPath/file.dbf", "/testPath/file.prj"] - files = get_additional_files("testSystem", + files = get_additional_files(shapefile_fixture, + "testSystem", "/testPath/file.shp", agave_utils_with_geojson_file, available_files=available_files) assert len(files) == 3 -def test_get_additional_files_but_missing_prj(agave_utils_with_geojson_file): +def test_get_additional_files_shapefiles_missing_prj(shapefile_fixture, agave_utils_with_geojson_file): available_files_missing_prj = ["/testPath/file.shx", "/testPath/file.dbf"] with pytest.raises(Exception): - get_additional_files("testSystem", + get_additional_files(shapefile_fixture, + "testSystem", "/testPath/file.shp", agave_utils_with_geojson_file, available_files=available_files_missing_prj) + + +def test_get_additional_files_rapid_questionnaire_with_assets(questionnaire_file_with_assets_fixture, agave_utils_with_geojson_file): + files = get_additional_files(questionnaire_file_with_assets_fixture, + "testSystem", + questionnaire_file_with_assets_fixture.filename, + agave_utils_with_geojson_file) + assert len(files) == 1 + + +def test_get_additional_files_rapid_questionnaire_no_assets(questionnaire_file_without_assets_fixture, 
agave_utils_with_geojson_file): + files = get_additional_files(questionnaire_file_without_assets_fixture, + "testSystem", + questionnaire_file_without_assets_fixture.filename, + agave_utils_with_geojson_file) + assert files == [] diff --git a/geoapi/tests/fixtures/questionnaire.rq b/geoapi/tests/fixtures/questionnaire.rq deleted file mode 100644 index 76137a0f..00000000 --- a/geoapi/tests/fixtures/questionnaire.rq +++ /dev/null @@ -1 +0,0 @@ -{"id":2171,"description":"Yellowstone Test","geolocation":[{"longitude":-122.30502990145563,"timestamp":1658877846.8723259,"latitude":47.652537730540047,"course":-1,"heading":24.99053955078125,"altitude":38.239620208740234}],"uuid":"8288DDDC-E50B-4A72-980E-84D7BD066BA9","version":"8288DDDC-E50B-4A72-980E-84D7BD066BA9","access":"private","owner":"elliot_n","end_uuid":"B20EBBAE-E3E2-42AB-88EA-7F9068BD4502","editable":false,"allow_back":true,"questionUuidsVisited":[],"sections":[{"id":"AA901679-2D1F-47BD-A67B-ABA5FDB0B23C","label":"Researcher Questions","questions":[{"id":"B5B0D1D8-7555-4414-A9FE-C3389AFD000F","heading":"Q1","label":"Are you wearing proper PPE","options":[{"go_to":null,"default":false,"label":"Yes","sub_question":null,"value":"yes"},{"go_to":null,"default":false,"label":"No","sub_question":null,"value":"no"}],"responseIndexes":[0],"decline":null,"type":"Yes \/ No","value":"question","mode":"list","assetUuids":[],"instructions":"Select one","required":true},{"id":"06A57174-5336-4BC4-BF87-BD5E9C6AF9B4","heading":"Is your subject wearing proper PPE?","label":"Enter your question text","options":[{"go_to":null,"default":false,"label":"Yes","sub_question":null,"value":"yes"},{"go_to":null,"default":false,"label":"No","sub_question":null,"value":"no"}],"responseIndexes":[0],"decline":null,"type":"Yes \/ No","value":"enter_your_question_text","mode":"list","assetUuids":[],"instructions":"Select one:","required":true},{"id":"2DAC188A-BBD0-42FF-B0B9-0C88498B2D36","heading":"Q3","label":"Please select one of the 
following:","options":[{"go_to":null,"default":false,"label":"Uninjured, not displaced","sub_question":null,"value":"uninjured_not_displaced"},{"go_to":null,"default":false,"label":"Minor Injuries, not displaced","sub_question":null,"value":"minor_injuries_not_displaced"},{"go_to":null,"default":false,"label":"Major injuries, not displaced","sub_question":null,"value":"major_injuries_not_displaced"},{"go_to":null,"default":false,"label":"Uninjured, displaced","sub_question":null,"value":"uninjured_displaced"},{"go_to":null,"default":false,"label":"Minor Injuries, displaced","sub_question":null,"value":"minor_injuries_displaced"},{"go_to":null,"default":false,"label":"Major injuries, displaced","sub_question":null,"value":"major_injuries_displaced"}],"responseIndexes":[5],"decline":null,"type":"Single Select","value":"please_select_one_of_the_follo","mode":"list","assetUuids":[],"instructions":"What is the status of your subject?","required":true}]},{"id":"EFFDA218-C78B-4EED-B107-7EB1A96F9C93","label":"Resident Questions","questions":[{"value":"primary_residence","responseIndexes":[0],"id":"2C63F564-BC9F-4BDA-B511-68470AF464B1","instructions":"Where were you during the flooding event?","decline":null,"assetUuids":[],"label":"Please select one or more of the following:","type":"Multi Select","required":true,"heading":"Q1","options":[{"go_to":null,"default":false,"label":"Primary Residence","sub_question":null,"value":"primary_residence"},{"go_to":null,"default":false,"label":"Secondary Residence","sub_question":null,"value":"secondary_residence"},{"go_to":null,"default":false,"label":"In the area, but not at primary\/secondary residence","sub_question":null,"value":"in_the_area_but_not_at_primary"},{"go_to":null,"default":false,"label":"Out of 
town","sub_question":null,"value":"out_of_town"},{"go_to":null,"default":false,"label":"Other","sub_question":null,"value":"other"}]}]}],"subversion":"1","is_invalid":false,"display_mode":"multi","end_text":"End","name":"EN_Test","errors":[]} diff --git a/geoapi/tests/fixtures/questionnaire_with_assets.rqa/questionnaire_with_assets.rq b/geoapi/tests/fixtures/questionnaire_with_assets.rqa/questionnaire_with_assets.rq new file mode 100644 index 00000000..85d16cac --- /dev/null +++ b/geoapi/tests/fixtures/questionnaire_with_assets.rqa/questionnaire_with_assets.rq @@ -0,0 +1,56 @@ +{ + "editable": false, + "uuid": "DE99A39A-B352-4FF6-BA6B-C325BB25CDFF", + "access": "private", + "subversion": "1", + "allow_back": true, + "errors": [], + "geolocation": [ + { + "altitude": 44.7580108642578, + "latitude": 12.3456789, + "course": -1, + "timestamp": 1680540248.33747, + "longitude": -123.456789 + } + ], + "version": "DE99A39A-B352-4FF6-BA6B-C325BB25CDFF", + "end_text": "End", + "description": "Questionnaire with only one question", + "sections": [ + { + "id": "D1893426-9206-4678-9589-B81B9E7042AB", + "label": "", + "questions": [ + { + "decline": null, + "id": "7389C442-05C0-46DA-AB1E-C9AF29A29A0C", + "required": true, + "responseStrings": [ + "5" + ], + "type": "Number", + "mode": "integer", + "heading": null, + "label": "Please enter an integer", + "assets": [ + { + "rappUuid": "9378FA7A-8BDD-4947-A3EC-2D66B938C59F", + "filename": "Q1-Photo-001.jpg" + } + ], + "instructions": null, + "value": "please_enter_an_integer" + } + ] + } + ], + "owner": "adioso", + "questionUuidsVisited": [], + "asset_embedding": true, + "is_invalid": false, + "id": 1918, + "display_mode": "single", + "end_uuid": "C551C60C-AA8D-4BBD-BCE4-87A1A11120E7", + "name": "Single Question" +} \ No newline at end of file diff --git a/geoapi/tests/fixtures/questionnaire_without_assets.rq b/geoapi/tests/fixtures/questionnaire_without_assets.rq new file mode 100644 index 00000000..48172d85 --- /dev/null +++ 
b/geoapi/tests/fixtures/questionnaire_without_assets.rq @@ -0,0 +1,214 @@ +{ + "id": 2171, + "description": "Yellowstone Test", + "geolocation": [ + { + "longitude": -122.30502990145563, + "timestamp": 1658877846.8723259, + "latitude": 47.652537730540047, + "course": -1, + "heading": 24.99053955078125, + "altitude": 38.239620208740234 + } + ], + "uuid": "8288DDDC-E50B-4A72-980E-84D7BD066BA9", + "version": "8288DDDC-E50B-4A72-980E-84D7BD066BA9", + "access": "private", + "owner": "elliot_n", + "end_uuid": "B20EBBAE-E3E2-42AB-88EA-7F9068BD4502", + "editable": false, + "allow_back": true, + "questionUuidsVisited": [], + "sections": [ + { + "id": "AA901679-2D1F-47BD-A67B-ABA5FDB0B23C", + "label": "Researcher Questions", + "questions": [ + { + "id": "B5B0D1D8-7555-4414-A9FE-C3389AFD000F", + "heading": "Q1", + "label": "Are you wearing proper PPE", + "options": [ + { + "go_to": null, + "default": false, + "label": "Yes", + "sub_question": null, + "value": "yes" + }, + { + "go_to": null, + "default": false, + "label": "No", + "sub_question": null, + "value": "no" + } + ], + "responseIndexes": [ + 0 + ], + "decline": null, + "type": "Yes \/ No", + "value": "question", + "mode": "list", + "assetUuids": [], + "instructions": "Select one", + "required": true + }, + { + "id": "06A57174-5336-4BC4-BF87-BD5E9C6AF9B4", + "heading": "Is your subject wearing proper PPE?", + "label": "Enter your question text", + "options": [ + { + "go_to": null, + "default": false, + "label": "Yes", + "sub_question": null, + "value": "yes" + }, + { + "go_to": null, + "default": false, + "label": "No", + "sub_question": null, + "value": "no" + } + ], + "responseIndexes": [ + 0 + ], + "decline": null, + "type": "Yes \/ No", + "value": "enter_your_question_text", + "mode": "list", + "assetUuids": [], + "instructions": "Select one:", + "required": true + }, + { + "id": "2DAC188A-BBD0-42FF-B0B9-0C88498B2D36", + "heading": "Q3", + "label": "Please select one of the following:", + "options": [ + { + 
"go_to": null, + "default": false, + "label": "Uninjured, not displaced", + "sub_question": null, + "value": "uninjured_not_displaced" + }, + { + "go_to": null, + "default": false, + "label": "Minor Injuries, not displaced", + "sub_question": null, + "value": "minor_injuries_not_displaced" + }, + { + "go_to": null, + "default": false, + "label": "Major injuries, not displaced", + "sub_question": null, + "value": "major_injuries_not_displaced" + }, + { + "go_to": null, + "default": false, + "label": "Uninjured, displaced", + "sub_question": null, + "value": "uninjured_displaced" + }, + { + "go_to": null, + "default": false, + "label": "Minor Injuries, displaced", + "sub_question": null, + "value": "minor_injuries_displaced" + }, + { + "go_to": null, + "default": false, + "label": "Major injuries, displaced", + "sub_question": null, + "value": "major_injuries_displaced" + } + ], + "responseIndexes": [ + 5 + ], + "decline": null, + "type": "Single Select", + "value": "please_select_one_of_the_follo", + "mode": "list", + "assetUuids": [], + "instructions": "What is the status of your subject?", + "required": true + } + ] + }, + { + "id": "EFFDA218-C78B-4EED-B107-7EB1A96F9C93", + "label": "Resident Questions", + "questions": [ + { + "value": "primary_residence", + "responseIndexes": [ + 0 + ], + "id": "2C63F564-BC9F-4BDA-B511-68470AF464B1", + "instructions": "Where were you during the flooding event?", + "decline": null, + "assetUuids": [], + "label": "Please select one or more of the following:", + "type": "Multi Select", + "required": true, + "heading": "Q1", + "options": [ + { + "go_to": null, + "default": false, + "label": "Primary Residence", + "sub_question": null, + "value": "primary_residence" + }, + { + "go_to": null, + "default": false, + "label": "Secondary Residence", + "sub_question": null, + "value": "secondary_residence" + }, + { + "go_to": null, + "default": false, + "label": "In the area, but not at primary\/secondary residence", + "sub_question": null, 
+ "value": "in_the_area_but_not_at_primary" + }, + { + "go_to": null, + "default": false, + "label": "Out of town", + "sub_question": null, + "value": "out_of_town" + }, + { + "go_to": null, + "default": false, + "label": "Other", + "sub_question": null, + "value": "other" + } + ] + } + ] + } + ], + "subversion": "1", + "is_invalid": false, + "display_mode": "multi", + "end_text": "End", + "name": "EN_Test", + "errors": [] +} diff --git a/geoapi/utils/agave.py b/geoapi/utils/agave.py index 6ec28873..a7ba004e 100644 --- a/geoapi/utils/agave.py +++ b/geoapi/utils/agave.py @@ -214,6 +214,7 @@ def getFile(self, systemId: str, path: str) -> IO: allowed_attempts = 5 while allowed_attempts > 0: try: + logger.debug(f"Getting file {systemId}/{path}") return self._get_file(systemId, path) except RetryableTapisFileError: allowed_attempts = allowed_attempts - 1 diff --git a/kube/geoapi.yaml b/kube/geoapi.yaml index fd4725c5..4a802b35 100644 --- a/kube/geoapi.yaml +++ b/kube/geoapi.yaml @@ -48,6 +48,16 @@ data: max_ranges 0; expires 30d; add_header "Access-Control-Allow-Origin" *; + + # Preflighted requests + if ($request_method = OPTIONS ) { + add_header "Access-Control-Allow-Origin" *; + add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD, PUT, DELETE"; + add_header "Access-Control-Allow-Headers" "*"; + add_header 'Access-Control-Max-Age' 1728000; + add_header 'Content-Length' 0; + return 204; + } alias /assets/; } } From e4b2e6f7da642b2154e5b39fc3cbace0c3e88322 Mon Sep 17 00:00:00 2001 From: Nathan Franklin Date: Fri, 3 Nov 2023 16:02:51 -0500 Subject: [PATCH 2/3] task/WG-170: fix scraping of questionnaire (#152) * Improve method signature * Refactor how we scrape files Ensure that questionairre's asset images are scraped twice (i.e. 
the images in the rqa directory) * Add logging statement * Improve log statment * Fix pep8/flake errors * Remove single line * Remove added comma --- geoapi/services/features.py | 59 ++-------- geoapi/tasks/external_data.py | 40 +++---- geoapi/tasks/streetview.py | 4 +- .../external_data_tests/test_external_data.py | 4 +- geoapi/tests/utils_tests/test_features.py | 94 ++++++++++++++++ geoapi/utils/agave.py | 4 +- geoapi/utils/features.py | 101 ++++++++++++++++++ 7 files changed, 224 insertions(+), 82 deletions(-) create mode 100644 geoapi/tests/utils_tests/test_features.py create mode 100644 geoapi/utils/features.py diff --git a/geoapi/services/features.py b/geoapi/services/features.py index 3f95b531..24304b38 100644 --- a/geoapi/services/features.py +++ b/geoapi/services/features.py @@ -19,53 +19,14 @@ from geoapi.exceptions import InvalidGeoJSON, ApiException from geoapi.utils.assets import make_project_asset_dir, delete_assets, get_asset_relative_path from geoapi.log import logging -from geoapi.utils import geometries +from geoapi.utils import (geometries, + features as features_util) from geoapi.utils.agave import AgaveUtils logger = logging.getLogger(__name__) class FeaturesService: - GEOJSON_FILE_EXTENSIONS = ( - 'json', 'geojson' - ) - - IMAGE_FILE_EXTENSIONS = ( - 'jpeg', 'jpg', - ) - - VIDEO_FILE_EXTENSIONS = ( - 'mp4', 'mov', 'mpeg4', 'webm' - ) - - AUDIO_FILE_EXTENSIONS = ( - 'mp3', 'aac' - ) - - GPX_FILE_EXTENSIONS = ( - 'gpx', - ) - - SHAPEFILE_FILE_EXTENSIONS = ( - 'shp', - ) - - RAPP_FILE_EXTENSIONS = ( - 'rq', - ) - - ALLOWED_GEOSPATIAL_FEATURE_ASSET_EXTENSIONS = IMAGE_FILE_EXTENSIONS + VIDEO_FILE_EXTENSIONS - - INI_FILE_EXTENSIONS = ( - 'ini', - ) - - ALLOWED_GEOSPATIAL_EXTENSIONS = IMAGE_FILE_EXTENSIONS + GPX_FILE_EXTENSIONS + GEOJSON_FILE_EXTENSIONS\ - + SHAPEFILE_FILE_EXTENSIONS + RAPP_FILE_EXTENSIONS - - ALLOWED_EXTENSIONS = IMAGE_FILE_EXTENSIONS + VIDEO_FILE_EXTENSIONS + AUDIO_FILE_EXTENSIONS + GPX_FILE_EXTENSIONS\ - + GEOJSON_FILE_EXTENSIONS 
+ SHAPEFILE_FILE_EXTENSIONS + INI_FILE_EXTENSIONS + RAPP_FILE_EXTENSIONS - @staticmethod def get(database_session, featureId: int) -> Feature: """ @@ -371,17 +332,17 @@ def fromINI(database_session, projectId: int, fileObj: IO, metadata: Dict, origi def fromFileObj(database_session, projectId: int, fileObj: IO, metadata: Dict, original_path: str = None, additional_files=None) -> List[Feature]: ext = pathlib.Path(fileObj.filename).suffix.lstrip(".").lower() - if ext in FeaturesService.IMAGE_FILE_EXTENSIONS: + if ext in features_util.IMAGE_FILE_EXTENSIONS: return [FeaturesService.fromImage(database_session, projectId, fileObj, metadata, original_path)] - elif ext in FeaturesService.GPX_FILE_EXTENSIONS: + elif ext in features_util.GPX_FILE_EXTENSIONS: return [FeaturesService.fromGPX(database_session, projectId, fileObj, metadata, original_path)] - elif ext in FeaturesService.GEOJSON_FILE_EXTENSIONS: + elif ext in features_util.GEOJSON_FILE_EXTENSIONS: return FeaturesService.fromGeoJSON(database_session, projectId, fileObj, {}, original_path) - elif ext in FeaturesService.SHAPEFILE_FILE_EXTENSIONS: + elif ext in features_util.SHAPEFILE_FILE_EXTENSIONS: return FeaturesService.fromShapefile(database_session, projectId, fileObj, {}, additional_files, original_path) - elif ext in FeaturesService.INI_FILE_EXTENSIONS: + elif ext in features_util.INI_FILE_EXTENSIONS: return FeaturesService.fromINI(database_session, projectId, fileObj, {}, original_path) - elif ext in FeaturesService.RAPP_FILE_EXTENSIONS: + elif ext in features_util.RAPP_QUESTIONNAIRE_FILE_EXTENSIONS: return FeaturesService.from_rapp_questionnaire(database_session, projectId, fileObj, additional_files, original_path) else: raise ApiException("Filetype not supported for direct upload. 
Create a feature and attach as an asset?") @@ -443,9 +404,9 @@ def createFeatureAsset(database_session, projectId: int, featureId: int, fileObj """ fpath = pathlib.Path(fileObj.filename) ext = fpath.suffix.lstrip('.').lower() - if ext in FeaturesService.IMAGE_FILE_EXTENSIONS: + if ext in features_util.IMAGE_FILE_EXTENSIONS: fa = FeaturesService.createImageFeatureAsset(projectId, fileObj, original_path=original_path) - elif ext in FeaturesService.VIDEO_FILE_EXTENSIONS: + elif ext in features_util.VIDEO_FILE_EXTENSIONS: fa = FeaturesService.createVideoFeatureAsset(projectId, fileObj, original_path=original_path) else: raise ApiException("Invalid format for feature assets") diff --git a/geoapi/tasks/external_data.py b/geoapi/tasks/external_data.py index b7d602b0..7055d7c8 100644 --- a/geoapi/tasks/external_data.py +++ b/geoapi/tasks/external_data.py @@ -13,6 +13,7 @@ from geoapi.models import User, ProjectUser, ObservableDataProject, Task from geoapi.utils.agave import (AgaveUtils, SystemUser, get_system_users, get_metadata_using_service_account, AgaveFileGetError, AgaveListingError) +from geoapi.utils import features as features_util from geoapi.log import logger from geoapi.services.features import FeaturesService from geoapi.services.imports import ImportsService @@ -45,14 +46,6 @@ def _parse_rapid_geolocation(loc): return lat, lon -def is_member_of_rapp_project_folder(path): - """ - Check to see if path is contained within RApp project folder - :param path: str - """ - return "/RApp/" in path - - def get_file(client, system_id, path, required): """ Get file callable function to be used for asynchronous future task @@ -305,11 +298,8 @@ def import_from_files_from_path(session, tenant_id: str, userId: int, systemId: for item in files_in_directory: if item.type == "dir" and not str(item.path).endswith("/.Trash"): import_from_files_from_path(session, tenant_id, userId, systemId, item.path, projectId) - # skip any junk files that are not allowed - if 
item.path.suffix.lower().lstrip('.') not in FeaturesService.ALLOWED_EXTENSIONS: - continue - else: - item_system_path = os.path.join(item.system, str(item.path).lstrip("/")) + item_system_path = os.path.join(item.system, str(item.path).lstrip("/")) + if features_util.is_file_supported_for_automatic_scraping(item_system_path): try: # first check if there already is a file in the DB target_file = ImportsService.getImport(session, projectId, systemId, str(item.path)) @@ -319,16 +309,9 @@ def import_from_files_from_path(session, tenant_id: str, userId: int, systemId: f"successful_import={target_file.successful_import}") continue - # If it is a RApp project folder, grab the metadata from tapis meta service - if is_member_of_rapp_project_folder(item_system_path): - logger.info("RApp: importing:{} for user:{}".format(item_system_path, user.username)) - if item.path.suffix.lower().lstrip( - '.') not in FeaturesService.ALLOWED_GEOSPATIAL_FEATURE_ASSET_EXTENSIONS: - logger.info("{path} is unsupported; skipping.".format(path=item_system_path)) - continue - - logger.info("{} {} {}".format(item_system_path, item.system, item.path)) - + # If it is a RApp project folder and not a questionnaire file, use the metadata from tapis meta service + if features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata(item_system_path): + logger.info(f"RApp: importing:{item_system_path} for user:{user.username}. 
Using metadata service for geolocation.") try: meta = get_metadata_using_service_account(tenant_id, item.system, item.path) except MissingServiceAccount: @@ -359,7 +342,7 @@ def import_from_files_from_path(session, tenant_id: str, userId: int, systemId: raise RuntimeError("Unable to create feature asset") NotificationsService.create(session, user, "success", "Imported {f}".format(f=item_system_path)) tmp_file.close() - elif item.path.suffix.lower().lstrip('.') in FeaturesService.ALLOWED_GEOSPATIAL_EXTENSIONS: + elif features_util.is_supported_for_automatic_scraping_without_metadata(item_system_path): logger.info("importing:{} for user:{}".format(item_system_path, user.username)) tmp_file = client.getFile(systemId, item.path) tmp_file.filename = Path(item.path).name @@ -369,14 +352,17 @@ def import_from_files_from_path(session, tenant_id: str, userId: int, systemId: NotificationsService.create(session, user, "success", "Imported {f}".format(f=item_system_path)) tmp_file.close() else: + # skipping as not supported + logger.debug("{path} is unsupported; skipping.".format(path=item_system_path)) continue import_state = ImportState.SUCCESS except Exception as e: - logger.error( - f"Could not import for user:{user.username} from agave:{systemId}/{item_system_path} " - f"(while recursively importing files from {systemId}/{path})") NotificationsService.create(session, user, "error", "Error importing {f}".format(f=item_system_path)) import_state = ImportState.FAILURE if e is not AgaveFileGetError else ImportState.RETRYABLE_FAILURE + logger.exception( + f"Could not import for user:{user.username} from agave:{systemId}/{item_system_path} " + f"(while recursively importing files from {systemId}/{path}). 
" + f"retryable={import_state == ImportState.RETRYABLE_FAILURE}") if import_state != ImportState.RETRYABLE_FAILURE: try: successful = True if import_state == ImportState.SUCCESS else False diff --git a/geoapi/tasks/streetview.py b/geoapi/tasks/streetview.py index 9d6b53e5..b70e2b47 100644 --- a/geoapi/tasks/streetview.py +++ b/geoapi/tasks/streetview.py @@ -24,7 +24,7 @@ remove_project_streetview_dir, MapillaryUtils) from geoapi.log import logging -import geoapi.services.features as features +from geoapi.utils import features as features_util from geoapi.services.streetview import StreetviewService from geoapi.services.notifications import NotificationsService from geoapi.db import create_task_session @@ -113,7 +113,7 @@ def _from_tapis(database_session, user: User, task_uuid: UUID, systemId: str, pa for item in files_in_directory: if item.type == "dir": continue - if item.path.suffix.lower().lstrip('.') not in features.FeaturesService.IMAGE_FILE_EXTENSIONS: + if item.path.suffix.lower().lstrip('.') not in features_util.IMAGE_FILE_EXTENSIONS: continue try: img_name = os.path.join(str(base_filepath), Path(item.path).name) diff --git a/geoapi/tests/external_data_tests/test_external_data.py b/geoapi/tests/external_data_tests/test_external_data.py index 015e6cc7..da5f2522 100644 --- a/geoapi/tests/external_data_tests/test_external_data.py +++ b/geoapi/tests/external_data_tests/test_external_data.py @@ -8,8 +8,8 @@ from geoapi.tasks.external_data import (import_from_agave, import_point_clouds_from_agave, refresh_observable_projects, - get_additional_files, - is_member_of_rapp_project_folder) + get_additional_files) +from geoapi.utils.features import is_member_of_rapp_project_folder from geoapi.utils.agave import AgaveFileListing, SystemUser from geoapi.utils.assets import get_project_asset_dir, get_asset_path from geoapi.exceptions import InvalidCoordinateReferenceSystem diff --git a/geoapi/tests/utils_tests/test_features.py b/geoapi/tests/utils_tests/test_features.py 
new file mode 100644 index 00000000..79797dfa --- /dev/null +++ b/geoapi/tests/utils_tests/test_features.py @@ -0,0 +1,94 @@ +import geoapi.utils.features as features_util + + +def test_is_member_of_rapp_project_folder(): + assert not features_util.is_member_of_rapp_project_folder("/") + assert not features_util.is_member_of_rapp_project_folder("/foo/") + + assert features_util.is_member_of_rapp_project_folder("/RApp/foo.txt") + assert features_util.is_member_of_rapp_project_folder("/RApp/bar/foo.txt") + + +def test_is_member_of_rqa_folder(): + assert not features_util.is_member_of_rqa_folder("/") + assert not features_util.is_member_of_rqa_folder("/foo/") + + assert features_util.is_member_of_rqa_folder("/RApp/foo.rqa/test.rq") + assert features_util.is_member_of_rqa_folder("/bar/foo.rqa/test.jpg") + + +def test_is_file_supported_for_automatic_scraping(): + assert not features_util.is_file_supported_for_automatic_scraping("foo") + assert not features_util.is_file_supported_for_automatic_scraping("foo.txt") + assert not features_util.is_file_supported_for_automatic_scraping("foo.gif") + assert not features_util.is_file_supported_for_automatic_scraping("foo.ini") + assert not features_util.is_file_supported_for_automatic_scraping("foo.las") + assert not features_util.is_file_supported_for_automatic_scraping("foo.laz") + + assert features_util.is_file_supported_for_automatic_scraping("foo.jpg") + assert features_util.is_file_supported_for_automatic_scraping("foo.JPG") + assert features_util.is_file_supported_for_automatic_scraping("foo.jpeg") + assert features_util.is_file_supported_for_automatic_scraping("foo.JPEG") + + assert features_util.is_file_supported_for_automatic_scraping("foo.geojson") + + assert features_util.is_file_supported_for_automatic_scraping("foo.mp4") + assert features_util.is_file_supported_for_automatic_scraping("foo.mov") + assert features_util.is_file_supported_for_automatic_scraping("foo.mpeg4") + assert 
features_util.is_file_supported_for_automatic_scraping("foo.webm") + + assert features_util.is_file_supported_for_automatic_scraping("foo.gpx") + + assert features_util.is_file_supported_for_automatic_scraping("foo.rq") + + assert features_util.is_file_supported_for_automatic_scraping("foo.shp") + + +def test_is_supported_for_automatic_scraping_without_metadata(): + assert not features_util.is_supported_for_automatic_scraping_without_metadata("foo") + assert not features_util.is_supported_for_automatic_scraping_without_metadata("foo.txt") + assert not features_util.is_supported_for_automatic_scraping_without_metadata("foo.gif") + assert not features_util.is_supported_for_automatic_scraping_without_metadata("foo.ini") + assert not features_util.is_supported_for_automatic_scraping_without_metadata("foo.las") + assert not features_util.is_supported_for_automatic_scraping_without_metadata("foo.laz") + assert not features_util.is_supported_for_automatic_scraping_without_metadata("foo.mp4") + + assert features_util.is_supported_for_automatic_scraping_without_metadata("foo.jpg") + assert features_util.is_supported_for_automatic_scraping_without_metadata("foo.JPG") + assert features_util.is_supported_for_automatic_scraping_without_metadata("foo.jpeg") + assert features_util.is_supported_for_automatic_scraping_without_metadata("foo.JPEG") + + assert features_util.is_supported_for_automatic_scraping_without_metadata("foo.geojson") + + assert features_util.is_supported_for_automatic_scraping_without_metadata("foo.gpx") + + assert features_util.is_supported_for_automatic_scraping_without_metadata("foo.rq") + + assert features_util.is_supported_for_automatic_scraping_without_metadata("foo.shp") + + +def test_is_supported_file_type_in_rapp_folder_and_needs_metadata(): + # not supported type + assert not features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo") + assert not 
features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.txt") + assert not features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.gif") + assert not features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.ini") + assert not features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.las") + assert not features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.laz") + assert not features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.geojson") + assert not features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.rq") + assert not features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.gpx") + assert not features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.shp") + + # not in Rapp folder + assert not features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/bar/foo.jpg") + + assert features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.jpg") + assert features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.JPG") + assert features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.jpeg") + assert features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.JPEG") + + assert features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.mp4") + assert features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.mov") + assert features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.mpeg4") + assert features_util.is_supported_file_type_in_rapp_folder_and_needs_metadata("/RApp/foo.webm") diff --git a/geoapi/utils/agave.py b/geoapi/utils/agave.py index a7ba004e..954461d7 100644 --- a/geoapi/utils/agave.py +++ b/geoapi/utils/agave.py @@ -142,7 +142,7 @@ def 
getMetaAssociated(self, uuid: str) -> Dict: out = {k: v for d in results for k, v in d.items()} return out - def _get_file(self, systemId: str, path: str, use_service_account: bool = False) -> IO: + def _get_file(self, systemId: str, path: str, use_service_account: bool = False) -> NamedTemporaryFile: """ Get file @@ -194,7 +194,7 @@ def _get_file(self, systemId: str, path: str, use_service_account: bool = False) raise RetryableTapisFileError return tmpFile - def getFile(self, systemId: str, path: str) -> IO: + def getFile(self, systemId: str, path: str) -> NamedTemporaryFile: """ Download a file from tapis diff --git a/geoapi/utils/features.py b/geoapi/utils/features.py new file mode 100644 index 00000000..99f5cdbc --- /dev/null +++ b/geoapi/utils/features.py @@ -0,0 +1,101 @@ +from pathlib import Path + +GEOJSON_FILE_EXTENSIONS = ( + 'json', 'geojson' +) + +IMAGE_FILE_EXTENSIONS = ( + 'jpeg', 'jpg', +) + +VIDEO_FILE_EXTENSIONS = ( + 'mp4', 'mov', 'mpeg4', 'webm' +) + +# TODO not used; remove from code base +AUDIO_FILE_EXTENSIONS = ( + 'mp3', 'aac' +) + +GPX_FILE_EXTENSIONS = ( + 'gpx', +) + +SHAPEFILE_FILE_EXTENSIONS = ( + 'shp', +) + +RAPP_QUESTIONNAIRE_FILE_EXTENSIONS = ( + 'rq', +) + +RAPP_QUESTIONNAIRE_ARCHIVE_EXTENSIONS = 'rqa' + +ALLOWED_GEOSPATIAL_FEATURE_ASSET_EXTENSIONS = IMAGE_FILE_EXTENSIONS + VIDEO_FILE_EXTENSIONS + +INI_FILE_EXTENSIONS = ( + 'ini', +) + +# Files that can be directly imported (with or without Tapis metadata) +ALLOWED_GEOSPATIAL_EXTENSIONS_FOR_SCRAPING = IMAGE_FILE_EXTENSIONS + GPX_FILE_EXTENSIONS + GEOJSON_FILE_EXTENSIONS +\ + SHAPEFILE_FILE_EXTENSIONS + RAPP_QUESTIONNAIRE_FILE_EXTENSIONS + + +def is_member_of_rapp_project_folder(path): + """ + Check to see if path is contained within RApp project folder + :param path: str + """ + return "/RApp/" in path + + +def is_member_of_rqa_folder(path): + """ + Check to see if path is directly contained within a RApp questionnaire archive (.rqa) folder + :param path: str + """ + path_obj = Path(path) + return path_obj.parent 
and path_obj.parent.name.endswith('.' + RAPP_QUESTIONNAIRE_ARCHIVE_EXTENSIONS) + + +def is_file_supported_for_automatic_scraping(path): + """ + Check to see if file has a type supported for automatic importing + :param path: str + """ + path_obj = Path(path) + suffix = path_obj.suffix.lower().lstrip('.') + return (suffix in ALLOWED_GEOSPATIAL_EXTENSIONS_FOR_SCRAPING or # supported files (with or without Tapis metadata) + suffix in ALLOWED_GEOSPATIAL_FEATURE_ASSET_EXTENSIONS) # with metadata (i.e. within /RApp folder) + + +def is_supported_for_automatic_scraping_without_metadata(path): + """ + Check to see if file is supported for automatic importing (without metadata). + + Note: assets like images inside the questionnaire archive (i.e. in .rqa) should be ignored. Only the + .rq file inside a .rqa folder should be imported. + + :param path: str + """ + path_obj = Path(path) + file_suffix = path_obj.suffix.lower().lstrip('.') + return (file_suffix in ALLOWED_GEOSPATIAL_EXTENSIONS_FOR_SCRAPING and + (not is_member_of_rqa_folder(path) or file_suffix in RAPP_QUESTIONNAIRE_FILE_EXTENSIONS)) # if in .rqa, then only .rq file + + +def is_supported_file_type_in_rapp_folder_and_needs_metadata(path): + """ + Check if file is in /RApp folder and is importable and if Tapis metadata service should be used to derive + the file's geolocation + + This applies to image and video files (i.e. ALLOWED_GEOSPATIAL_FEATURE_ASSET_EXTENSIONS) in the RApp project folder + but the exception is the image and video files within the .rqa folder. 
+ + :param path: str + """ + path_obj = Path(path) + return (is_member_of_rapp_project_folder(path) + and path_obj.suffix.lower().lstrip('.') in ALLOWED_GEOSPATIAL_FEATURE_ASSET_EXTENSIONS + and not is_member_of_rqa_folder(path)) From d20d43187fb10f6416b61378c5d65ce597d87de9 Mon Sep 17 00:00:00 2001 From: Nathan Franklin Date: Tue, 14 Nov 2023 14:23:16 -0600 Subject: [PATCH 3/3] Add handling of preflight requests in assets (#159) --- devops/geoapi-services/nginx.conf | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/devops/geoapi-services/nginx.conf b/devops/geoapi-services/nginx.conf index cb385510..b678cb9f 100644 --- a/devops/geoapi-services/nginx.conf +++ b/devops/geoapi-services/nginx.conf @@ -48,6 +48,17 @@ http { max_ranges 0; expires 30d; add_header "Access-Control-Allow-Origin" *; + + # Preflighted requests + if ($request_method = OPTIONS ) { + add_header "Access-Control-Allow-Origin" *; + add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD, PUT, DELETE"; + add_header "Access-Control-Allow-Headers" "*"; + add_header 'Access-Control-Max-Age' 1728000; + add_header 'Content-Length' 0; + return 204; + } + alias /assets/; } }