Skip to content

Commit

Permalink
Updated scripts to support the Tasmanian Insects data load
Browse files: browse the repository at this point in the history
  • Loading branch information
salomon-j committed Dec 23, 2024
1 parent f928332 commit 6b585f4
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 16 deletions.
6 changes: 3 additions & 3 deletions src/scripts/FOAImport.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class FOAImport {

List collectionImages = []

def csv = parseCsv(new File("${DATA_DIR}/foa_export_name.csv").newReader(FILE_ENCODING))
def csv = parseCsv(new File("${DATA_DIR}/taxon_TMAG_insects.csv").newReader(FILE_ENCODING))
csv.each { line ->
if (count++ % 50 == 0) println "Processing taxa line ${count}..."

Expand Down Expand Up @@ -246,7 +246,7 @@ class FOAImport {

static Map<String, String> loadAttributeTitles() {
Map<String, String> attributeTitles = [:]
def csv = parseCsv(new File("${DATA_DIR}/foa_export_attr.csv").newReader(FILE_ENCODING))
def csv = parseCsv(new File("${DATA_DIR}/attributes_TMAG_Insects.csv").newReader(FILE_ENCODING))
csv.each { line ->
try {
String propertyName = line.PROPERTY_NAME?.replaceAll("_", " ")?.trim()
Expand All @@ -269,7 +269,7 @@ class FOAImport {
static Map<String, Map<String, List<String>>> loadAttributes(Map<String, String> attributeTitles) {
Map<String, Map<String, List<String>>> attributes = [:]
int count = 0
def csv = parseCsv(new File("${DATA_DIR}/foa_export_attr.csv").newReader(FILE_ENCODING))
def csv = parseCsv(new File("${DATA_DIR}/attributes_TMAG_Insects.csv").newReader(FILE_ENCODING))
csv.each { line ->
if (count++ % 50 == 0) println "Processing attribute line ${count}..."
try {
Expand Down
42 changes: 29 additions & 13 deletions src/scripts/UpdateProfileAttribute.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,22 @@ class UpdateProfileAttribute {
static String USER_DISPLAY_NAME
static String OUTPUT_FILE
static String IMPORT_OUTPUT_FILE
static String ACCESS_TOKEN

static void main(args) {
def cli = new CliBuilder(usage: "groovy UpdateProfileAttribute -f <datadir> -o opusId -p <profileServiceBaseUrl> -u <emailAddress> -r <reportfile>")
def cli = new CliBuilder(usage: "groovy UpdateProfileAttribute -f <datadir> -o opusId -p <profileServiceBaseUrl> -u <emailAddress> -r <reportfile> -a <accessToken>")
cli.f(longOpt: "dir", "source data directory", required: true, args: 1)
cli.o(longOpt: "opusId", "UUID of the FOA Opus", required: true, args: 1)
cli.p(longOpt: "profileServiceBaseUrl", "Base URL of the profile service", required: true, args: 1)
cli.u(longOpt: "userId", "User id of a ALA user importing script", required: true, args: 1)
cli.d(longOpt: "displayName", "Display name of a ALA user importing script", required: true, args: 1)
cli.i(longOpt: "importFile", "Email address of the ALA user importing script", required: true, args: 1)
cli.r(longOpt: "reportFile", "File to write the results of the import to", required: false, args: 1)
cli.a(longOpt: "accessToken", "Bearer token to access profiles service", required: true, args: 1)


OptionAccessor opt = cli.parse(args)
println opt: opt
if(!opt) {
cli.usage()
return
Expand All @@ -72,6 +76,7 @@ class UpdateProfileAttribute {
IMPORT_OUTPUT_FILE = opt.i
OUTPUT_FILE = opt.r ?: "updatedAttributes.json"
ATTRIBUTE_OPTION = OVERWRITE
ACCESS_TOKEN = opt.a ?: ""

Map<Integer, String> attributeTitles = loadAttributeTitles()
Map<Integer, Map<String, List<String>>> taxaAttributes = loadAttributes(attributeTitles)
Expand All @@ -84,21 +89,29 @@ class UpdateProfileAttribute {
output.createNewFile()
}


def csv = parseCsv(new File("${DATA_DIR}/foa_export_name.csv").newReader(FILE_ENCODING))
def csv = parseCsv(new File("${DATA_DIR}/taxon_TMAG_insects.csv").newReader(FILE_ENCODING))
csv.each { taxon ->
if (existingProfile[taxon.NAME.toLowerCase()]) {
PROFILE_ID = URLEncoder.encode(taxon.NAME, FILE_ENCODING)
PROFILE_ID = PROFILE_ID.replace('+', '%20')
PROFILE_SERVICE_PROFILE_URL = "$PROFILE_URL/opus/$OPUS_ID/profile/$PROFILE_ID?latest=true"
println PROFILE_SERVICE_PROFILE_URL

RESTClient client = new RESTClient(PROFILE_SERVICE_PROFILE_URL)
def resp = client.get([:])
def profile = resp.getData()
println new JsonBuilder(profile).toPrettyString()
def client = new RESTClient(PROFILE_SERVICE_PROFILE_URL)
client.setHeaders(["Authorization": "Bearer ${ACCESS_TOKEN}"])
def profile
try {
def resp = client.get([:])
profile = resp.getData()
println new JsonBuilder(profile).toPrettyString()
} catch (groovyx.net.http.HttpResponseException e) {
println "statusCode: " + e.statusCode
println "response data: " + e.response.data
return

}

Integer taxonId = taxon.TAXA_ID as int
String taxonId = taxon.TAXA_ID
Map<String, List<String>> attrs = taxaAttributes.get(taxonId)
attrs.each { k, v ->
def ATTRIBUTE_URL
Expand Down Expand Up @@ -192,12 +205,12 @@ class UpdateProfileAttribute {

static Map<Integer, String> loadAttributeTitles() {
Map<Integer, String> attributeTitles = [:]
def csv = parseCsv(new File("${DATA_DIR}/foa_export_attr.csv").newReader(FILE_ENCODING))
def csv = parseCsv(new File("${DATA_DIR}/attributes_TMAG_Insects.csv").newReader(FILE_ENCODING))
csv.each { line ->
try {
String propertyName = line.PROPERTY_NAME?.replaceAll("_", " ")?.trim()
propertyName = StringUtils.capitalize(propertyName)
attributeTitles << [(line.PROPERTY_ID as Integer): propertyName]
attributeTitles << [(line.PROPERTY_ID): propertyName]
} catch (e) {
println "Failed to extract attribute titles from line [${line}]"
e.printStackTrace()
Expand All @@ -216,13 +229,16 @@ class UpdateProfileAttribute {
static Map<Integer, Map<String, List<String>>> loadAttributes(Map<Integer, String> attributeTitles) {
Map<Integer, Map<String, List<String>>> attributes = [:]
int count = 0
def csv = parseCsv(new File("${DATA_DIR}/foa_export_attr.csv").newReader(FILE_ENCODING))

def csv = parseCsv(new File("${DATA_DIR}/attributes_TMAG_Insects.csv").newReader(FILE_ENCODING))
csv.each { line ->
if (count++ % 50 == 0) println "Processing attribute line ${count}..."
try {
String title = attributeTitles[line.PROPERTY_ID as Integer]

attributes.get(line.TAXA_ID as Integer, [:]).get(title, []) << cleanupText(line.VAL)
String title = attributeTitles[line.PROPERTY_ID]

attributes.get(line.TAXA_ID, [:]).get(title, []) << cleanupText(line.VAL)

} catch (e) {
println "${e.message} - ${line}"
}
Expand Down

2 comments on commit 6b585f4

@salomon-j
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@temi fyi script update

@temi
Copy link
Contributor

@temi temi commented on 6b585f4 Dec 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@salomon-j LGTM

Please sign in to comment.