diff --git a/.gitignore b/.gitignore
index 07780bc..c412cf0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,7 +9,6 @@ scratch
 .favorites.json
 dist
 NOTES.md
-data/
 .env
 bak.env
 *.code-workspace
diff --git a/README.md b/README.md
index ed0c46c..52190a2 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,9 @@ Run the following command to fetch share data:
 
     make fetch_share_data
 
+# SHARE Schema
+A detailed description of the SHARE schema is contained in schema/share_schema.pdf. The schema folder also contains the script that was used to generate that PDF file from the original HTML pages on the SHARE site.
+
 # Loading and Using Existing SHARE Data from the API
 Included is a fairly simple script that demonstrates loading in JSON files that were retrieved by the fetch share data script or previously downloaded. It loads them from files within a given directory and converts them to JSON objects within code, and then outputing a sample record to the command line. When loading the JSON objects it also demonstrates to create simplified versions of the SHARE objects that only has relevant fields included. This is intended to be a starting point only for someone to add additional code to then do something with the JSON objects loaded (or to copy the approach in other languages such as python or ruby).
 
diff --git a/schema/get_share_schema_html_to_pdf.sh b/schema/get_share_schema_html_to_pdf.sh
new file mode 100644
index 0000000..ae1588f
--- /dev/null
+++ b/schema/get_share_schema_html_to_pdf.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+#
+# Render each SHARE schema page listed in share_schema_links.csv to PDF with
+# wkhtmltopdf, then merge share_schema_home.pdf and the rendered pages into a
+# single share_schema.pdf with pdftk.
+#
+# Usage (the input paths are relative, so run it from the schema/ directory):
+#   ./get_share_schema_html_to_pdf.sh
+# Output is written to a new timestamp-named folder in the current directory.
+set -uo pipefail
+
+input="./share_schema_links.csv"
+home_pdf="share_schema_home.pdf"
+foldername="$(date +%Y%m%d%H%M%S)"
+
+# Fail early with a clear message rather than a late redirect or pdftk error.
+for required in "$input" "$home_pdf"; do
+  if [[ ! -f "$required" ]]; then
+    printf 'error: %s not found; run this script from schema/\n' "$required" >&2
+    exit 1
+  fi
+done
+
+mkdir -p "./$foldername" || exit 1
+
+# An array keeps every path a single argument when it is handed to pdftk.
+pdf_files=("$home_pdf")
+counter=0
+
+# The "|| [[ -n ... ]]" part also handles a last line with no trailing newline.
+while IFS= read -r line || [[ -n "$line" ]]; do
+  # Skip blank lines; they would be passed to wkhtmltopdf as an empty URL.
+  [[ -n "$line" ]] || continue
+  counter=$((counter + 1))
+  page_pdf="./$foldername/$counter.pdf"
+
+  # stdin is /dev/null so the renderer can never consume the URL list.
+  if ! wkhtmltopdf "$line" "$page_pdf" </dev/null; then
+    printf 'warning: wkhtmltopdf reported an error for %s\n' "$line" >&2
+  fi
+
+  # Merge only pages that were actually written, so one bad URL cannot make
+  # pdftk fail on a missing input file.
+  if [[ -s "$page_pdf" ]]; then
+    pdf_files+=("$page_pdf")
+    printf '%s\n' "${pdf_files[*]}"
+  else
+    printf 'warning: no PDF produced for %s; skipping it\n' "$line" >&2
+  fi
+done < "$input"
+
+echo "Concatenating the PDF files..."
+pdftk "${pdf_files[@]}" cat output "./$foldername/share_schema.pdf"
diff --git a/schema/share_schema.pdf b/schema/share_schema.pdf
new file mode 100644
index 0000000..7e17c45
Binary files /dev/null and b/schema/share_schema.pdf differ
diff --git a/schema/share_schema_home.pdf b/schema/share_schema_home.pdf
new file mode 100644
index 0000000..3ed033d
Binary files /dev/null and b/schema/share_schema_home.pdf differ
diff --git a/schema/share_schema_links.csv b/schema/share_schema_links.csv
new file mode 100644
index 0000000..0ba475f
--- /dev/null
+++ b/schema/share_schema_links.csv
@@ -0,0 +1,58 @@
+https://share.osf.io/api/schema/CreativeWork
+https://share.osf.io/api/schema/DataSet
+https://share.osf.io/api/schema/Patent
+https://share.osf.io/api/schema/Poster
+https://share.osf.io/api/schema/Presentation
+https://share.osf.io/api/schema/Publication
+https://share.osf.io/api/schema/Article
+https://share.osf.io/api/schema/Book
+https://share.osf.io/api/schema/ConferencePaper
+https://share.osf.io/api/schema/Dissertation
+https://share.osf.io/api/schema/Preprint
+https://share.osf.io/api/schema/Project
+https://share.osf.io/api/schema/Registration
+https://share.osf.io/api/schema/Report
+https://share.osf.io/api/schema/Thesis
+https://share.osf.io/api/schema/WorkingPaper
+https://share.osf.io/api/schema/Repository
+https://share.osf.io/api/schema/Retraction
+https://share.osf.io/api/schema/Software
+https://share.osf.io/api/schema/Organization
+https://share.osf.io/api/schema/Consortium
+https://share.osf.io/api/schema/Department
+https://share.osf.io/api/schema/Institution
+https://share.osf.io/api/schema/Person
+https://share.osf.io/api/schema/AgentIdentifier
+https://share.osf.io/api/schema/WorkIdentifier
+https://share.osf.io/api/schema/Award
+https://share.osf.io/api/schema/Subject
+https://share.osf.io/api/schema/Tag
+https://share.osf.io/api/schema/IsAffiliatedWith
+https://share.osf.io/api/schema/IsEmployedBy
+https://share.osf.io/api/schema/IsMemberOf
+https://share.osf.io/api/schema/Cites
+https://share.osf.io/api/schema/Compiles
+https://share.osf.io/api/schema/Corrects
+https://share.osf.io/api/schema/Discusses
+https://share.osf.io/api/schema/Disputes
+https://share.osf.io/api/schema/Documents
+https://share.osf.io/api/schema/Extends
+https://share.osf.io/api/schema/IsDerivedFrom
+https://share.osf.io/api/schema/IsPartOf
+https://share.osf.io/api/schema/IsSupplementTo
+https://share.osf.io/api/schema/References
+https://share.osf.io/api/schema/RepliesTo
+https://share.osf.io/api/schema/Retracts
+https://share.osf.io/api/schema/Reviews
+https://share.osf.io/api/schema/UsesDataFrom
+https://share.osf.io/api/schema/Contributor
+https://share.osf.io/api/schema/Creator
+https://share.osf.io/api/schema/PrincipalInvestigator
+https://share.osf.io/api/schema/PrincipalInvestigatorContact
+https://share.osf.io/api/schema/Funder
+https://share.osf.io/api/schema/Host
+https://share.osf.io/api/schema/Publisher
+https://share.osf.io/api/schema/ThroughAwards
+https://share.osf.io/api/schema/ThroughContributor
+https://share.osf.io/api/schema/ThroughSubjects
+https://share.osf.io/api/schema/ThroughTags
diff --git a/scripts/load_all_files.sh b/scripts/load_all_files.sh
new file mode 100644
index 0000000..6c38483
--- /dev/null
+++ b/scripts/load_all_files.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# Render every URL listed in ./SHARE_2.csv (one per line) to a numbered PDF
+# (1.pdf, 2.pdf, ...) in the current directory using wkhtmltopdf.
+# Exits non-zero if the list is missing or any page reported an error.
+set -uo pipefail
+
+input="./SHARE_2.csv"
+counter=0
+status=0
+
+if [[ ! -f "$input" ]]; then
+  printf 'error: %s not found\n' "$input" >&2
+  exit 1
+fi
+
+# The "|| [[ -n ... ]]" part also handles a last line with no trailing newline.
+while IFS= read -r line || [[ -n "$line" ]]; do
+  # Skip blank lines; they would be passed to wkhtmltopdf as an empty URL.
+  [[ -n "$line" ]] || continue
+  counter=$((counter + 1))
+  # stdin is /dev/null so the renderer can never consume the URL list.
+  if ! wkhtmltopdf "$line" "$counter.pdf" </dev/null; then
+    printf 'warning: wkhtmltopdf reported an error for %s\n' "$line" >&2
+    status=1
+  fi
+done < "$input"
+
+exit "$status"
diff --git a/scripts/mv_share_files.sh b/scripts/mv_share_files.sh
new file mode 100755
index 0000000..a47b52d
--- /dev/null
+++ b/scripts/mv_share_files.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+#
+# For every *.gz file in the current directory: create a directory named after
+# the file (dots become underscores and the "_json-list_gz" suffix is dropped)
+# and move the file into it with "_6" inserted before ".gz".
+# Example: foo.json-list.gz -> foo/foo.json-list_6.gz
+set -uo pipefail
+
+# Without nullglob an unmatched pattern would loop once over the literal "*.gz".
+shopt -s nullglob
+
+status=0
+for file in *.gz; do
+  dir=${file//./_}
+  dir2=${dir//_json-list_gz/}
+  newfile=${file//.gz/_6.gz}
+
+  # -p tolerates a directory left behind by an earlier run; "--" protects
+  # against file names that start with a dash.
+  if ! mkdir -p -- "$dir2" || ! mv -- "$file" "$dir2/$newfile"; then
+    printf 'error: could not move %s into %s\n' "$file" "$dir2" >&2
+    status=1
+  fi
+done
+
+exit "$status"