From fc13c1b476745e14b142d7b88d155596abf8e74e Mon Sep 17 00:00:00 2001
From: jambun
Date: Thu, 2 Feb 2017 16:05:49 +1100
Subject: [PATCH] first pass basic resource csv importer

---
Review note: the basic_resource_converter.rb hunk below was revised by hand
(hunk length and diffstat updated; the blob id on its index line was not
regenerated). Indentation of context lines was reconstructed from a
whitespace-collapsed copy and should be checked against the target files.

 README.md                                     |  12 +
 .../converters/basic_resource_converter.rb    | 199 ++++++++++++++++++
 frontend/locales/en.yml                       |   1 +
 samples/basic_resource.csv                    |   1 +
 4 files changed, 213 insertions(+)
 create mode 100644 backend/converters/basic_resource_converter.rb
 create mode 100644 samples/basic_resource.csv

diff --git a/README.md b/README.md
index 0d2856f..d451150 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,7 @@ It adds three new Import Types to the Import Data Job Type.
   * Arrearage spreadsheet
   * Donor Box List spreadsheet
   * Digital Library Collections CSV
+  * Basic Resource CSV
 
 
 Arrearage spreadsheet
@@ -73,6 +74,17 @@ A sample CSV file is included here:
     https://github.com/hudmol/nla_staff_spreadsheet_importer/blob/master/samples/dlc.csv
 
 
+Basic Resource CSV
+------------------
+
+Takes a CSV file and creates a resource record with no components for each row. The resource record
+includes rights statement, extent, date, and 'scope and contents' note sub-records.
+
+A sample CSV file is included here:
+
+    https://github.com/hudmol/nla_staff_spreadsheet_importer/blob/master/samples/basic_resource.csv
+
+
 ## Installation
 
 ### From a released version
diff --git a/backend/converters/basic_resource_converter.rb b/backend/converters/basic_resource_converter.rb
new file mode 100644
index 0000000..738179f
--- /dev/null
+++ b/backend/converters/basic_resource_converter.rb
@@ -0,0 +1,199 @@
+require 'csv'
+require 'securerandom'
+
+# Import converter that creates one top-level ArchivesSpace resource record
+# (no components) for each data row of a "basic resource" CSV file.
+#
+# Values are matched to fields by column position, not by header text; the
+# expected column order is the @columns list built in #initialize.
+class BasicResourceConverter < Converter
+
+  # Values found in the title column of the spreadsheet's header rows.
+  HEADER_TITLES = ['resources_basicinformation_title', 'Title'].freeze
+
+
+  # Returns a converter for input_file when type is "basic_resource",
+  # otherwise nil.
+  def self.instance_for(type, input_file)
+    new(input_file) if type == "basic_resource"
+  end
+
+
+  # Lists the import type this converter handles. show_hidden is accepted
+  # but not consulted here.
+  def self.import_types(show_hidden = false)
+    [
+      {
+        :name => "basic_resource",
+        :description => "Paper Collection Sheets CSV"
+      }
+    ]
+  end
+
+
+  # Short human-readable description of the conversion.
+  def self.profile
+    "Convert a Paper Collection Sheets CSV to ArchivesSpace Resource records"
+  end
+
+
+  # input_file - path of the CSV file to convert.
+  def initialize(input_file)
+    super
+    @batch = ASpaceImport::RecordBatch.new
+    @input_file = input_file
+    @records = []
+
+    # Field name for each CSV column, in spreadsheet order.
+    @columns = %w(
+      title
+      resource_id
+      access_conditions
+      use_conditions
+      granted_note
+      scopecontent_note
+      processing_note_1
+      processing_note_2
+      date_expression
+      extent_container_summary
+      extent_number
+      extent_type
+    )
+  end
+
+
+  # Reads the CSV, builds a resource record for every data row and adds the
+  # records to the batch.
+  def run
+    CSV.read(@input_file).each do |row|
+      values = row_values(row)
+
+      # skip rows with no content at all
+      next if values.compact.empty?
+
+      values_map = Hash[@columns.zip(values)]
+
+      # skip header rows and rows without a title
+      title = values_map['title']
+      next if title.nil? || HEADER_TITLES.include?(title)
+
+      create_resource(values_map)
+    end
+
+    # assign all records to the batch importer in reverse
+    # order to retain position from spreadsheet
+    @records.reverse_each { |record| @batch << record }
+  end
+
+
+  # Returns the path of the batch's output file.
+  def get_output_path
+    @batch.get_output_path
+  end
+
+
+  private
+
+  # Builds a resource record from row (a hash of column name => value) and
+  # queues it in @records.
+  def create_resource(row)
+    # The whole identifier is stored in id_0. If it ever needs to be spread
+    # over id_0..id_3, build id_a with row['resource_id'].split(/\s+/); the
+    # padding below keeps the array at four entries either way.
+    id_a = [row['resource_id']]
+    id_a += Array.new(4 - id_a.length)
+
+    # NOTE(review): 12345 looks like a placeholder repository id -- confirm
+    # that the import pipeline rewrites it.
+    uri = "/repositories/12345/resources/import_#{SecureRandom.hex}"
+
+    @records << JSONModel::JSONModel(:resource).from_hash({
+      :uri => uri,
+      :id_0 => id_a[0],
+      :id_1 => id_a[1],
+      :id_2 => id_a[2],
+      :id_3 => id_a[3],
+      :title => row['title'],
+      :level => 'collection',
+      :repository_processing_note => format_processing_note(row),
+      :extents => [format_extent(row, :portion => 'whole')].compact,
+      :dates => [format_date(row['date_expression'])].compact,
+      :rights_statements => [format_rights_statement(row)].compact,
+      :notes => [format_scopecontent_note(row)].compact,
+      :language => 'eng',
+    })
+  end
+
+
+  # Rights statement sub-record built from the access, use and granted-note
+  # columns.
+  def format_rights_statement(row)
+    {
+      :rights_type => 'institutional_policy',
+      :permissions => row['access_conditions'],
+      :restrictions => row['use_conditions'],
+      :granted_note => row['granted_note']
+    }
+  end
+
+
+  # Joins whichever of the two processing note columns are present with a
+  # single space.
+  def format_processing_note(row)
+    [row['processing_note_1'], row['processing_note_2']].compact.join(' ')
+  end
+
+
+  # Date sub-record for date_string, or nil when no date was given. An
+  # expression containing a hyphen is treated as an inclusive range.
+  def format_date(date_string)
+    return if date_string.nil?
+
+    {
+      :date_type => date_string.include?('-') ? 'inclusive' : 'single',
+      :label => 'creation',
+      :expression => date_string
+    }
+  end
+
+
+  # Extent sub-record for row, or nil unless both the extent number and
+  # type are present. opts[:portion] defaults to 'part'.
+  def format_extent(row, opts = {})
+    return unless row['extent_number'] && row['extent_type']
+
+    {
+      :portion => opts.fetch(:portion) { 'part' },
+      :extent_type => row['extent_type'],
+      :container_summary => row['extent_container_summary'],
+      :number => row['extent_number'],
+    }
+  end
+
+
+  # 'Scope and contents' note for row, or nil when the column is empty.
+  def format_scopecontent_note(row)
+    return unless row['scopecontent_note']
+
+    {
+      :jsonmodel_type => 'note_multipart',
+      :type => 'scopecontent',
+      :subnotes => [{
+        :jsonmodel_type => 'note_text',
+        :content => row['scopecontent_note']
+      }]
+    }
+  end
+
+
+  # Returns the cells of row stripped of surrounding whitespace. Missing
+  # and whitespace-only cells become nil so that blank rows, and blank
+  # optional fields, are skipped instead of producing empty sub-records.
+  def row_values(row)
+    row.map do |cell|
+      text = cell.to_s.strip
+      text.empty? ? nil : text
+    end
+  end
+
+end
diff --git a/frontend/locales/en.yml b/frontend/locales/en.yml
index 5ebcf36..816c38c 100644
--- a/frontend/locales/en.yml
+++ b/frontend/locales/en.yml
@@ -3,3 +3,4 @@ en:
     import_type_arrearage: Arrearage spreadsheet
     import_type_donation: Donor Box List spreadsheet
     import_type_dlc: Digital Library Collections CSV
+    import_type_basic_resource: Basic Resource CSV
diff --git a/samples/basic_resource.csv b/samples/basic_resource.csv
new file mode 100644
index 0000000..ddb17d2
--- /dev/null
+++ 
b/samples/basic_resource.csv @@ -0,0 +1 @@ +resources_basicinformation_title,resources_basicinformation_identifier,resources_rights_statements = institutional policy; access conditions,resources_rights_statement = institutional policy: use conditions,resources_rights_statement = institutional policy: granted note,resources_notes_scopeandcontentnote,resource_basic_information_resopsitory_processing_note,resource_basic_information_resopsitory_processing_note,resources_dates = Creation; Expression,resources_container_summary,resources_extent_number,resources_extent_type ,,,,,,,project statement (WORDING TO BE ADVISED),,CONTENT TBA,CONTENT TBA,CONTENT TBA Title,Orig Call Number,Access Conditions,Terms of use,Granted Note,Summary,Related Holdings,,Date range,Extent,Metres, "Journal of H.M.S. Endeavour, 1768-1771",MS 1,Available for reference. Not for loan.,,Something,"Holograph journal, written entirely in James Cook's hand, of the voyage of H.M.S. Endeavour during which Cook discovered eastern Australia and circumnavigated New Zealand. Bound with the journal is a copy of a report from John Hutchinson, surgeon of the Dolphin to Capt. Samuel Wallis, 16th May 1768, of observations on the effects of saloop, portable soup, mustard and vinegar, distilled water and beef fat on scurvy. For further details and publications of the journal see J.C. Beaglehole (ed.) 'The journals of Captain Cook' I cxciv-cciv.",,,[1768-1771],753 p. fol.,1,metres Journal extracts 1839-1840,MS 10,Available for reference. Not for loan.,,Not something,"Elliott described cultural, religious and social conditions and the institutions and people connected with them in graphic, original detail. He made a particular study of educational and religious work. His travels in Australia extended to Paramatta, Pitt Town, Windsor and to Richmond, in the Hawkesbury District. 
In New Zealand, where his time was short, he only travelled to Pachia and to other points on the shore of the Bay of Islands.",,,1839-1840,1 v.,2,metres Papers regarding the biography of Tom Roberts 1933-1945,MS 100,Available for reference. Not for loan.,,,"Galleys of book 'Tom Roberts: father of Australian landscape painting' (1935). Letters to R. H. Croll concerning the biography, 1932-1935, including letters from Jean Roberts, Caleb Roberts (son), M. J. MacNally, William Moore, Elizabeth A. Fraser, G. V. F. Mann, Kenneth Binns and Dr. William Maloney. Letters, 1935-1936, acknowledging receipt of copies of the biography. 'Before I forget: more recollections of R. H. Croll' is also included.",,,1933-1945,4 cm. 4 folders.,3,metres "Papers relating to the Church of Scotland and Scottish settlers in East Maitland, N.S.W. 1839-1865",MS 1000,Available for reference. Not for loan.,,"A note, granted","Drafts of letters, notes, accounts, food lists, etc. to do with the Church of Scotland and Highlanders settled in East Maitland, N.S.W. An important concern is these people's need of a Highland pastor.",,,1839-1865,43 p.,4,metres "Papers of A.J. Metcalfe, 1961-1971",MS 10001,Available for research. Not for loan.,,dfs s sdf,"Typescript autobiography (2 volumes) recounting Dr Metcalfe's career in health, including observations on health administration, Aboriginal health and communicable diseases. Also contains observations on political figures, including Earle Page and 'Doc' Evatt.",,,[1961-1971],9 cm. (3 folders),5,metres "Papers of Rupert Gerritsen, 2007",MS 10002,Available for research. Not for loan.,,,Research paper (25 p.) entitled 'Ethnohistory and the Antipodes'.,,,2007,1 cm. (1 folder),6,metres "Papers of Pru Gordon, 1975-1992",MS 10003,Available for reference. 
Not for loan.,,,"Copies of newspaper cuttings relating to Rainbow Warrior, Greenpeace and the French Pacific.",MS Acc GB96/2103,,1975-1992,,7,metres "Letters of Patrick White to James Waites, 1978-1981",MS 10004,Available for reference. Not for loan.,,,"Correspondence from Patrick White to James Waites. Comprises: one signed postcard ca. 1978; one short autograph letter dated 12th August 1979, concerning the death of James Waites sister (aged 23) and offering company and a meal; three page autograph letter, dated 2nd June 1981. Letter includes reference to Flaws in the glass and offers advice on Waites further education.",MS Acc07/86,,1978-1981,,8,metres "Manuscript translations of German sheep breeding texts, [ca. 1880-1900]",MS 10005,Available for reference. Not for loan.,,,"Handwritten manuscript translations of German publications: 1. Die verschiedenen Phasen der deutschen Merinozucht (Berlin : Bosselmann, 1857) : The different phases of German Merino breeding (378 leaves); 2. Die Schafzucht und Wollkunde [fuer Schafzuechter und Landwirthe] (Stuttgart : Ebner & Seubert, 1860) : Sheep rearing and wool studies (199 leaves) plus 45 original photographs. Each manuscript is written in ink in the same hand.",MS Acc07/85,,[ca. 1880-1900],,9,metres "Papers of the Carey family, 1891-1988",MS 10006,Available for research. Not for loan.,,,"The papers include press cuttings, music programs, publications, letters, school documents and photographs of members of the Carey family, and published and unpublished music manuscripts of Dunbar Carey. The collection also includes a number of diaries: Alice Waterworth (Carey) typescript copy of a diary describing her voyage to Australia 1886; Diary of Private E.R.D. Carey compiled by John L. Carey; The musical life of Mr Dunbar Carey compiled by John L. Carey.",MS Acc07/62,,1891-1988,,10,metres "Journeyman at Cambridge, [ca. 1961]",MS 10007,Available for reference. Not for loan.,"Requests for copying should be directed to Thomas H. 
Manning Polar Archives, Scott Polar Research Institute, University of Cambridge.",,"Copy of an unpublished autobiography by Thomas Griffith Taylor covering his life in Cambridge to 1939, with accounts of journeys elsewhere including the Antarctic during the British National Antarctic Expedition, 1910-13.",MS Acc07/70,,[ca. 1961],,11,metres "Theatrical scripts based on Australian themes or written for Australian actors, 1860-1869",MS 10008,Available for reference. Not for loan.,,,"Collection of handwritten theatrical scripts, the majority belonging to F.B. Egan of the Queen's Theatre, Manchester, comprising some thirty stitched parts (each covering either an individual act or character part) for plays performed in the mid nineteenth century British provincial theatres. Plays include Gold, in three acts and Never too late to mend, in four acts as well as other plays and parts. Some of the scripts have been extensively marked-up with stage-directions, cuts and additions for performance.",MS Acc07/49,,1860-1869,,12,metres "Diary of Alberto Dias Soares, 1855-1856",MS 10009,Available for reference. Not for loan.,,,"One volume is an original diary written by Soares documenting his life in Australia. He writes about his work as a lay assistant to Reverend Robert Cartwright, his daily duties and the people around him.The second volume is a full contemporary transcription of the diary, of which provenance is unknown.",MS Acc07/36,,1855-1856,,13,metres "Papers of William Hughes, 1940 May-1952",MS 1001,Available for research. Not for loan.,,,"Items include: I. Copy of letter from Ezra Norton, Managing Director of 'Truth' and 'Sportsman' Ltd., to R.G. Menzies (3rd June 1940). II. 'Budget 1951-1952'. Four typescript pages, unsigned. Possibly a budget speech presented to the Australian Parliament. III. Two page proposal from H.S. Foll (Minister for the Interior) entitled 'Proposal for the transfer of young children from the United Kingdom to Australia during the war'. 
Dated 10th June 1940. IV. One page copy of a cablegram to the High Commissioner (29th May 1940) seeking to ascertain Britain's response to an Australian request for looking after 5,000 British children for the duration of the war. V. Four secret reports to the War Cabinet for its consideration (12th to 15th June 1940). Includes reports by the Chiefs of Staff; summary of decisions of the War Cabinet for the information of the Full Cabinet. VI. Folio items include two hand coloured etchings (one by De Becourt) and 'The four freedoms' ([11 p.]), originally published by 'The Saturday Evening Post' during February and March of 1943 and presented to Billy Hughes. The art work is by Norman Rockwell.",,,[1940-1952],1 cm. (1 folder) + folio items.,14,metres "Letter from Frank Hurley, [19--]",MS 10010,Available for reference. Not for loan.,,,Letter of reply from Frank Hurley to Miss Rona Hill. Handwritten on the letterhead Frank Hurley Master Art Production.,MS Acc07/88,,[19--],,15,metres "Papers of Beth Dean, 1950-2003",MS 10011,Available for reference. Not for loan.,,,"Collection contains photographs, postcards, correspondence, newspaper clippings, pamphlets, press releases, programs, literary manuscripts and books.Photographs in the collection are of dance companies, dance events and portraits of well known dancers. Some of the individuals covered in the photographs include Martha Graham, Anna Pavlova, Margot Fonteyn and Rudolph Nureyev. The collection also contains two literary manuscripts, a typescript essay Valley of the Kangaroos by Victor Carell and handwritten notes and a typesript draft by Dean for a book titled Laura's Story (biography of Dean's grandmother).",MS Acc04/180,,1950-2003,,16,metres "Papers of Sydney Powell, [circa 1920-1940]",MS 10012,Available for research. Not for loan.,,,Unpublished annotated typescripts by Sydney Powell. Titles include: Each to his taste; Observations; Brought forward; Off the bus route; Poems and verse. 
Also included in the collection is a photograph of Sydney Powell and some correspondence.,MS Acc03/124,,1999,,17,metres "Records of the Ninety-Nines, International Organisation of Women Pilots, Australian section, 1959-1994",MS 10013,Available for reference. Not for loan.,,,"Collection contains minutes, agenda papers, newsletters, nominations for offices, Governor's reports, general correspondence and financial records.",MS Acc03/2,,1066-2001,,18,metres "Papers of Ray Mathew, 1955-2004",MS 10014,Available for reference. Not for loan.,,,"Press reviews, Ray Mathew's 'Notes for producers and others' and a program for a production of Mathew's play 'We find the bunyip' in c. 1955.Included is a letter from Bill Callander recounting his contact with Mathew in the 1950s.",MS Acc04/109,,1955-2004,,19,metres "Papers of Russell Braddon, 1943-1995 (bulk 1943-1973)",MS 10015,Available for research. Not for loan.,Copying of Russell Braddon's copyright material permitted for research purposes.,,Letters (1943-1953) including those written by Braddon to his mother during imprisonment in Changi; newspaper clippings from 1949 to 1995 relating to Braddon's career as an author; newspaper clippings of reviews for Braddon's You'll never get off this island (1960); typescript of Braddon's The taming of the shrew (1966); bound copy for his mother of a typescript draft of Braddon's Out of the storm.,,,[1943-1995],20 cm. (1 box),20,metres \ No newline at end of file