diff --git a/.sass-cache/023a3dba783b76afb85cda5e760bcaa735640621/style.sassc b/.sass-cache/023a3dba783b76afb85cda5e760bcaa735640621/style.sassc deleted file mode 100644 index 895c32e..0000000 Binary files a/.sass-cache/023a3dba783b76afb85cda5e760bcaa735640621/style.sassc and /dev/null differ diff --git a/gemfile b/Gemfile similarity index 89% rename from gemfile rename to Gemfile index c708651..80dec74 100644 --- a/gemfile +++ b/Gemfile @@ -1,5 +1,6 @@ source 'https://rubygems.org' gem 'sinatra' +ruby "2.4.1" # other dependencies gem 'haml' @@ -11,3 +12,4 @@ gem 'sass' gem 'haml-contrib' gem 'tilt', '~> 1.4.1' gem 'RedCloth' +gem 'yard' diff --git a/Gemfile.lock b/Gemfile.lock index b3d50e6..45c5835 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -18,6 +18,7 @@ GEM rack-protection (~> 1.4) tilt (~> 1.3, >= 1.3.4) tilt (1.4.1) + yard (0.9.12) PLATFORMS ruby @@ -33,3 +34,10 @@ DEPENDENCIES shotgun sinatra tilt (~> 1.4.1) + yard + +RUBY VERSION + ruby 2.4.1p111 + +BUNDLED WITH + 1.16.1 diff --git a/Rakefile b/Rakefile deleted file mode 100644 index 9b4b8af..0000000 --- a/Rakefile +++ /dev/null @@ -1,199 +0,0 @@ -require 'rake/clean' -require 'rake/testtask' -require 'fileutils' -require 'date' - -# CI Reporter is only needed for the CI -begin - require 'ci/reporter/rake/test_unit' -rescue LoadError -end - -task :default => :test -task :spec => :test - -CLEAN.include "**/*.rbc" - -def source_version - @source_version ||= begin - load './lib/sinatra/version.rb' - Sinatra::VERSION - end -end - -def prev_feature - source_version.gsub(/^(\d\.)(\d+)\..*$/) { $1 + ($2.to_i - 1).to_s } -end - -def prev_version - return prev_feature + '.0' if source_version.end_with? 
'.0' - source_version.gsub(/\d+$/) { |s| s.to_i - 1 } -end - -# SPECS =============================================================== - -task :test do - ENV['LANG'] = 'C' - ENV.delete 'LC_CTYPE' -end - -Rake::TestTask.new(:test) do |t| - t.test_files = FileList['test/*_test.rb'] - t.ruby_opts = ['-rubygems'] if defined? Gem - t.ruby_opts << '-I.' - t.warning = true -end - -Rake::TestTask.new(:"test:core") do |t| - core_tests = %w[base delegator encoding extensions filter - helpers mapped_error middleware radius rdoc - readme request response result route_added_hook - routing server settings sinatra static templates] - t.test_files = core_tests.map {|n| "test/#{n}_test.rb"} - t.ruby_opts = ["-rubygems"] if defined? Gem - t.ruby_opts << "-I." - t.warning = true -end - -# Rcov ================================================================ - -namespace :test do - desc 'Measures test coverage' - task :coverage do - rm_f "coverage" - sh "rcov -Ilib test/*_test.rb" - end -end - -# Website ============================================================= - -desc 'Generate RDoc under doc/api' -task 'doc' => ['doc:api'] -task('doc:api') { sh "yardoc -o doc/api" } -CLEAN.include 'doc/api' - -# README =============================================================== - -task :add_template, [:name] do |t, args| - Dir.glob('README.*') do |file| - code = File.read(file) - if code =~ /^===.*#{args.name.capitalize}/ - puts "Already covered in #{file}" - else - template = code[/===[^\n]*Liquid.*index\.liquid<\/tt>[^\n]*/m] - if !template - puts "Liquid not found in #{file}" - else - puts "Adding section to #{file}" - template = template.gsub(/Liquid/, args.name.capitalize).gsub(/liquid/, args.name.downcase) - code.gsub! 
/^(\s*===.*CoffeeScript)/, "\n" << template << "\n\\1" - File.open(file, "w") { |f| f << code } - end - end - end -end - -# Thanks in announcement =============================================== - -team = ["Ryan Tomayko", "Blake Mizerany", "Simon Rozet", "Konstantin Haase"] -desc "list of contributors" -task :thanks, [:release,:backports] do |t, a| - a.with_defaults :release => "#{prev_version}..HEAD", - :backports => "#{prev_feature}.0..#{prev_feature}.x" - included = `git log --format=format:"%aN\t%s" #{a.release}`.lines.map { |l| l.force_encoding('binary') } - excluded = `git log --format=format:"%aN\t%s" #{a.backports}`.lines.map { |l| l.force_encoding('binary') } - commits = (included - excluded).group_by { |c| c[/^[^\t]+/] } - authors = commits.keys.sort_by { |n| - commits[n].size } - team - puts authors[0..-2].join(', ') << " and " << authors.last, - "(based on commits included in #{a.release}, but not in #{a.backports})" -end - -desc "list of authors" -task :authors, [:commit_range, :format, :sep] do |t, a| - a.with_defaults :format => "%s (%d)", :sep => ", ", :commit_range => '--all' - authors = Hash.new(0) - blake = "Blake Mizerany" - overall = 0 - mapping = { - "blake.mizerany@gmail.com" => blake, "bmizerany" => blake, - "a_user@mac.com" => blake, "ichverstehe" => "Harry Vangberg", - "Wu Jiang (nouse)" => "Wu Jiang" } - `git shortlog -s #{a.commit_range}`.lines.map do |line| - line = line.force_encoding 'binary' if line.respond_to? 
:force_encoding - num, name = line.split("\t", 2).map(&:strip) - authors[mapping[name] || name] += num.to_i - overall += num.to_i - end - puts "#{overall} commits by #{authors.count} authors:" - puts authors.sort_by { |n,c| -c }.map { |e| a.format % e }.join(a.sep) -end - -desc "generates TOC" -task :toc, [:readme] do |t, a| - a.with_defaults :readme => 'README.md' - - def self.link(title) - title.downcase.gsub(/(?!-)\W /, '-').gsub(' ', '-').gsub(/(?!-)\W/, '') - end - - puts "* [Sinatra](#sinatra)" - title = Regexp.new('(?<=\* )(.*)') # so Ruby 1.8 doesn't complain - File.binread(a.readme).scan(/^##.*/) do |line| - puts line.gsub(/#(?=#)/, ' ').gsub('#', '*').gsub(title) { "[#{$1}](##{link($1)})" } - end -end - -# PACKAGING ============================================================ - -if defined?(Gem) - # Load the gemspec using the same limitations as github - def spec - require 'rubygems' unless defined? Gem::Specification - @spec ||= eval(File.read('sinatra.gemspec')) - end - - def package(ext='') - "pkg/sinatra-#{spec.version}" + ext - end - - desc 'Build packages' - task :package => %w[.gem .tar.gz].map {|e| package(e)} - - desc 'Build and install as local gem' - task :install => package('.gem') do - sh "gem install #{package('.gem')}" - end - - directory 'pkg/' - CLOBBER.include('pkg') - - file package('.gem') => %w[pkg/ sinatra.gemspec] + spec.files do |f| - sh "gem build sinatra.gemspec" - mv File.basename(f.name), f.name - end - - file package('.tar.gz') => %w[pkg/] + spec.files do |f| - sh <<-SH - git archive \ - --prefix=sinatra-#{source_version}/ \ - --format=tar \ - HEAD | gzip > #{f.name} - SH - end - - task 'release' => ['test', package('.gem')] do - if File.binread("CHANGES") =~ /= \d\.\d\.\d . 
not yet released$/i - fail 'please update changes first' unless %x{git symbolic-ref HEAD} == "refs/heads/prerelease\n" - end - - sh <<-SH - gem install #{package('.gem')} --local && - gem push #{package('.gem')} && - git commit --allow-empty -a -m '#{source_version} release' && - git tag -s v#{source_version} -m '#{source_version} release' && - git tag -s #{source_version} -m '#{source_version} release' && - git push && (git push sinatra || true) && - git push --tags && (git push sinatra --tags || true) - SH - end -end \ No newline at end of file diff --git a/data/pbcore-2.1.xsd b/data/pbcore-2.1.xsd new file mode 100644 index 0000000..24ee58f --- /dev/null +++ b/data/pbcore-2.1.xsd @@ -0,0 +1,1902 @@ + + + + This is the PBCore version 2.1draft3 XML schema. All + element descriptions can be found at http://www.pbcore.org + + + + + + Definition: The pbcoreCollection element groups + multiple pbcoreDescriptionDocument XML into one container element to allow for a + serialized output. Uses might include API returns or other web service + output. + Best practice: This element is not intended to be + equivalent to the archive/library concept of a 'collection.' Please see + pbcoreAssetType for information on how PBCore can be used to express information + about collections. The element is only applicable to XML expressions of PBCore. This + container enables a similar function to RSS; pbcoreCollection would be similar to + rss:channel and pbcoreDescription document to rss:item. + + + + + + Definition: the pbcoreDescriptionDocument element is a + root XML element for the expression of an individual PBCore record. + pbcoreDescriptionDocument can be used to express intellectual content only (e.g. a + series or collection level record with no associated instantiations), or + intellectual content with one or more instantiations (e.g. an episode of a program + with copies/instantiations on videotape and digital file). 
This element is only + applicable to XML expressions of PBCore. + + + + + + Definition: The pbcoreInstantiation element is the + equivalent of the instantiation element, but used for the expression of an + instantiation record at the root of an XML document. This is most commonly used when + referenced from other schemas, or if you want to create and express a single, + stand-alone instantiation. + Best practice: This is most commonly used when + Intellectual Content (in other words, descriptive metadata) is not expressed using + PBCore, but rather another standard such as MODS or Dublin Core. + + + + + + Definition: The pbcoreCollectionType schema type allows + the addition of attributes that describe the PBCoreCollection. The attributes define + the title, the description, the source, the reference and the date of the + collection. + + + + + Definition: The pbcoreDescriptionDocument + element assembles together all of PBCore knowledge items into a single data + record organized in a hierarchical structure. For PBCore these knowledge + items are metadata descriptions of media, including all the knowledge items + and metadata terms and values associated with its content and + containers. + + + + + + Definition: The collectionTitle attribute is a + title or label for the group of individual serialized XML records contained + within one pbcoreCollection element. + + + + + Definition: The collectionDescription attribute is + a description group of individual serialized XML records contained within one + pbcoreCollection element. + + + + + Definition: The collectionSource attribute + indicates an organization, application, or individual for group of individual + XML records contained within a pbcoreCollection element. + + + + + Definition: The collectionRef attribute provides a + URL for the source organization, application, or individual for a group of XML + records contained within a pbcoreCollection element. 
+ + + + + Definition: The collectionDate attribute provides + the date of creation for a pbcoreCollection XML document. + + + + + + + + Definition: The pbcoreDescriptionDocumentType schema + type allows its use as a single asset or repeated use in the + pbcoreCollection. + + + + + + Definition: The pbcoreAssetType element is a + broad definition of the type of intellectual content being described. Asset + types might include those without associated instantiations (a collection or + series), or those with instantiations (programs, episodes, clips, etc.). + Best practice: The asset type should broadly + describe all related instantiations -- for example, if an asset includes + many instantiations representing different generations of a program, the + asset type 'program' remains accurate for all of them. + + + + + + Definition: The pbcoreAssetDate element is + intended to reflect dates associated with the Intellectual + Content. + Best practice: By contrast, instantiationDate + is intended to reflect date information for the specific instance. For + example, if you have a VHS copy of Gone With The Wind, the pbcoreAssetDate + would be 1939, while the instantiationDate of the VHS copy could be 1985. + pbcoreAssetDate may also be used to reflect availability dates, etc. Date + types should be specified using the @dateType attribute. Dates or time-based + events related to the content of the asset, on the other hand, would be + described in the 'coverage' element -- so, while the storyline of Gone with + the Wind takes place in the nineteenth century, this information should be + noted in the Coverage field, not the assetDate field. Best practice is to + use ISO 8601 or some other date/time standard if + possible. + + + + + + Definition: The pbcoreIdentifier element + provides an identifier that can apply to the asset. This identifier should + not be limited to a specific instantiation, but rather is shared by or + common to all instantiations of an asset. 
It can also hold a URL or URI that + points to the asset. + Best practice: Identify the asset by means of a + string or number corresponding to an established or formal identification + system if one exists. Otherwise, use an identification method that is in use + within your agency, station, production company, office, or + institution. + + + + + + Definition: The pbcoreTitle element is a name + or label relevant to the asset. + Best practice: An asset may have many types of + titles, such as a series title, episode title, segment + title, or project title; therefore the element is + repeatable. + + + + + + Definition: The pbcoreSubject element is used + to assign topic headings or keywords that portray the intellectual content + of the asset. A subject is expressed by keywords, key phrases, or even + specific classification codes. Controlled vocabularies, authorities, formal + classification codes, as well as folksonomies and user-generated tags, may + be employed when assigning descriptive subject terms. + + + + + + Definition: The pbcoreDescription element uses + free-form text or a narrative to report general notes, abstracts, or + summaries about the intellectual content of an asset. The information may be + in the form of an individual program description, anecdotal interpretations, + or brief content reviews. The description may also consist of outlines, + lists, bullet points, rundowns, edit decision lists, indexes, or tables of + content. + + + + + + Definition: The pbcoreGenre element describes + the Genre of the asset, which can be defined as a categorical description + informed by the topical nature or a particular style or form of the + content. + Best practice: Genre refers to the intellectual + content of the asset, whereas the element pbcoreAssetType defines a broader + structural category; e.g. an asset might have the Asset Type of Segment, + with a Genre of News, together defining a news segment. 
+ + + + + Definition: The pbcoreRelation element contains + the pbcoreRelationType and pbcoreRelationIdentifier elements. In order to + properly use these two elements they must be nested within the pbcoreRelation + element, and pbcoreRelation must contain both pbcoreRelationType and + pbcoreRelationIdentifier if it is included. + + + + + + Definition: The pbcoreRelationType + element describes the relationship between the asset being + described by the PBCore document and any other asset. Ideally it + would contain text from a controlled vocabulary for describing + relationships. There is some depth to what a relationship could + be. The assets can be related as different episodes in a series, + different tapes in a box set, or different versions of an + original, among others. + Best practice: The assets may be + related in that they are different discrete parts of a single + intellectual unit, one may be a derivative of another, or they + may be different versions that are distinct enough to be + described as separate assets. + + + + + Definition: The + pbcoreRelationIdentifier element contains the identifier of the + related asset. In the case that the related asset has a PBCore + record, this identifier should correspond with the + pbcoreIdentifier of the related asset. However, it is possible + to use this element with a record that isn't in PBCore, in which + case the source attribute should identify the source of the + identifier. + + + + + + + + + Definition: The pbcoreCoverage element is a + container for sub-elements 'coverage' and + 'coverageType'. + + + + + + Definition: The coverage element + refers to either the geographic location or the time period + covered by the asset's intellectual content. For geographic + locations ('spatial' descriptors), it is expressed by keywords + such as place names (e.g. 'Alaska' or 'Washington, DC'), numeric + coordinates or geo-spatial data. 
For time-based events + ('temporal' descriptors), it is expressed by using a date, + period, era, or time-based event that is portrayed or covered in + the intellectual content (e.g. '2007' or 'Victorian Era'). The + PBCore metadata element coverage houses the actual spatial or + temporal keywords. The companion element coverageType is used to + identify the type of keywords that are being + used. + + + + + Definition: The coverageType + element is used to identify the actual type of keywords that are + being used by its companion metadata element coverage. + coverageType provides a picklist of two possible types - spatial + or temporal - because coverage in intellectual content may be + expressed spatially by geographic location or it may also be + expressed temporally by a date, period, era, or time-based + event." + + + + + + + + + + + + + + + Definition: The pbcoreAudienceLevel element + identifies a type of audience, viewer, or listener for whom the media item + is primarily designed or educationally useful. + + + + + + Definition: The pbcoreAudienceRating element + designates the type of users for whom the intellectual content of a media + item is intended or judged appropriate. This element differs from the + element pbcoreAudienceLevel in that it utilizes standard ratings that have + been crafted by the broadcast television and film industries and that are + used as flags for audience or age-appropriate materials. + + + + + + The pbcoreCreator element is a container for + sub-elements 'creator' and 'creatorRole'. + + + + + + Definition: The creator element + identifies the primary person, people, or organization(s) + responsible for creating the asset. Note that non-primary names + and roles should be included within the pbcoreContributor + container. Best practice: We recommend providing a consistent + internal standard for entering proper names and organizational + names, such as 'Last name, First name, Middle name,' or 'Main + group, subdivision.' 
We also recommend supplying separate + pbcoreCreator containers for each creator to be named for a + resource. + + + + + Definition: The creatorRole element + is used to identify the role played by the person, people or + organization(s) identified in the companion descriptor creator. + The PBCore schema allows for creatorRole to be repeated in the + pbcoreCreator container element. This can be useful when a + single person or organization is associated with multiple roles + in an asset. + + + + + + + + + Definition: The pbcoreContributor element is a + container for sub-elements 'contributor' and + 'contributorRole'. + + + + + + Definition: The contributor element + identifies a person, people, or organization that has made + substantial creative contributions to the asset. This + contribution is considered to be secondary to the primary + author(s) (person or organization) identified in the descriptor + creator. Best practice: We recommend providing a consistent + internal standard for entering proper names and organizational + names, such as 'Last name, First name, Middle name,' or 'Main + group, subdivision.' We also recommend supplying separate + pbcoreCreator containers for each creator to be named for a + resource. + + + + + Definition: The contributorRole + element is used to identify the role played by the person, + people or organizations identified in the companion element + contributor. The PBCore schema allows for contributorRole to be + repeated in the pbcoreContributor container element. This can be + useful when a single person or organization is associated with + multiple roles in an asset. + + + + + + + + + Definition: The pbcorePublisher element is a + container for sub-elements 'publisher' and + 'publisherRole.' + + + + + + Definition: The publisher element + identifies a person, people, or organization primarily + responsible for distributing or making the asset available to + others. 
The publisher may be a person, a business, organization, + group, project or service. Best practice: We recommend providing + a consistent internal standard for entering proper names and + organizational names, such as 'Last name, First name, Middle + name,' or 'Main group, subdivision.' We also recommend supplying + separate pbcoreCreator containers for each creator to be named + for a resource. + + + + + Definition: The publisherRole + element is used to identify the role played by the specific + publisher or publishing entity identified in the companion + descriptor publisher. The PBCore schema allows for publisherRole + to be repeated in the pbcorePublisher container element. This + can be useful when a single person or organization is associated + with multiple roles in an asset. + + + + + + + + + Definition: The pbcoreRightsSummary element is a + container for sub-elements 'rightsSummary', 'rightsLink', and + 'rightsEmbedded' used to describe Rights for the asset. + + + + + + Definition: The instantiationType element + contains sub-elements that describe a single instantiation of an asset. The + definition is malleable but it should be thought of as any discrete and + tangible unit that typically (though not always) comprises a whole + representation of the asset. For example, an original master videotape, a + preservation master video file, and a low-bitrate access copy would all be + considered Instantiations of a single video program. All of the sub-elements + held by this element are used to describe the instantiation specifically, + not necessarily the asset as a whole. + + + + + + Definition: The pbcoreAnnotation element allows + the addition of any supplementary information about the metadata used to + describe the PBCore record. pbcoreAnnotation clarifies element values, + terms, descriptors, and vocabularies that may not be otherwise sufficiently + understood. 
+ + + + + + Definition: The pbcorePart element may be used + to split up a single asset so as to enable the use of all available elements + at the pbcoreDescriptionDocument level to describe the intellectual content + of individual segments of an asset." + Best practice: Splitting up an asset in this + way allows for defining and describing segments, stories, episodes or other + divisions within the asset, such as individual films in a compilation reel, + or distinct segments of a news show when each may have their own titles, + creators, publishers, or other specific intellectual content information + that does not apply across the whole asset. + + + + + + Definition: The pbcoreExtension element can be + used as either a wrapper containing a specific element from another standard + OR embedded xml containing the extension. + Best practice: Use it to supplement other + metadata sub-elements of the PBCore description document in which it + appears. + + + + + + + + + + + Definition: The pbcoreinstantiationType schema type + uses a common structure to allow for a single instantiation or multiple + instantiations within a pbcoreDocumentDescription. + + + + + + Definition: The instantiationIdentifier element + contains an unambiguous reference or identifier for a particular + instantiation of an asset. + Best practice: Identify the media item (whether + analog or digital) by means of a string or number corresponding to an + established or formal identification system if one exists. Otherwise, use an + identification method that is in use within your agency, station, production + company, office, or institution. + + + + + + Definition: The instantiationDate element is a + date associated with an instantiation. + Best practice: Use ISO 8601 or some other + date/time standard if possible. 
+ + + + + + Definition: The instantiationDimensions element + specifies either the dimensions of a physical instantiation, or the + high-level visual dimensions of a digital instantiation. + Best practice: For physical dimensions, usage + examples might be 7" for an audio reel. When describing visual dimensions, + use this for high-level descriptors such as 1080p. Use the element frameSize + to describe the pixel dimensions of a visual resource. + + + + + + Definition: The instantiationPhysical element + is used to identify the format of a particular instantiation as it exists in + a physical form that occupies physical space (e.g., a tape on a shelf). This + includes physical digital media, such as a DV tape, audio CD or authored + DVD, as well as analog media. + Best practice: PBCore provides a controlled + vocabulary for media objects, though any controlled vocabulary can be used + as long as it is referenced. For digital storage carriers that contain + portable file-based media, such as data CDs, LTO tapes or hard drives, use + instantiationDigital to convey the mime type of the file instead of + describing the carrier. + + + + + + Definition: The instantiationDigital element is + used to identify the format of a particular instantiation of an asset as it + exists as a digital file on a server, hard drive, or other digital storage + medium. Digital instantiations should be expressed as a formal Internet MIME + types. + Best practice: instantiationDigital should only + be used to describe the MIME type of the digital file itself. There are + multiple options to convey more information about the storage medium or + location of the digital file, which are discussed in more detail on the + PBCore site. + + + + + + Definition: The instantiationStandard element + + can be used, if the instantiation is a physical item, to refer to the + broadcast standard of the video signal (e.g. NTSC, PAL), or the audio + encoding (e.g. Dolby A, vertical cut). 
If the instantiation is a digital + item, instantiationStandard should be used to express the container format + of the digital file (e.g. MXF). + Best practice: While the usage described in the + definition is best practice for 2.1, this usage is likely to change if new + elements are added for PBCore 3.0. + + + + + + Definition: The instantiationLocation element + may contain information about a specific location for an instantiation, such + as an organization's name, departmental name, shelf ID and contact + information. The instantiationLocation for a digital file should include + domain, path or URI to the file. + Best practice: For digital files, + instantiationLocation should always include a path or URI to the file. There + are multiple ways to convey additional information about the location of a + carrier or storage medium of the digital file, which are expressed on the + PBCore site. + + + + + + Definition: The instantiationMediaType element + identifies the general, high-level nature of the content of an + instantiation. It uses categories that show how content is presented to an + observer, e.g., as a sound, text or moving image. + + + + + + Definition: The instantiationGeneration element + identifies the use type and provenance of the instantiation. The generation + of a video tape may be an "Original Master" or "Dub", the generation of a + film reel may be an "Original Negative" or "Composite Positive", an + audiotape may be a "Master" or "Mix Element", an image may be a "Photograph" + or a "Photocopy". + + + + + + Definition: The instantiationFileSize element + indicates the file size of a digital instantiation. It should contain only + numerical values. As a standard, express the file size in bytes. Units of + Measure should be declared in the unitsOfMeasure + attribute. + + + + + + Definition: The instantiationTimeStart element + describes the point at which playback begins for a time-based instantiation. 
+ It is likely that the content on a tape may begin an arbitrary amount of + time after the beginning of the instantiation. Best practice is to use a + timestamp format such as HH:MM:SS[:|;]FF or HH:MM:SS.mmm or + S.mmm. + + + + + + Definition: The instantiationDuration element + provides a timestamp for the overall length or duration of a time-based + media item. It represents the playback time. Best practice is to use a + timestamp format such as HH:MM:SS[:|;]FF or HH:MM:SS.mmm or + S.mmm. + + + + + + Definition: The instantiationDataRate element + expresses the amount of data in a digital media file that is encoded, + delivered or distributed, for every second of time. This should be expressed + as numerical data, with the units of measure declared in the unitsOfMeasure + attribute. For example, if the audio file is 56 kilobits/second, then 56 + should be the value of instantiationDataRate and the attribute + unitsOfMeasure should be kilobits/second. + + + + + + Definition: The instantiationColors element + indicates the overall color, grayscale, or black and white nature of the + presentation of an instantiation, as a single occurrence or combination of + occurrences in or throughout the instantiation. + + + + + + Definition: The instantiationTracks element is + simply intended to indicate the number and type of tracks that are found in + a media item, whether it is analog or digital (e.g. 1 video track, 2 audio + tracks, 1 text track, 1 sprite track, etc.). Other configuration information + specific to these identified tracks should be described using + instantiationChannelConfiguration. + Best practice: Use + essenceTracks instead, as this element has been deprecated. + + + + + + Definition: The + instantiationChannelConfiguration element is designed to indicate, at a + general narrative level, the arrangement or configuration of specific + channels or layers of information within an instantiation's tracks. 
Examples + are 2-track mono, 8- track stereo, or video track with alpha + channel. + + + + + + Definition: The instantiationLanguage element + identifies the primary language of the tracks’ audio or text. Languages must + be indicated using 3-letter codes standardized in ISO 639-2 or 639-3. If an + instantiation includes more than one language, the element can be repeated. + Alternately, both languages can be expressed in one element by separating + two three-letter codes with a semicolon, i.e. + eng;fre. + Best practice: + Alternative audio or text tracks and their associated languages should be + identified using the element + instantiationAlternativeModes. + + + + + + Definition: The instantiationAlternativeModes + element is a catch-all metadata element that identifies equivalent + alternatives to the primary visual, sound or textual information that exists + in an instantiation. These are modes that offer alternative ways to see, + hear, and read the content of an instantiation. Examples include DVI + (Descriptive Video Information), SAP (Supplementary Audio Program), + ClosedCaptions, OpenCaptions, Subtitles, Language Dubs, and Transcripts. For + each instance of available alternativeModes, the mode and its associated + language should be identified together, if applicable. Examples include 'SAP + in English,' 'SAP in Spanish,' 'Subtitle in French,' 'OpenCaption in + Arabic.' + + + + + + Definition: The instantiationEssenceTrack + element is an XML container element that allows for grouping of related + essenceTrack elements and their repeated use. Use instantiationEssenceTrack + element to describe the individual streams that comprise an instantiation, + such as audio, video, timecode, etc. + Best practice: Essence tracks can exist in + either the digital or physical realm. In the digital realm, they may refer + to the separate audio and video tracks within a digital file. 
In the + physical realm, they may refer to the video and audio tracks contained on a + single video tape. + + + + + + Definition: The instantiationRelation element + is a container for sub-elements instantiationRelationType and + instantiationRelationIdentifier to describe relationships to other + instantiations. + + + + + + Definition: The + instantiationRelationType element describes the relation between + the instantiation being described and another + instantiation. + Best practice: Use to express + relationships between instantiations, for example to note that + they are different discrete parts of a single intellectual unit, + generationally related, derivative of another, or different + versions. + + + + + Definition: The + instantiationRelationIdentifier element is used to provide a + name, locator, accession, identification number or ID where the + related item can be obtained or found. + Best practice: We recommend using a + unique identifier or global unique ID in this + element. + + + + + + + + + Definition: The instantiationRights element is + a container for sub-elements rightsSummary, rightsLink and rightsEmbedded to + describe rights particular to this instantiation. + Best practice: This element contains rights + information that is specific to an instantiation of an asset, such as rights + conferred in a donation agreement that apply only to a single donated + item. + + + + + + Definition: The instantiationAnnotation element + is used to add any supplementary information about an instantiation of the + asset or the metadata used to describe it. It clarifies element + values, terms, descriptors, and vocabularies that may not be otherwise + sufficiently understood. 
+ + + + + + Definition: The instantiationPart element is a + container that allows the instantiation to be split into multiple parts, + which can describe the parts of a multi-section instantiation, e.g., a + multi-disk DVD or Vitaphone record and 35mm reel that are intended for + synchronous playback. It contains all of the elements that a + pbcoreInstantiation element would typically contain. + + + + + + Definition: The instantiationExtension element + can be used as either a wrapper containing a specific element from another + standard OR embedded xml containing the extension. + Best practice: Use it to supplement other + metadata sub-elements of 'instantiationPart' or + 'pbcoreInstantiationDocument' in which it appears. + + + + + + + Definition: The instantiation level attribute group + startEndTimeGroup may be used when there is a multi-part instantiation and time + notation is important. + + + + + Definition: The instantiation level attribute group + sourceVersionGroup may be used when there is a multi-part instantiation and + notation is important. + + + + + + + Definition: The essenceTrackType schema type uses a + common structure to allow for grouping of the essence related elements and their + repeated use. + + + + + + Definition: The essenceTrackType element refers + to the media type of the decoded data. Tracks may possibly be of these + types: video, audio, caption, metadata, image, etc. + + + + + + Definition: The essenceTrackIdentifier element + is an identifier of the track. Several audiovisual containers include such + identifier schema to identify each track, such as MPEG2 PIDs or QuickTime + Track IDs. + + + + + + Definition: The essenceTrackStandard element + should be used with file-based instantiations to describe the broadcast + standard of the video signal (e.g. NTSC, PAL) or to further clarify the + standard of the essenceTrackEncoding format.
+ + + + + + Definition: The essenceTrackEncoding element + identifies how the actual information in an + instantiation is compressed, interpreted, or formulated using a particular + scheme. Identifying the encoding used is beneficial for a number of reasons, + including as a way to achieve reversible compression; for the construction + of document indices to facilitate searching and access; or for efficient + distribution of the information across data networks with differing + bandwidths or pipeline capacities. A human-readable encoding value should be + placed here. Use @ref to identify the codec ID. + Best practice: Use @source to describe the type + of encoding reference used, such as fourcc. In @ref, use a URI/URL from the + source to identify the codec utilized by its container + format. + + + + + + Definition: The essenceTrackDataRate element + measures the amount of data used per time interval for encoded data. The + data rate can be calculated by dividing the total data size of the track's + encoded data by a time unit. By default use bytes per + second. + + + + + + Definition: The essenceTrackFrameRate element + is relevant to tracks of video track type only. The frame rate is calculated + by dividing the total number of frames by the duration of the video track. + By default measure frame rate in frames per second expressed as fps as a + unit of measure, e.g., 24 fps. + Best practice: Example: + 1920x1080. + + + + + + Definition: The essenceTrackPlaybackSpeed + element specifies the rate of units against time at which the media track + should be rendered for human consumption, e.g., 15ips (inches per + second). + + + + + + Definition: The essenceTrackSamplingRate + element measures how often data is sampled when information from the audio + portion of an instantiation is digitized.
For a digital audio signal, the + sampling rate is measured in kilohertz and is an indicator of the perceived + playback quality of the media item (the higher the sampling rate, the + greater the fidelity). + + + + + + Definition: The essenceTrackBitDepth element + specifies how much data is sampled when information is digitized, encoded, + or converted for an instantiation (specifically, audio, video, or image). + Bit depth is measured in bits and generally implies an arbitrary perception + of quality during playback of an instantiation (the higher the bit depth, + the greater the fidelity). + + + + + + Definition: The essenceTrackFrameSize element + measures the width and height of the encoded video or image track. The frame + size refers to the size of the encoded pixels and not the size of the + displayed image. It may be expressed as combination of pixels measured + horizontally vs. the number of pixels of image/resolution data stacked + vertically (interlaced and progressive scan). + + + + + + Definition: The essenceTrackAspectRatio element + indicates the ratio of horizontal to vertical proportions in the display of + a static image or moving image. + + + + + + Definition: The essenceTrackTimeStart element + provides a time stamp for the beginning point of playback for a time-based + essence track. It is likely that the content on a tape may begin an + arbitrary amount of time after the beginning of the + instantiation. + Best practice: Use in combination with + essenceTrackDuration to identify a sequence or segment of an essence track + that has a fixed start time and end time. Best practice is to use a + timestamp format such as HH:MM:SS[:|;]FF or HH:MM:SS.mmm or + S.mmm. + + + + + + Definition: The essenceTrackDuration element + provides a timestamp for the overall length or duration of a track. It + represents the track playback time. Best practice is to use a timestamp + format such as HH:MM:SS[:|;]FF or HH:MM:SS.mmm or S.mmm. 
+ + + + + + Definition: The essenceTrackLanguage element + identifies the primary language of the tracks' audio or + text. + Best practice: Alternative audio or text tracks + and their associated languages should be identified using the element + alternativeModes. + + + + + + Definition: The essenceTrackAnnotation element + can store any supplementary information about a track or the metadata used + to describe it. It clarifies element values, terms, descriptors, and + vocabularies that may not be otherwise sufficiently + understood. + + + + + + Definition: The essenceTrackExtension element + can be used as either a wrapper containing a specific element from another + standard OR embedded xml containing the extension. The essenceTrackExtension + element is a container to accommodate track-level metadata from external + systems. Use it to supplement other metadata sub-elements of + instantiationEssenceTrack in which it appears. + + + + + + + + + Definition: The extensionType schema type uses a common + structure to allow for the use of multiple, qualified extensions at the asset, + instantiation and essence levels. + + + + + Definition: The extensionWrap element serves as + a container for the elements extensionElement, extensionValue, and + extensionAuthorityUsed. + + + + + + Definition: The extensionElement + element should contain the name of an element used from another + metadata standard, in the case that an element from another + + metadata standard is used. While we recommend the usage of an + existing standard, this element can also be used to define local + elements that may not be part of an existing standard. + Best practice: These extensions + fulfill the metadata requirements for communities identifying + and describing their own types of media with specialized, custom + terminologies. + + + + + Definition: The extensionValue + element is used to express the data value of the label indicated + by extensionElement.
+ + + + + Definition: The + extensionAuthorityUsed element identifies the authority used for + the extensionElement. + Best practice: If metadata + extensions to PBCore are assigned to a media item with the + element extensionElement, and the terms used are derived from a + specific authority or metadata scheme, use + extensionAuthorityUsed to identify whose metadata extensions are + being used. + + + + + + + + + Definition: The extensionEmbedded element + allows the inclusion of xml from another schema, e.g. TEI, METS, + etc. + + + + + + + + Definition: The pbcorePartType schema type uses a + common structure to allow for the repeating of descriptive sub-documents to define + different segments, episodes etc., just as super-element 'pbcoreDescriptionDocument' + can be collected and used to describe higher-level media + programs. + + + + + + Definition: The group of attributes + 'startTime', 'endTime' and 'timeAnnotation' could be used when there + is a multipart asset and time notation is important. + + + + + + Definition: The partType attribute is used to indicate + the nature of the part into which the asset has been + divided. + + + + + Definition: The partTypeSource attribute provides the + name of the authority used to declare data value of the partType + attribute. + Best practice: This might be the name of a controlled + vocabulary, namespace or authority list, such as the official PBCore + vocabulary. We recommend a consistent and human readable + use. + + + + + Definition: The partTypeRef attribute is used to supply a + source's URI for the value of the attribute + partTypeSource. + Best practice: The partTypeRef attribute can be used to + point to a term in a controlled vocabulary, or a URI associated with a + source. + + + + + Definition: The partTypeVersion attribute identifies any + version information about the authority or convention used to express + data of this element.
+ + + + + Definition: The partTypeAnnotation attribute includes + narrative information intended to clarify the nature of data used in the + element. + Best practice: This attribute can be used as a notes + field to include any additional information about the element or + associated attributes. + + + + + + + + + Definition: The dateStringType schema type allows for + the addition of the dateType attribute. + + + + + + Definition: The dateType attribute + classifies by named type the date-related data of the element, e.g., + created, broadcast, dateAvailableStart. + Best practice: Used to clarify how the date + is related to the asset or instantiation. Date Created may be the most + common, but the element could also be used to describe the Date + Accessioned or Date Deaccessioned, for example. + + + + + + + + + + Definition: The sourceVersionStringType schema type is + used with a number of elements to allow the attachment of the attributes: source, + ref, version and annotation. + + + + + + + + + + + Definition: The requiredSourceVersionStringType schema + type is the same as sourceVersionStringType with the addition that the source + attribute is required instead of optional. + + + + + + Definition: The source attribute provides + the name of the authority used to declare the value of the + element. + Best practice: Different elements will use + the source attribute slightly differently. For example, identifier + source (required) should be the name of the organization, institution, + system or namespace that the identifier came from, such as "PBS NOLA + Code" or an institutional database identifier. For other elements, this + might be the name of a controlled vocabulary, namespace or authority + list, such as Library of Congress Subject Headings. We recommend a + consistent and human readable use. + + + + + Definition: The ref attribute is used to + supply a source's URI for the value of the element.
+ Best practice: Attribute ref can be used to + point to a term in a controlled vocabulary, or a URI associated with a + source. + + + + + Definition: The version attribute + identifies any version information about the authority or convention + used to express data of this element. + Best practice: This attribute can be used + as a notes field to include any additional information about the element + or associated attributes. + + + + + Definition: The annotation attribute + includes narrative information intended to clarify the nature of data + used in the element. + + + + + + + + + Definition: The titleStringType schema type allows for + the addition of a titleType attribute as well as the standard sourceVersionGroup + attributes and a startEndTimeGroup of attributes. + + + + + + Definition: The titleType attribute is used + to indicate the type of title being assigned to the asset, such as + series title, episode title or project title. + + + + + Definition: The titleTypeSource attribute + provides the name of the authority used to declare data value + of the titleType attribute. + Best practice: This might be the name of a + controlled vocabulary, namespace or authority list, such as the official + PBCore vocabulary. We recommend a consistent and human readable + use. + + + + + Definition: The titleTypeRef attribute is + used to supply a source's URI for the value of the attribute + titleTypeSource. + Best practice: Attribute titleTypeRef can + be used to point to a term in a controlled vocabulary, or a URI + associated with a source. + + + + + Definition: The titleTypeVersion attribute + identifies any version information about the authority or convention + used to express data of this element. + + + + + Definition: The titleTypeAnnotation + attribute includes narrative information intended to clarify the nature + of data used in the element.
+ Best practice: This attribute can be used + as a notes field to include any additional information about the element + or associated attributes. + + + + + + + + + + + Definition: The subjectStringType schema type allows + for the addition of a subjectType attribute as well as the standard + sourceVersionGroup attributes and a startEndTimeGroup of + attributes. + + + + + + Definition: The subjectType attribute is + used to indicate the type of subject being assigned to the + element, such as 'topic,' 'personal name,' or + 'keyword'. + + + + + Definition: The subjectTypeSource attribute + provides the name of the authority used to declare the value of the + attribute subjectType. + Best practice: This might be the name of a + controlled vocabulary, namespace or authority list, such as the official + PBCore vocabulary. We recommend a consistent and human readable + use. + + + + + Definition: The subjectTypeRef attribute is + used to supply a source's URI for the value of the attribute + subjectType. + Best practice: Attribute subjectTypeRef can + be used to point to a term in a controlled vocabulary, or a URI + associated with a source. + + + + + Definition: The subjectTypeVersion + attribute identifies any version information about the authority or + convention used to express data of the attribute + subjectType. + + + + + Definition: The subjectTypeAnnotation + attribute includes narrative information intended to clarify the nature + of data used in the attribute subjectType. + Best practice: This attribute can be used + as a notes field to include any additional information about the element + or associated attributes. + + + + + + + + + + + Definition: The descriptionType schema type is a + complex group of attributes that help define the description type, as well as + allowing for descriptions of segments and relevant times.
+ + + + + + Definition: The descriptionType attribute + is used to indicate the type of description being assigned to the + element, such as 'abstract,' 'summary,' or 'physical + description.' + + + + + Definition: The descriptionTypeSource + attribute provides the name of the authority used to declare data value + of the attribute descriptionType. + Best practice: This might be the name of a + controlled vocabulary, namespace or authority list, such as the official + PBCore recommended vocabulary. We recommend a consistent and human + readable use. + + + + + Definition: The descriptionTypeRef + attribute is used to supply a source's URI for the value of the + attribute descriptionType. + Best practice: The descriptionTypeRef + attribute can be used to point to a term in a controlled vocabulary, or + a URI associated with a source. + + + + + Definition: The descriptionTypeVersion + attribute identifies any version information about the authority or + convention used to express data of the attribute + descriptionType. + + + + + Definition: The descriptionTypeAnnotation + attribute includes narrative information intended to clarify the nature + of data used in the element. + Best practice: This attribute can be used + as a notes field to include any additional information about the element + or associated attributes. + + + + + Definition: The segmentType attribute is + used to define the type of content contained in a + segment. + Best practice: We recommend using + description and descriptionType instead of + segmentType. + + + + + Definition: The segmentTypeSource attribute + provides the name of the authority used to declare data value of the + attribute segmentType. + Best practice: This might be the name of a + controlled vocabulary, namespace or authority list, such as the official + PBCore recommended vocabulary. + + + + + Definition: The segmentTypeRef attribute is + used to supply a source's URI for the value of the attribute + segmentType.
+ Best practice: Attribute segmentTypeRef can + be used to point to a term in a controlled vocabulary, or a URI + associated with a source. + + + + + Definition: The segmentTypeVersion + attribute identifies any version information about the authority or + convention used to express data of the attribute + segmentType. + + + + + Definition: The segmentTypeAnnotation + attribute includes narrative information intended to clarify the nature + of data used in the attribute segmentType. + Best practice: This attribute can be used + as a notes field to include any additional information about the element + or associated attributes. + + + + + + + + + + + Definition: The sourceVersionStartEndStringType adds + attributes that define the source of the string with the option of time-related + attributes. + + + + + + + + + + + + Definition: The affiliatedStringType adds attributes of + affiliation and time relevance. + + + + + + Definition: The affiliation attribute is + used to indicate the organization with which an agent is associated or + affiliated. + + + + + Definition: The affiliationSource attribute + provides the name of the authority used to declare the value of the + attribute affiliation. + Best practice: This might be the name of a + controlled vocabulary, namespace or authority list, such as the official + PBCore recommended vocabulary. + + + + + Definition: The affiliationRef attribute is + used to supply a source's URI for the value of the attribute + affiliation. + Best practice: Attribute affiliationRef can + be used to point to a term in a controlled vocabulary, or a URI + associated with a source. + + + + + Definition: The affiliationVersion + attribute identifies any version information about the authority or + convention used to express data of the attribute + affiliation. + + + + + Definition: The affiliationAnnotation + attribute includes narrative information intended to clarify the nature + of data used in the attribute affiliation.
+ Best practice: This attribute can be used + as a notes field to include any additional information about the element + or associated attributes. + + + + + + + + + + + Definition: The contributorString helps define the + portrayal role as well as the general source and version group + attributes. + + + + + + Definition: The portrayal attribute + identifies any roles or characters performed by a + contributor. + + + + + + + + + + Definition: The technicalStringType schema type adds + the attributes of unitsOfMeasure and annotation. + + + + + + Definition: The unitsOfMeasure attribute + defines the unit used in the containing element, e.g. pixels, GB, Mb/s, + ips, fps, kHz, inches, lines, dpi. + Best practice: We recommend standardizing + the notation that is most widely recognized in your institution and + using it with consistency. + + + + + + + + + + Definition: The instantiationStandardStringType schema + type allows for the addition of a profile attribute along with the + sourceVersionGroup. + + + + + + Definition: The profile attribute is used + to further quantify the profile of the container format (e.g. + Op1a). + Best practice: This attribute can be used + as a notes field to include any additional information about the element + or associated attributes. + + + + + + + + + + Definition: The stringType schema type adds an + annotationType attribute and a reference. + + + + + + Definition: Use the attribute + annotationType to indicate the type of annotation being assigned to the + asset, such as a comment, clarification, or cataloging + note. + + + + + + + + + + Definition: The rightsSummaryType schema type allows the + use of rights at the asset level and the instantiation level. The rights can be + expressed as a summary or a link or an embedded XML record. These can also contain + time relations.
+ + + + + Definition: The rightsSummary element is used + as a general free-text element to identify information about copyrights and + property rights held in and over an asset or instantiation, whether they are + open access or restricted in some way. If dates, times and availability + periods are associated with a right, include them. End user permissions, + constraints and obligations may also be identified as + needed. + Best practice: For rights information that + applies to the asset as a whole, use this element within the container + pbcoreRightsSummary. For rights information that is specific to an + instantiation of an asset, use it within the container + instantiationRights. + + + + + Definition: The rightsLink element is a URI + pointing to a declaration of rights. + + + + + Definition: The rightsEmbedded element allows + the inclusion of xml from another rights standard, e.g. ODRL, METS, etc. The + included XML then defines the rights for the PBCore asset and/or PBCore + instantiation. + + + + + + + + + Definition: The rightsLinkType schema type allows for + the addition of an annotation attribute to the rightsLink. + + + + + + + + + + + Definition: The embeddedType schema type allows for the + addition of an annotation attribute to the embeddedType. + + + + + + + + + + Definition: The threeletterStringType adds the + sourceVersionGroup to threelettercode for source references. + + + + + + + + + + + Definition: This algorithm controls the language + element to ensure the use of three-letter codes. + + + + + + + + + + Definition: The grouping of attributes: source, + reference, version and annotation. + + + + Definition: The source attribute provides the name + of the authority used to declare the value of the element. + Best practice: Different elements will use the + source attribute slightly differently.
For example, identifier source (required) + should be the name of the organization, institution, system or namespace that + the identifier came from, such as "PBS NOLA Code" or an institutional database + identifier. For other elements, this might be the name of a controlled + vocabulary, namespace or authority list, such as Library of Congress Subject + Headings. We recommend a consistent and human readable use. + + + + + Definition: The ref attribute is used to supply a + source's URI for the value of the element. + Best practice: Attribute ref can be used to point + to a term in a controlled vocabulary, or a URI associated with a + source. + + + + + Definition: The version attribute identifies any + version information about the authority or convention used to express data of + this element. + + + + + Definition: The annotation attribute includes + narrative information intended to clarify the nature of data used in the + element. + Best practice: This attribute can be used as a + notes field to include any additional information about the element or + associated attributes. + + + + + + + Definition: The grouping of attributes: startTime, + endTime and timeAnnotation. + + + + Definition: The startTime attribute combines with + the endTime attribute to define a specific media segment within a broader + timeline of an asset and/or instantiation. + Best practice: This is a free text attribute and + can be applied at the asset or instantiation level. When used at the asset + level, it may be used to talk generally about the start/end time of a segment + (e.g. "30 minutes"), or by providing a timestamp to a specific point in an + instantiation. If you're doing that for element at the asset level, we suggest + referencing the instantiation ID you are referring to in timeAnnotation. 
One + example would be if a six-hour long tape was broken into multiple programs, and + each instantiation might have its start time labeled as when the instantiation + began in the timeline of the broader tape. Another example for this usage might + be a digital file created from a VHS tape that contains multiple segments. In + the digital copy, color bars are removed from the beginning, and black from the + end of the digital instantiation. Time references referring to the segments on + the physical VHS are no longer relevant; therefore it's important to tie start + and end time references to a specific instantiation, e.g. use the asset ID and + timestamp. + + + + + Definition: The endTime attribute combines with a + similar value in the startTime attribute to define a specific media segment + within a broader timeline of an asset and/or instantiation. + + + + + Definition: The timeAnnotation attribute includes + narrative information intended to clarify any time-oriented nature of data used + in the element. 
+ + + + diff --git a/lib/validator.rb b/lib/validator.rb index 826770e..adc7344 100644 --- a/lib/validator.rb +++ b/lib/validator.rb @@ -26,6 +26,7 @@ class Validator # List of supported XSDs PBCORE_VERSIONS = { # "1.1" => { :version => "PBCore 1.1", :xsd => "PBCoreXSD_Ver_1-1_Final.xsd" }, + "2.1" => { :version => "PBCore 2.1", :xsd => "pbcore-2.1.xsd" }, "2.0" => { :version => "PBCore 2.0", :xsd => "PBCoreXSD_v2.xsd" }, "1.2" => { :version => "PBCore 1.2", :xsd => "PBCoreSchema_v1-2.xsd" }, "1.2.1" => { :version => "PBCore 1.2.1", :xsd => "PBCoreXSD_Ver_1-2-1.xsd" }, @@ -130,7 +131,10 @@ module Picklists "Videotape Recordist", "Vidifont Operator", "Vocalist", "VTR Recordist", "Wardrobe", "Writer", "Other" ] - + + COVERAGE_TYPES = [ + 'Spatial','Temporal' + ] PUBLISHER_ROLES = [ "Copyright Holder", "Distributor", "Presenter", "Publisher", "Release Agent", "Other" @@ -298,6 +302,10 @@ module Picklists "Interactive Resource", "Moving Image", "Physical Object", "Presentation", "Service", "Software", "Sound", "Static Image", "Text" ] + + INSTANTIATION_MEDIA_TYPES = [ + 'Moving Image', 'Audio' + ] GENERATIONS = [ "Artifact/Award", "Artifact/Book", "Artifact/Costume", @@ -437,6 +445,8 @@ def checkbestpractices return if @practices_checked || @xml.nil? @practices_checked = true + + check_picklist('titleType', Picklists::TITLE_TYPES) check_lists('subject') check_picklist('descriptionType', Picklists::DESCRIPTION_TYPES) @@ -450,6 +460,8 @@ def checkbestpractices check_picklist('formatPhysical', Picklists::PHYSICAL_FORMATS) check_picklist('formatDigital', Picklists::DIGITAL_FORMATS) check_picklist('formatMediaType', Picklists::MEDIA_TYPES) + check_picklist('instantiationMediaType', Picklists::INSTANTIATION_MEDIA_TYPES , 'You’re using a value for instantiationMediaType that is neither Moving Image nor Audio. 
While this is valid, we recommend using one of the standardized values from the controlled vocabulary for this element: http://pbcore.org/pbcore-controlled-vocabularies/instantiationmediatype-vocabulary/' ) + check_picklist('coverageType', Picklists::COVERAGE_TYPES , 'It looks like you’re using a value for coverageType that is neither Spatial nor Temporal. For valid PBCore, you must use one of the standardized values from the controlled vocabulary for this element: http://metadataregistry.org/concept/list/vocabulary_id/149.html' ) check_picklist('formatGenerations', Picklists::GENERATIONS) check_picklist('formatColors', Picklists::FORMAT_COLORS) check_picklist('essenceTrackType', Picklists::ESSENCE_TRACK_TYPES) @@ -457,6 +469,53 @@ def checkbestpractices check_names('contributor') check_names('publisher') check_only_one_format + + + + + + check_min_one_subelements('pbcoreCollection',['pbcoreDescriptionDocument'],"") + ['pbcoreDescriptionDocument','pbcorePart'].each do |parentname| check_min_one_subelements(parentname,['pbcoreIdentifier','pbcoreTitle','pbcoreDescription'],"") ; end ; + + check_element_has_attribute('pbcoreIdentifier','source',"") + +# check_min_one_subelements('pbcoreRelation',['pbcoreRelationType','pbcoreRelationIdentifier'],"") +# check_max_one_subelements('pbcoreRelation',['pbcoreRelationIdentifier','pbcoreRelationType'],"") + ['pbcoreRelationType','pbcoreRelationIdentifier'].each do |subname| check_only_one_subelement('pbcoreRelation',subname.split(),"must contain two subelements and only one '#{subname}.' Please repeat the entire 'pbcoreRelation' container element to express each relationship.") ; end ; + + check_only_one_subelement('pbcoreCoverage',['coverage'],"should contain only one 'coverage' subelement. Please repeat the entire pbcoreCoverage container element for each instance of coverage.") + + check_only_one_subelement('pbcoreCreator',['creator'],"should contain only one 'creator' subelement. 
Please repeat the entire pbcoreCreator container element for each instance of creator.") + check_only_one_subelement('pbcoreContributor',['contributor'],"should contain only one 'contributor' subelement. Please repeat the entire pbcoreContributor container element for each instance of contributor.") + check_only_one_subelement('pbcorePublisher',['publisher'],"should contain only one 'publisher' subelement. Please repeat the entire pbcorePublisher container element for each instance of publisher.") + check_only_one_subelement('pbcoreRightsSummary',['rightsSummary', 'rightsLink','rightsEmbedded'],"should contain only one subelement. Please repeat the entire pbcoreRightsSummary container element for each rightsSummary, rightsLink, or rightsEmbedded.") + ['pbcoreInstantiationDocument','instantiationPart'].each do |parentname| check_min_one_subelements(parentname,['instantiationIdentifier','instantiationLocation'],"") ; end ; + check_element_has_attribute('instantiationIdentifier','source',"") + ['pbcoreInstantiationDocument','instantiationPart'].each do |parentname| check_max_one_subelements(parentname,['instantiationPhysical','instantiationDigital','instantiationStandard','instantiationLocation','instantiationMediaType','instantiationFileSize','instantiationTimeStart','instantiationDuration','instantiationDataRate','instantiationColors','instantiationTracks','instantiationChannelConfiguration','instantiationChannelConfiguration'],"") ; end ; + + check_only_one_subelement('instantiationRights',['rightsSummary', 'rightsLink','rightsEmbedded'],"should contain only one subelement. Please repeat the entire instantiationRights container element for each rightsSummary, rightsLink, or rightsEmbedded.") + + ['pbcoreExtension','instantiationExtension'].each do |parentname| check_only_one_subelement(parentname,['extensionWrap','extensionEmbedded'],"should contain only one subelement. 
Please repeat the entire '#{parentname}' container element for each 'extensionWrap' or 'extensionEmbedded'") ; end ; + ['extensionElement','extensionValue'].each do |subname| check_only_one_subelement('extensionWrap',subname.split(),"must contain one '#{subname}' subelement.") ; end ; + + + check_valid_characters(['instantiationFileSize', 'instantiationDataRate', 'essenceTrackDataRate', 'essenceTrackFrameRate', 'essenceTrackPlaybackSpeed', 'essenceTrackSamplingRate', 'essenceTrackBitDepth', 'essenceTrackFrameSize', 'essenceTrackAspectRatio'],"g/[0-9]:x\///", msg = "For best practice, this technical element should only contain numeric values. To express a unit of measure for this element, we recommend using the @unitsOfMeasure attribute.") + check_valid_characters(['instantiationTimeStart', 'instantiationDuration', 'essenceTrackTimeStart', 'essenceTrackDuration'],"g/[0-9]:\.//", msg = "This is valid, but we recommend using a timestamp format for this element, such as HH:MM:SS:FF or HH:MM:SS.mmm or S.mmm.") + check_valid_length_codes(['instantiationLanguage', 'essenceTrackLanguage'], ';', "For valid PBCore, please use one of the ISO 639.2 or 639.3 standard language codes, which can be found at http://www.loc.gov/standards/iso639-2/ and http://www-01.sil.org/iso639-3/codes.asp. You can describe more than one language in the element by separating two three-letter codes with a semicolon, i.e. 
eng;fre.") + + # sort the error messages by line number + tmperrors=[] + lastline=@xml.to_s.gsub(13.chr+10.chr,10.chr).tr(13.chr,10.chr).split(10.chr).count # figure out how to get the right number: @xml.last.line_num isn't it + (1..lastline).reverse_each do |lnum| @errors.select {|msg| msg.to_s.match(" line #{lnum.to_s} ") || msg.to_s.match(" at :#{lnum.to_s}" + 46.chr)}.each do |y| tmperrors<< y if not tmperrors.include?(y); end ; end + # wacky that each item in tmperrors array is a 1-count array + if @errors.to_s.include?(' element is not expected') + tmperrors << ["===="] + tmperrors << ["Error(s) below about 'expected' elements are about what appears out of the expected order: missing (required) elements will be cited further; otherwise, consult PBCore documentation for proper sequencing."] + end + + # is it necessary to examine @errors for things *not* in tmperrors? they would fail assumption of line# test + @errors = tmperrors.reverse.reject {|x| x == []}.flatten + end # returns true iff the document is perfectly okay @@ -502,12 +561,12 @@ def rxml_error(err) #:nodoc: end private - def check_picklist(elt, picklist) + def check_picklist(elt, picklist, msg = "") each_elt(elt) do |node| if node.content.strip.empty? @errors << "#{elt} on #{node.line_num} is empty. Perhaps consider leaving that element out instead." elsif !picklist.any?{|i| i.downcase == node.content.downcase} - @errors << "“#{node.content}” on line #{node.line_num} is not in the PBCore suggested picklist value for #{elt}." + @errors << "“#{node.content}” on line #{node.line_num} is not in the PBCore suggested picklist value for #{elt}. 
" + msg.to_s end end check_lists(elt) @@ -535,10 +594,83 @@ def check_only_one_format each_elt("pbcoreInstantiation") do |node| if node.find(".//pbcore:formatDigital", "pbcore:#{PBCORE_NAMESPACE}").size > 0 && node.find(".//pbcore:formatPhysical", "pbcore:#{PBCORE_NAMESPACE}").size > 0 - @errors << "It looks like the instantiation on line #{node.line_num} contains both a formatDigital and a formatPhysical element. This is probably not what you intended." + @errors << "It looks like the instantiation on line #{node.line_num} contains both a formatDigital and a formatPhysical element. This is valid, but not recommended in PBCore XML." + else + if node.find(".//pbcore:instantiationDigital", "pbcore:#{PBCORE_NAMESPACE}").size > 0 && + node.find(".//pbcore:instantiationPhysical", "pbcore:#{PBCORE_NAMESPACE}").size > 0 + @errors << "It looks like the instantiation on line #{node.line_num} contains both a instantiationDigital and a instantiationPhysical element. This is valid, but not recommended in PBCore XML." 
+ end end end end + + def check_element_has_attribute(elementname,attributename,msg="") + each_elt(elementname.to_s) do |node| + isMissing=true + node.attributes.each {|attribute| isMissing=false if attribute.name == attributename } + # node.attributes.get_attribute(attributename) + if isMissing + @errors << "Element '#{elementname}' at line #{node.line_num} must contain the attribute '#{attributename}' " + msg.to_s + end + end + end + + def check_only_one_subelement(parentname,subnames,msg = "") +# subsum=0 + each_elt(parentname.to_s) do |node| + subsum=0 + subnames.each do |subname| + subsum = subsum + node.find("./pbcore:#{subname}", "pbcore:#{PBCORE_NAMESPACE}").size + end + if subsum != 1 + @errors << "Element '#{parentname}' near line #{node.line_num} " + msg.to_s + end + end + end + + def check_max_one_subelements(parentname,subnames,msg = "") + each_elt(parentname.to_s) do |node| + subnames.each do |subname| + subsum = node.find("./pbcore:#{subname}", "pbcore:#{PBCORE_NAMESPACE}").size + if subsum > 1 + @errors << "Element '#{subname}' near line #{node.line_num} isn’t repeatable. For valid PBCore, please find another way to incorporate that information. " + msg.to_s + end + end + end + end + + def check_min_one_subelements(parentname,subnames,msg = "") + each_elt(parentname.to_s) do |node| + subnames.each do |subname| + subsum = node.find("./pbcore:#{subname}", "pbcore:#{PBCORE_NAMESPACE}").size + if subsum < 1 + @errors << "Element '#{parentname}' near line #{node.line_num} is missing required subelement '#{subname}.' For valid PBCore, please add a value for this element." + msg.to_s + end + end + end + end + + def check_valid_characters(elements_array,validstring = "", msg = "") + elements_array.each do |elt| + each_elt(elt.to_s) do |node| + if node.content.tr(validstring,"") != "" + @errors << "Element '#{node.name}' at line #{node.line_num} contains invalid #{node.content.tr(validstring,"").length} characters. 
" + msg.to_s + end + end + end + end + + def check_valid_length_codes(elements_array, delimiter = ';' ,msg = "") + elements_array.each do |elt| + each_elt(elt.to_s) do |node| + xcount=node.content.split(delimiter).select{|x| x.length < 2 || x.length > 3}.length + if xcount != 0 + @errors << "Element '#{node.name}' at line #{node.line_num} contains #{xcount} invalid value#{'s' if xcount > 1}. " + msg.to_s + end + end + end + end + def each_elt(elt) @xml.find("//pbcore:#{elt}", "pbcore:#{PBCORE_NAMESPACE}").each do |node| diff --git a/public/favicon.ico b/public/favicon.ico index fdb61b2..c1f0f19 100644 Binary files a/public/favicon.ico and b/public/favicon.ico differ diff --git a/public/logo.png b/public/logo.png index 24fff3f..f434d65 100644 Binary files a/public/logo.png and b/public/logo.png differ diff --git a/templates/index.haml b/templates/index.haml index 6010618..9a56544 100644 --- a/templates/index.haml +++ b/templates/index.haml @@ -1,9 +1,7 @@ :textile Hello, this is an extremely preliminary PBCore validator. It can detect certain types of errors and deviations from "best practices" in "PBCore":http://www.pbcore.org/ records. - Note that this tool is *not* officially endorsed by anyone affiliated with PBCore. - - For more information, see "the blog post introducting the validator":http://vermicel.li/blog/2009/02/02/introducing-the-pbcore-validator.html. + For more information, see "the blog post introducing the validator":http://vermicel.li/blog/2009/02/02/introducing-the-pbcore-validator.html. %h2 Validate a document %form{ :action => "/validator", :method => "post", :enctype => "multipart/form-data" } @@ -30,4 +28,6 @@ You can "browse the source code":http://github.com/mlc/pbcorevalidator or "contact the author":mailto:mike@vermicel.li. - This work employs PBCore. The PBCore (Public Broadcasting Metadata Dictionary) was created by the public broadcasting community in the United States of America for use by public broadcasters and others. 
Initial development funding for PBCore was provided by the Corporation for Public Broadcasting. The PBCore is built on the foundation of the Dublin Core (ISO 15836), an international standard for resource discovery (http://dublincore.org), and has been reviewed by the Dublin Core Metadata Initiative Usage Board. Copyright: 2005, Corporation for Public Broadcasting. +

This work employs PBCore.

+

PBCore, the Public Broadcasting Metadata Dictionary, is licensed under a Creative Commons Attribution 4.0 International license. (Full legal code available at https://creativecommons.org/licenses/by/4.0/legalcode.)

+

PBCore, the Public Broadcasting Metadata Dictionary Project, was created by the public broadcasting community in the United States of America for use by public broadcasters and others. PBCore is built on the foundation of the Dublin Core (ISO 15836), an international standard for resource discovery. PBCore was developed with funding provided by the Corporation for Public Broadcasting and is maintained by WGBH. Copyright 2018, WGBH Educational Foundation, on behalf of the American Archive of Public Broadcasting.

diff --git a/views/index.haml b/views/index.haml index c29597a..9a56544 100644 --- a/views/index.haml +++ b/views/index.haml @@ -1,9 +1,7 @@ :textile Hello, this is an extremely preliminary PBCore validator. It can detect certain types of errors and deviations from "best practices" in "PBCore":http://www.pbcore.org/ records. - Note that this tool is *not* officially endorsed by anyone affiliated with PBCore. - - For more information, see "the blog post introducting the validator":http://vermicel.li/blog/2009/02/02/introducing-the-pbcore-validator.html. + For more information, see "the blog post introducing the validator":http://vermicel.li/blog/2009/02/02/introducing-the-pbcore-validator.html. %h2 Validate a document %form{ :action => "/validator", :method => "post", :enctype => "multipart/form-data" } @@ -26,8 +24,10 @@ %hr :textile - This PBCore validate update was started on 10/07/2014 by Tessa Fallon for the AMIA/DLF Hack Day. The PBCore validator is written by mike castleman and is copyright (c) 2009 by "Roasted Vermicelli, LLC":http://vermicel.li/ and released under the terms of the "GNU General Public License":http://www.gnu.org/licenses/gpl.html, "version 3":COPYING.txt or later. + The PBCore validator is written by mike castleman and is copyright (c) 2009 by "Roasted Vermicelli, LLC":http://vermicel.li/ and released under the terms of the "GNU General Public License":http://www.gnu.org/licenses/gpl.html, "version 3":COPYING.txt or later. You can "browse the source code":http://github.com/mlc/pbcorevalidator or "contact the author":mailto:mike@vermicel.li. - This work employs PBCore. The PBCore (Public Broadcasting Metadata Dictionary) was created by the public broadcasting community in the United States of America for use by public broadcasters and others. Initial development funding for PBCore was provided by the Corporation for Public Broadcasting. 
The PBCore is built on the foundation of the Dublin Core (ISO 15836), an international standard for resource discovery (http://dublincore.org), and has been reviewed by the Dublin Core Metadata Initiative Usage Board. Copyright: 2005, Corporation for Public Broadcasting. +

This work employs PBCore.

+

PBCore, the Public Broadcasting Metadata Dictionary, is licensed under a Creative Commons Attribution 4.0 International license. (Full legal code available at https://creativecommons.org/licenses/by/4.0/legalcode.)

+

PBCore, the Public Broadcasting Metadata Dictionary Project, was created by the public broadcasting community in the United States of America for use by public broadcasters and others. PBCore is built on the foundation of the Dublin Core (ISO 15836), an international standard for resource discovery. PBCore was developed with funding provided by the Corporation for Public Broadcasting and is maintained by WGBH. Copyright 2018, WGBH Educational Foundation, on behalf of the American Archive of Public Broadcasting.

diff --git a/views/style.sass b/views/style.sass index ac45cb1..f2b435e 100644 --- a/views/style.sass +++ b/views/style.sass @@ -16,7 +16,7 @@ textarea :color red h1 - :padding-left 76px + :padding-left 150px :background :image url(logo.png) :repeat no-repeat