diff --git a/README.md b/README.md index ad89666..b5dfd26 100644 --- a/README.md +++ b/README.md @@ -57,3 +57,15 @@ This pack is based on our [fork of the hdt-cpp](https://github.com/JanWielemaker/hdt-cpp). The fork is mostly the work of Peter Ludemann, fixing several issues with modern C++ and libraries. + +## Notes + +The `hdt-cpp` directory has a number of utilities in +`hdt-cpp/libhdt/tools`. For details, see `hdt-cpp/README.md`: +* hdt2rdf +* hdtInfo +* hdtSearch +* modifyHeader +* rdf2hdt +* replaceHeader +* searchHeader diff --git a/c/hdt4pl.cpp b/c/hdt4pl.cpp index c55d402..8df44e4 100644 --- a/c/hdt4pl.cpp +++ b/c/hdt4pl.cpp @@ -624,31 +624,56 @@ PREDICATE(hdt_search_cost_id, 5) PREDICATE(hdt_create_from_file, 3) { static PlAtom ATOM_base_uri("base_uri"); + static PlAtom ATOM_format("format"); char *hdt_file, *rdf_file; HDTSpecification spec; std::string base_uri("http://example.org/base"); + RDFNotation notation = NTRIPLES; if ( !A1.get_file_name(&hdt_file, PL_FILE_OSPATH) || - !A1.get_file_name(&rdf_file, PL_FILE_OSPATH|PL_FILE_READ) ) + !A2.get_file_name(&rdf_file, PL_FILE_OSPATH|PL_FILE_READ) ) return false; PlTerm_tail options(A3); PlTerm_var opt; while(options.next(opt)) - { PlAtom name(PlAtom::null); + { PlAtom name(PlAtom::null); size_t arity; if ( opt.get_name_arity(&name, &arity) && arity == 1 ) { PlTerm ov = opt[1]; - if ( name == ATOM_base_uri ) - base_uri = ov.get_nchars(CVT_ATOM|CVT_STRING|CVT_EXCEPTION|REP_UTF8); + { base_uri = ov.get_nchars(CVT_ATOM|CVT_STRING|CVT_EXCEPTION|REP_UTF8); + } else if ( name == ATOM_format ) + { std::string format = ov.get_nchars(CVT_ATOM|CVT_STRING|CVT_EXCEPTION|REP_UTF8); + for ( auto &c : format ) + c = toupper(c); + // The following are the supported values per hdt-cpp/libhdt/include/HDTEnums.hpp + // and hdtInfo -h (which lists nquads,nq,ntriples,nt,trig,turtle,ttl + // but if an unsupported value is specified gives: + // - `ntriples' or `nt' for N-Triples + // - `nquads' or `nq' for N-Quads + // - `turtle' 
or `ttl' for Turtle + // - `trig' for TriG + + if ( format == "NTRIPLES" || format == "NT" ) + notation = NTRIPLES; + else if ( format == "TURTLE" || format == "TTL" ) + notation = TURTLE; + else if ( format == "NQUADS" || format == "NQ" ) + notation = NQUADS; + else if ( format == "TRIG" ) + notation = TRIG; + else + throw PlTypeError("format option", ov); + } else + throw PlTypeError("option", opt); } else throw PlTypeError("option", opt); } try - { unique_ptr hdt(HDTManager::generateHDT(rdf_file, base_uri.c_str(), NTRIPLES, spec)); + { unique_ptr hdt(HDTManager::generateHDT(rdf_file, base_uri.c_str(), notation, spec)); //Header *header = hdt->getHeader(); //header->insert("myResource1", "property", "value"); diff --git a/prolog/hdt.pl b/prolog/hdt.pl index 2eafcb0..c3a2561 100644 --- a/prolog/hdt.pl +++ b/prolog/hdt.pl @@ -393,12 +393,16 @@ %! hdt_create_from_file(+HDTFile, +RDFFile, +Options) % -% Create a HDT file from an RDF file. RDFFile must be in -% `ntriples` format. Options: +% Create a HDT file from an RDF file. The format of RDFFile +% defaults to `ntriples` format. Options: % % * base_uri(+URI) % URI is used for generating the header properties (see % http_header/4. +% * format(+Notation) +% Notation is one of: `ntriples`, `nt`, `turtle`, `ttl`, `nquads`, `nq`, `trig` +% (also upper/lower case variants such as "TriG" or "TTL") +% defaults to `ntriples`. /******************************* diff --git a/test/data/README.md b/test/data/README.md index d5e2d74..d7de89a 100644 --- a/test/data/README.md +++ b/test/data/README.md @@ -1,14 +1,18 @@ +The test file `breg-dcat-example.ttl` is from +https://gist.github.com/agmangas/162d866b8efa310a5f07077696d64d85 +which is referenced by +https://joinup.ec.europa.eu/collection/access-base-registries/solution/abr-bregdcat-ap/document/breg-dcat-practical-example +(Note that this has Unicode characters, so you should download the zip file and unzip it.) 
+ The file `hdt-cpp-libhdt-data-literals.hdt` is a copy of `../../hdt-cpp/libhdt/data/literals.hdt`. The files were generated by: - ``` +cd test/data && \ +../../hdt-cpp/libhdt/tools/rdf2hdt -f turtle breg-dcat-example.ttl breg-dcat-example.hdt + ./hdt-cpp/libhdt/tools/hdtInfo hdt-cpp-libhdt-data-literals.hdt >literals.hdtInfo-output echo '? ? ?' | ./hdt-cpp/libhdt/tools/hdtSearch hdt-cpp-libhdt-data-literals.hdt 2>/dev/null >literals.hdtSearch-output ``` -``` -echo '? ? ?' | $HOME/src/hdt/hdt-cpp/libhdt/tools/.libs/hdtSearch $HOME/src/hdt/test/data/hdt-cpp-libhdt-data-literals.hdt -``` -TODO: use hdt2rdf, rdf2hdt, library(hdt). diff --git a/test/data/breg-dcat-example.hdt b/test/data/breg-dcat-example.hdt new file mode 100644 index 0000000..07e7afb Binary files /dev/null and b/test/data/breg-dcat-example.hdt differ diff --git a/test/data/breg-dcat-example.ttl b/test/data/breg-dcat-example.ttl new file mode 100644 index 0000000..8b6841c --- /dev/null +++ b/test/data/breg-dcat-example.ttl @@ -0,0 +1,155 @@ +@prefix dcat: . +@prefix dct: . +@prefix foaf: . +@prefix skos: . +@prefix xsd: . +@prefix cpsv: . +@prefix cv: . + +# An example of a Public Organisation: +# "A Public Organisation is the responsible Agent for the delivery of a Public Service. +# This specification uses the class from the Core Public Organization Vocabulary, +# based also on the W3C Organization Ontology" +# All Public Registry Services must be related to a Public Organisation. + + + a cv:PublicOrganisation ; + dct:identifier "ae1d152b-57b7-4e78-bf6f-fbe8ceb0af06" ; + skos:prefLabel "Administración Pública del Gobierno de España"@es ; + dct:spatial ; + dct:title "Administración pública"@es . + +# An example of a Public Registry Service: +# "A Registry Service is a public service that creates, maintains +# and/or manages Base Registries or a Registry of Base Registries. 
+# This service, provided by public administrations, or by other organisations on their behalf, +# stores and provides basic information on authoritative data items such as +# people, companies, vehicles, licences, buildings, locations and roads." + + + a cpsv:PublicService ; + dct:description "National vehicles registration service provided by the Public Administration"@en ; + dct:description "Servicio nacional de registro de vehículos proporcionado por la Administración Pública"@es ; + dct:identifier "" ; + dct:title "Vehicles registration service"@en ; + dct:title "Servicio de registro de vehículos"@es ; + cv:hasCompetentAuthority . + +# An example of a Registry Catalog: +# "A catalogue or repository that lists Datasets or Data Services +# managed and provided by a Base Registry Service. +# The Registry Catalogue is the main output produced by a Register, +# so this specification considers both, DCAT-AP and CPSV-AP to model services and content." +# This example in particular represents a base registry of vehicles that exposes two distinct datasets. 
+ + + a dcat:Catalog ; + dct:description "This is a literal that contains an extended catalog description"@en ; + dct:identifier "9fc50d4e-d099-4139-b99f-75ccc18265b4" ; + dct:title "Vehicles Register"@en ; + dct:title "Registro de vehículos"@es ; + dct:publisher ; + foaf:homepage ; + dct:issued "2019-06-18T10:30:00"^^xsd:dateTime ; + dct:LinguisticSystem ; + dcat:themeTaxonomy ; + dct:spatial ; + dcat:dataset [ + a dcat:Dataset ; + dct:description "Dataset of legal owners of registered vehicles."@en ; + dct:identifier "" ; + dct:title "Vehicle owners"@en ; + dcat:theme ; + dcat:distribution [ + a dcat:Distribution ; + dct:description "Unstructured owners distribution (PDF)"@en ; + dcat:accessURL ; + dcat:mediaType + ] ; + dcat:distribution [ + a dcat:Distribution ; + dct:description "Structured owners distribution (JSON)"@en ; + dcat:accessURL ; + dcat:mediaType + ] ; + dct:spatial ; + dcat:keyword "Vehicle"@en ; + dcat:keyword "Transport"@en ; + dcat:keyword "Vehículo"@es ; + dcat:keyword "Transporte"@es + ] ; + dcat:dataset [ + a dcat:Dataset ; + dct:description "Dataset of vehicle transactions in the Principality of Asturias."@en ; + dct:identifier "" ; + dct:title "Vehicle transactions (Asturias)"@en ; + dcat:theme ; + dcat:theme ; + dcat:distribution [ + a dcat:Distribution ; + dcat:accessURL ; + dcat:mediaType + ] ; + dct:spatial ; + dct:temporal [ + a dct:PeriodOfTime ; + dcat:startDate "2019-01-01T00:00:00"^^xsd:dateTime ; + dcat:endDate "2020-01-01T00:00:00"^^xsd:dateTime + ] ; + dcat:keyword "Vehicle"@en ; + dcat:keyword "Transport"@en ; + dcat:keyword "Transaction"@en ; + ] . + +# An example of an Agent: +# "An entity that is associated with Public Services, Registries and/or Datasets. +# Agents may be either organisations or persons." + + + a foaf:Agent ; + foaf:name "Public Administration" ; + dct:identifier "public-admin" ; + skos:inScheme ; + dct:type . 
+ +# These assertions are somewhat redundant but are necessary to ensure +# that the BRegDCAT validator at the following URL does not produce error violations: +# https://www.itb.ec.europa.eu/shacl/bregdcat-ap/upload + + + a foaf:Document . + + + a skos:ConceptScheme ; + dct:title "EuroVoc" . + + + a skos:Concept ; + skos:prefLabel "National authority" ; + skos:inScheme . + + + a skos:ConceptScheme ; + dct:title "Publisher Type" . + + + a skos:Concept ; + skos:prefLabel "Transport" . + + + a skos:Concept ; + skos:prefLabel "Trade" . + + + a dct:Location ; + skos:inScheme . + + + a dct:Location ; + skos:inScheme . + + + a dct:MediaType . + + + a dct:MediaType . diff --git a/test/data/hdt-cpp-libhdt-data-literals.hdt b/test/data/hdt-cpp-libhdt-data-literals.hdt new file mode 100644 index 0000000..b67bf29 Binary files /dev/null and b/test/data/hdt-cpp-libhdt-data-literals.hdt differ diff --git a/test/test_hdt.pl b/test/test_hdt.pl index ae10e3d..ab2d5c6 100644 --- a/test/test_hdt.pl +++ b/test/test_hdt.pl @@ -2,6 +2,13 @@ /* These tests use the data in data/hdt-cpp-libhdt-data-literals.hdt See the comments in data/README.md about where this comes from. + + TODO: Test hdt_create_from_file/3. One possible way: + (cd data && ../../hdt-cpp/libhdt/tools/rdf2hdt -f turtle breg-dcat-example.ttl breg-dcat-example.hdt) + hdt_create_from_file('/tmp/breg-dcat-example.hdt', 'data/breg-dcat-example.ttl', [format(ttl),base_uri('file://breg-dcat-example.ttl')]). + hdt_open(H, '/tmp/breg-dcat-example.hdt'), foreach(hdt_search(H, S, P, O), (writeq(S-P-O),nl)), hdt_close(H). + ... and compare with data/breg-dcat-example.hdt + echo '? ? ?'|./hdt-cpp/libhdt/tools/hdtSearch /tmp/breg-dcat-example.hdt */ :- module(test_hdt, @@ -30,6 +37,40 @@ 1-1-9]), hdt_close(H). +/* + TBD: hdtSearch gives a slightly different result than hdt_search/4. 
+ This seems to be because TripleString::setObject() + removes suffix "^^http://www.w3.org/2001/XMLSchema#string" + Note that '^^' and '@' are binary operators (exported from library(hdt)). + echo '? ? ?'|./hdt-cpp/libhdt/tools/hdtSearch ./test/data/hdt-cpp-libhdt-data-literals.hdt +s p "a" +s p "a"@en +s p "a"^^ +s p "abc" +s p "abc"@en +s p "abc"^^ +s p "bc" +s p "bc"@en +s p "bc"^^ +9 results in 51 us +*/ + +test(hdt_search) :- + hdt_open_literals(H), + bagof(S-P-O, hdt_search(H, S, P, O), SPOs), + assertion(SPOs == + [s-p-("a"^^'http://www.w3.org/2001/XMLSchema#string'), + s-p-"a"@en, + s-p-("a"^^bcd), + s-p-("abc"^^'http://www.w3.org/2001/XMLSchema#string'), + s-p-"abc"@en, + s-p-("abc"^^bcd), + s-p-("bc"^^'http://www.w3.org/2001/XMLSchema#string'), + s-p-"bc"@en, + s-p-("bc"^^bcd) + ]), + hdt_close(H). + hdt_open_literals(H) :- predicate_property(test_hdt, file(TestHdtFile)), directory_file_path(TestHdtDir, _, TestHdtFile),