From 878feb16997e70d6f317828bf236a5509d4b9947 Mon Sep 17 00:00:00 2001 From: Peter Ludemann Date: Mon, 24 Jun 2024 19:55:46 -0700 Subject: [PATCH] Add format option to hdt_create_from_file/3 --- README.md | 12 ++ c/hdt4pl.cpp | 35 ++++- prolog/hdt.pl | 8 +- test/data/README.md | 14 +- test/data/breg-dcat-example.hdt | Bin 0 -> 4742 bytes test/data/breg-dcat-example.ttl | 155 +++++++++++++++++++++ test/data/hdt-cpp-libhdt-data-literals.hdt | Bin 0 -> 3848 bytes test/test_hdt.pl | 41 ++++++ 8 files changed, 253 insertions(+), 12 deletions(-) create mode 100644 test/data/breg-dcat-example.hdt create mode 100644 test/data/breg-dcat-example.ttl create mode 100644 test/data/hdt-cpp-libhdt-data-literals.hdt diff --git a/README.md b/README.md index ad89666..b5dfd26 100644 --- a/README.md +++ b/README.md @@ -57,3 +57,15 @@ This pack is based on our [fork of the hdt-cpp](https://github.com/JanWielemaker/hdt-cpp). The fork is mostly the work of Peter Ludemann, fixing several issues with modern C++ and libraries. + +## Notes + +The `hdt-cpp` directory has a number of utilities in +`hdt-cpp/libhdt/tools`. 
For details, see `hdt-cpp/README.md`: +* hdt2rdf +* hdtInfo +* hdtSearch +* modifyHeader +* rdf2hdt +* replaceHeader +* searchHeader diff --git a/c/hdt4pl.cpp b/c/hdt4pl.cpp index c55d402..8df44e4 100644 --- a/c/hdt4pl.cpp +++ b/c/hdt4pl.cpp @@ -624,31 +624,56 @@ PREDICATE(hdt_search_cost_id, 5) PREDICATE(hdt_create_from_file, 3) { static PlAtom ATOM_base_uri("base_uri"); + static PlAtom ATOM_format("format"); char *hdt_file, *rdf_file; HDTSpecification spec; std::string base_uri("http://example.org/base"); + RDFNotation notation = NTRIPLES; if ( !A1.get_file_name(&hdt_file, PL_FILE_OSPATH) || - !A1.get_file_name(&rdf_file, PL_FILE_OSPATH|PL_FILE_READ) ) + !A2.get_file_name(&rdf_file, PL_FILE_OSPATH|PL_FILE_READ) ) return false; PlTerm_tail options(A3); PlTerm_var opt; while(options.next(opt)) - { PlAtom name(PlAtom::null); + { PlAtom name(PlAtom::null); size_t arity; if ( opt.get_name_arity(&name, &arity) && arity == 1 ) { PlTerm ov = opt[1]; - if ( name == ATOM_base_uri ) - base_uri = ov.get_nchars(CVT_ATOM|CVT_STRING|CVT_EXCEPTION|REP_UTF8); + { base_uri = ov.get_nchars(CVT_ATOM|CVT_STRING|CVT_EXCEPTION|REP_UTF8); + } else if ( name == ATOM_format ) + { std::string format = ov.get_nchars(CVT_ATOM|CVT_STRING|CVT_EXCEPTION|REP_UTF8); + for ( auto &c : format ) + c = toupper(c); + // The following are the supported values per hdt-cpp/libhdt/include/HDTEnums.hpp + // and hdtInfo -h (which lists nquads,nq,ntriples,nt,trig,turtle,ttl + // but if an unsupported value is specified gives: + // - `ntriples' or `nt' for N-Triples + // - `nquads' or `nq' for N-Quads + // - `turtle' or `ttl' for Turtle + // - `trig' for TriG + + if ( format == "NTRIPLES" || format == "NT" ) + notation = NTRIPLES; + else if ( format == "TURTLE" || format == "TTL" ) + notation = TURTLE; + else if ( format == "NQUADS" || format == "NQ" ) + notation = NQUADS; + else if ( format == "TRIG" ) + notation = TRIG; + else + throw PlTypeError("format option", ov); + } else + throw PlTypeError("option", 
opt); } else throw PlTypeError("option", opt); } try - { unique_ptr hdt(HDTManager::generateHDT(rdf_file, base_uri.c_str(), NTRIPLES, spec)); + { unique_ptr hdt(HDTManager::generateHDT(rdf_file, base_uri.c_str(), notation, spec)); //Header *header = hdt->getHeader(); //header->insert("myResource1", "property", "value"); diff --git a/prolog/hdt.pl b/prolog/hdt.pl index 2eafcb0..c3a2561 100644 --- a/prolog/hdt.pl +++ b/prolog/hdt.pl @@ -393,12 +393,16 @@ %! hdt_create_from_file(+HDTFile, +RDFFile, +Options) % -% Create a HDT file from an RDF file. RDFFile must be in -% `ntriples` format. Options: +% Create a HDT file from an RDF file. The format of RDFFile +% defaults to `ntriples` format. Options: % % * base_uri(+URI) % URI is used for generating the header properties (see % http_header/4. +% * format(+Notation) +% Notation is one of: `ntriples`, `nt`, `turtle`, `ttl`, `nquads`, `nq`, `trig` +% (also upper/lower case variants such as "TriG" or "TTL") +% defaults to `ntriples`. /******************************* diff --git a/test/data/README.md b/test/data/README.md index d5e2d74..d7de89a 100644 --- a/test/data/README.md +++ b/test/data/README.md @@ -1,14 +1,18 @@ +The test file `breg-dcat-example.ttl` is from +https://gist.github.com/agmangas/162d866b8efa310a5f07077696d64d85 +which is referenced by +https://joinup.ec.europa.eu/collection/access-base-registries/solution/abr-bregdcat-ap/document/breg-dcat-practical-example +(Note that this has Unicode characters, so you should download the zip file and unzip it.) + The file `hdt-cpp-libhdt-data-literals.hdt` is a copy of `../../hdt-cpp/libhdt/data/literals.hdt`. The files were generated by: - ``` +cd test/data && \ +../../hdt-cpp/libhdt/tools/rdf2hdt -f turtle breg-dcat-example.ttl breg-dcat-example.hdt + ./hdt-cpp/libhdt/tools/hdtInfo hdt-cpp-libhdt-data-literals.hdt >literals.hdtInfo-output echo '? ? ?' 
| ./hdt-cpp/libhdt/tools/hdtSearch hdt-cpp-libhdt-data-literals.hdt 2>/dev/null >literals.hdtSearch-output ``` -``` -echo '? ? ?' | $HOME/src/hdt/hdt-cpp/libhdt/tools/.libs/hdtSearch $HOME/src/hdt/test/data/hdt-cpp-libhdt-data-literals.hdt -``` -TODO: use hdt2rdf, rdf2hdt, library(hdt). diff --git a/test/data/breg-dcat-example.hdt b/test/data/breg-dcat-example.hdt new file mode 100644 index 0000000000000000000000000000000000000000..07e7afba36adb51e1c6d8a91591f6054a948f30f GIT binary patch literal 4742 zcmcInYit}>6~1dqqf$#CR84a3j%LM)4Yc7q?upU;GtHtgB1KWjZuV}xgoe5 za08!u4&lflZZ)XwF$$UNZuFHeK2)I&0ejOYRn@j|pprRUhn49da7LBb{p{>)dUjZD zV3bNFP0whVjOyDJmABjgj?1uc(sE&2jFhy%c53^Z z_PK}(H>xCy`Q+Av@AKpN%p-n42?tl1?CrR+DG#V?1?7f$oLGT$sQ*74j&=MMGTZRi zch)?>fr!z<&ZBOH`E|Hu?x-?7(s{$TUdlfAUc+?2xA3{a{pN3OW2-Ru4ZDO7!Rz!$ zbQ$+aU#K>jHiygVxPXov@to2R3#)Xa+K$Ry$?uL*;3hKh4FQg5|W4=w` zY?I~jv9P8+{dZGa=F=*5u@jXcU&=-2xfNs3p2%)EUx*I!CSBPVK&PIR^9HdY?u?$v zs(N0{WG9W%h+&N63{@|T=#uS(u)U+tNnC+0P9?rk1g|~D?jEG<87|}syOF+U;B3CV zGLg8eJ%xVq>pPEJ9(rtg#57PlgWAKWoki^&YUj~>q_(8tvnI3B;@89QqZztxWDWH7 z0{U7midF!9lHl{Ja))`=UWdsd?5iUcNhr|6hLJ58x`sqWMG`#|28cJsF4pSArdSo}a}Bs5rhM%pL?f1dh7V&JQT?UffK zs?wP|PrO1C5rAwk>cK_5Sk-EnA7FKl1aMH@0|q2Tpy)KmVW7B%RM&C~PPmR%ZNe>} z2yrZEmD=EdRwx|(nrtJq+JO>UZ0B1;RmiabXldzZ+@vZ?O`9b(Pkb6HniRYNh>MgW1S9e^V7Clj6xT5K4 zL#wem@o*Ik1rX7zUwFMW@Y4P#S8gQk{=ADWoEyX+v9JEK{07jDQBrlF8e>w|fo%lg zJn=+KH9b{=G8o#iBZp<`I0d(JFbj07ogXp|FbL zkScbRaA$EJ^BJ*~nB&r5@fNF)8eLQIR6!Xw@;Slim z6tf`um|B$%GbhpMc%x`@Uy_*;<*dL!--Kqf6|)uYC8tuQTL;mPGQ@@PO<$ptg5DJY zQLey0R-oVn*aZrM1I1$SjYwU1 z?kZ#s2u@Tj;!Oy(LtfI!qK|E;$B7tc^5IDcG)ST|;=oT68P>v47oxajWa9@Pjslws zWid)Xx;(WGFAv7Z2o09yl4Y)hr$T&re>$q^IvmJ1N!TDfF}USu0TaVEav4+270iN~ zC557DR`M0KVv?doOdVJBJfJR;4A6V(COgvwu*gPn2mv(KtEo%WPQ$R826=~XT??BH z2fi{i%2^(|-O!*?+ZscBAqN3bO=h>y6*-i229mD!z(%3W91-FyS$K7v_>|dW6~TOH zMaH689&e_?s6+6Ax@;T**6B^Zpd4VXMZ5rgE7FyYm>hum2*q8C=;9XYaj*xen@YmI zDySuRVpc;;NLv`FA~sBmDFj`H;R8L_i=^b#do=57;L@u{t;Z(O-;x^Tw(9 w@BQ%kW7c~k literal 0 HcmV?d00001 diff --git 
a/test/data/breg-dcat-example.ttl b/test/data/breg-dcat-example.ttl new file mode 100644 index 0000000..8b6841c --- /dev/null +++ b/test/data/breg-dcat-example.ttl @@ -0,0 +1,155 @@ +@prefix dcat: . +@prefix dct: . +@prefix foaf: . +@prefix skos: . +@prefix xsd: . +@prefix cpsv: . +@prefix cv: . + +# An example of a Public Organisation: +# "A Public Organisation is the responsible Agent for the delivery of a Public Service. +# This specification uses the class from the Core Public Organization Vocabulary, +# based also on the W3C Organization Ontology" +# All Public Registry Services must be related to a Public Organisation. + + + a cv:PublicOrganisation ; + dct:identifier "ae1d152b-57b7-4e78-bf6f-fbe8ceb0af06" ; + skos:prefLabel "Administración Pública del Gobierno de España"@es ; + dct:spatial ; + dct:title "Administración pública"@es . + +# An example of a Public Registry Service: +# "A Registry Service is a public service that creates, maintains +# and/or manages Base Registries or a Registry of Base Registries. +# This service, provided by public administrations, or by other organisations on their behalf, +# stores and provides basic information on authoritative data items such as +# people, companies, vehicles, licences, buildings, locations and roads." + + + a cpsv:PublicService ; + dct:description "National vehicles registration service provided by the Public Administration"@en ; + dct:description "Servicio nacional de registro de vehículos proporcionado por la Administración Pública"@es ; + dct:identifier "" ; + dct:title "Vehicles registration service"@en ; + dct:title "Servicio de registro de vehículos"@es ; + cv:hasCompetentAuthority . + +# An example of a Registry Catalog: +# "A catalogue or repository that lists Datasets or Data Services +# managed and provided by a Base Registry Service. +# The Registry Catalogue is the main output produced by a Register, +# so this specification considers both, DCAT-AP and CPSV-AP to model services and content." 
+# This example in particular represents a base registry of vehicles that exposes two distinct datasets. + + + a dcat:Catalog ; + dct:description "This is a literal that contains an extended catalog description"@en ; + dct:identifier "9fc50d4e-d099-4139-b99f-75ccc18265b4" ; + dct:title "Vehicles Register"@en ; + dct:title "Registro de vehículos"@es ; + dct:publisher ; + foaf:homepage ; + dct:issued "2019-06-18T10:30:00"^^xsd:dateTime ; + dct:LinguisticSystem ; + dcat:themeTaxonomy ; + dct:spatial ; + dcat:dataset [ + a dcat:Dataset ; + dct:description "Dataset of legal owners of registered vehicles."@en ; + dct:identifier "" ; + dct:title "Vehicle owners"@en ; + dcat:theme ; + dcat:distribution [ + a dcat:Distribution ; + dct:description "Unstructured owners distribution (PDF)"@en ; + dcat:accessURL ; + dcat:mediaType + ] ; + dcat:distribution [ + a dcat:Distribution ; + dct:description "Structured owners distribution (JSON)"@en ; + dcat:accessURL ; + dcat:mediaType + ] ; + dct:spatial ; + dcat:keyword "Vehicle"@en ; + dcat:keyword "Transport"@en ; + dcat:keyword "Vehículo"@es ; + dcat:keyword "Transporte"@es + ] ; + dcat:dataset [ + a dcat:Dataset ; + dct:description "Dataset of vehicle transactions in the Principality of Asturias."@en ; + dct:identifier "" ; + dct:title "Vehicle transactions (Asturias)"@en ; + dcat:theme ; + dcat:theme ; + dcat:distribution [ + a dcat:Distribution ; + dcat:accessURL ; + dcat:mediaType + ] ; + dct:spatial ; + dct:temporal [ + a dct:PeriodOfTime ; + dcat:startDate "2019-01-01T00:00:00"^^xsd:dateTime ; + dcat:endDate "2020-01-01T00:00:00"^^xsd:dateTime + ] ; + dcat:keyword "Vehicle"@en ; + dcat:keyword "Transport"@en ; + dcat:keyword "Transaction"@en ; + ] . + +# An example of an Agent: +# "An entity that is associated with Public Services, Registries and/or Datasets. +# Agents may be either organisations or persons." 
+ + + a foaf:Agent ; + foaf:name "Public Administration" ; + dct:identifier "public-admin" ; + skos:inScheme ; + dct:type . + +# These assertions are somewhat redundant but are necessary to ensure +# that the BRegDCAT validator at the following URL does not produce error violations: +# https://www.itb.ec.europa.eu/shacl/bregdcat-ap/upload + + + a foaf:Document . + + + a skos:ConceptScheme ; + dct:title "EuroVoc" . + + + a skos:Concept ; + skos:prefLabel "National authority" ; + skos:inScheme . + + + a skos:ConceptScheme ; + dct:title "Publisher Type" . + + + a skos:Concept ; + skos:prefLabel "Transport" . + + + a skos:Concept ; + skos:prefLabel "Trade" . + + + a dct:Location ; + skos:inScheme . + + + a dct:Location ; + skos:inScheme . + + + a dct:MediaType . + + + a dct:MediaType . diff --git a/test/data/hdt-cpp-libhdt-data-literals.hdt b/test/data/hdt-cpp-libhdt-data-literals.hdt new file mode 100644 index 0000000000000000000000000000000000000000..b67bf29ee488fce729a1476fbb855aacc13e0556 GIT binary patch literal 3848 zcmc&$&1(}u6rWA1)>g!CFQPH6h)BBGwAChUW6=sqYb&N6yjV8bZMwSIU3Mp}^&pK1 z+N)PV1U>kdD1rw)=}j*lJO~~NiU{?+`AA~cXd1;n_+{qJd+#@IX5Q?i_uTl5GBQu8 zmr+%(?Av{=UsPdGoi}I?v2z96rqom_;Tv;_V9BBSjf4~Q&?V2TMTGJg*Qp+u zG#l&d`qy(o9s$eE3DgDGGJ0B`D--8z_;gf%Vsx z_0PalZ6N1^KsK8ZU0K_q3=OM*+3EfguC=DM6kIK!*^KD2usW8|yhcHVJ68%+p4e~h zP>p4hUsJqIKcyHVm!E-Js8cRAmRB1T7kv{bby_R+Hb@%kvkivaybg(o^>8}4dP%%` z>t!qHHw4f5ad~~);HYlQU$O9|FbZ~c*Pn>zSx&Ltbp+N;GY9qS6!8<3I;pj_qj}pc zT+aa&a18Myk#{0gr>$QCL2=rmCEeo}ped`Ye&GP6Kf|9t$VlvKtFhC*Vfql3yEx64 zQuh%Itwzr4`c~0$bX#nKHh7}-Sa`feSSmdr_P@y|GBkg6H?1Hjn+Bd?QX5PpQwgnq zMmw2FX_@}y@uZeyy!gi;8sd;^sC_x)M~A@1V|9&c!*zu+dSYmpyx1REUcMJoNaX_& z5~B6@UZ4 zv)lwgF#*U8e`CoQzyP4nRe%o20}23)jRT~h9u?ROE%GCxaza^L_l+;7yzg(wdoZFf zr(?WNq!T%H>-!cl#vJxEI6Qrh6N>2W2IDuOvq-rxNqs;VFF65 z%&FcPcdK54>6q?@pMt7am#kDBePr| zo^HL4CylyXLLT~z> +s p "abc" +s p "abc"@en +s p "abc"^^ +s p "bc" +s p "bc"@en +s p "bc"^^ +9 results in 51 us +*/ + +test(hdt_search) :- + hdt_open_literals(H), + 
bagof(S-P-O, hdt_search(H, S, P, O), SPOs), + assertion(SPOs == + [s-p-("a"^^'http://www.w3.org/2001/XMLSchema#string'), + s-p-"a"@en, + s-p-("a"^^bcd), + s-p-("abc"^^'http://www.w3.org/2001/XMLSchema#string'), + s-p-"abc"@en, + s-p-("abc"^^bcd), + s-p-("bc"^^'http://www.w3.org/2001/XMLSchema#string'), + s-p-"bc"@en, + s-p-("bc"^^bcd) + ]), + hdt_close(H). + hdt_open_literals(H) :- predicate_property(test_hdt, file(TestHdtFile)), directory_file_path(TestHdtDir, _, TestHdtFile),