Skip to content

Commit

Permalink
Add format option to hdt_create_from_file/3
Browse files Browse the repository at this point in the history
  • Loading branch information
kamahen authored and JanWielemaker committed Jul 12, 2024
1 parent 4f49cc6 commit 878feb1
Show file tree
Hide file tree
Showing 8 changed files with 253 additions and 12 deletions.
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,15 @@ This pack is based on our [fork of the
hdt-cpp](https://github.com/JanWielemaker/hdt-cpp). The fork is mostly
the work of Peter Ludemann, fixing several issues with modern C++ and
libraries.

## Notes

The `hdt-cpp` directory has a number of utilities in
`hdt-cpp/libhdt/tools`. For details, see `hdt-cpp/README.md`:
* hdt2rdf
* hdtInfo
* hdtSearch
* modifyHeader
* rdf2hdt
* replaceHeader
* searchHeader
35 changes: 30 additions & 5 deletions c/hdt4pl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -624,31 +624,56 @@ PREDICATE(hdt_search_cost_id, 5)

PREDICATE(hdt_create_from_file, 3)
{ static PlAtom ATOM_base_uri("base_uri");
static PlAtom ATOM_format("format");
char *hdt_file, *rdf_file;
HDTSpecification spec;
std::string base_uri("http://example.org/base");
RDFNotation notation = NTRIPLES;

if ( !A1.get_file_name(&hdt_file, PL_FILE_OSPATH) ||
!A1.get_file_name(&rdf_file, PL_FILE_OSPATH|PL_FILE_READ) )
!A2.get_file_name(&rdf_file, PL_FILE_OSPATH|PL_FILE_READ) )
return false;

PlTerm_tail options(A3);
PlTerm_var opt;
while(options.next(opt))
{ PlAtom name(PlAtom::null);
{ PlAtom name(PlAtom::null);
size_t arity;

if ( opt.get_name_arity(&name, &arity) && arity == 1 )
{ PlTerm ov = opt[1];

if ( name == ATOM_base_uri )
base_uri = ov.get_nchars(CVT_ATOM|CVT_STRING|CVT_EXCEPTION|REP_UTF8);
{ base_uri = ov.get_nchars(CVT_ATOM|CVT_STRING|CVT_EXCEPTION|REP_UTF8);
} else if ( name == ATOM_format )
{ std::string format = ov.get_nchars(CVT_ATOM|CVT_STRING|CVT_EXCEPTION|REP_UTF8);
for ( auto &c : format )
c = toupper(c);
// The following are the supported values per hdt-cpp/libhdt/include/HDTEnums.hpp
// and hdtInfo -h (which lists nquads,nq,ntriples,nt,trig,turtle,ttl,
// but if an unsupported value is specified gives the following):
// - `ntriples' or `nt' for N-Triples
// - `nquads' or `nq' for N-Quads
// - `turtle' or `ttl' for Turtle
// - `trig' for TriG

if ( format == "NTRIPLES" || format == "NT" )
notation = NTRIPLES;
else if ( format == "TURTLE" || format == "TTL" )
notation = TURTLE;
else if ( format == "NQUADS" || format == "NQ" )
notation = NQUADS;
else if ( format == "TRIG" )
notation = TRIG;
else
throw PlTypeError("format option", ov);
} else
throw PlTypeError("option", opt);
} else
throw PlTypeError("option", opt);
}

try
{ unique_ptr<HDT> hdt(HDTManager::generateHDT(rdf_file, base_uri.c_str(), NTRIPLES, spec));
{ unique_ptr<HDT> hdt(HDTManager::generateHDT(rdf_file, base_uri.c_str(), notation, spec));

//Header *header = hdt->getHeader();
//header->insert("myResource1", "property", "value");
Expand Down
8 changes: 6 additions & 2 deletions prolog/hdt.pl
Original file line number Diff line number Diff line change
Expand Up @@ -393,12 +393,16 @@

%! hdt_create_from_file(+HDTFile, +RDFFile, +Options)
%
% Create a HDT file from an RDF file. RDFFile must be in
% `ntriples` format. Options:
% Create a HDT file from an RDF file. The format of RDFFile
% defaults to `ntriples` format. Options:
%
% * base_uri(+URI)
% URI is used for generating the header properties (see
% http_header/4).
% * format(+Notation)
% Notation is one of: `ntriples`, `nt`, `turtle`, `ttl`, `nquads`, `nq`, `trig`
% (matched case-insensitively, so variants such as "TriG" or "TTL" are
% also accepted). Defaults to `ntriples`.


/*******************************
Expand Down
14 changes: 9 additions & 5 deletions test/data/README.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
The test file `breg-dcat-example.ttl` is from
https://gist.github.com/agmangas/162d866b8efa310a5f07077696d64d85
which is referenced by
https://joinup.ec.europa.eu/collection/access-base-registries/solution/abr-bregdcat-ap/document/breg-dcat-practical-example
(Note that this has Unicode characters, so you should download the zip file and unzip it.)

The file `hdt-cpp-libhdt-data-literals.hdt` is a copy of `../../hdt-cpp/libhdt/data/literals.hdt`.

The files were generated by:

```
cd test/data && \
../../hdt-cpp/libhdt/tools/rdf2hdt -f turtle breg-dcat-example.ttl breg-dcat-example.hdt
./hdt-cpp/libhdt/tools/hdtInfo hdt-cpp-libhdt-data-literals.hdt >literals.hdtInfo-output
echo '? ? ?' | ./hdt-cpp/libhdt/tools/hdtSearch hdt-cpp-libhdt-data-literals.hdt 2>/dev/null >literals.hdtSearch-output
```

```
echo '? ? ?' | $HOME/src/hdt/hdt-cpp/libhdt/tools/.libs/hdtSearch $HOME/src/hdt/test/data/hdt-cpp-libhdt-data-literals.hdt
```

TODO: use hdt2rdf, rdf2hdt, library(hdt).
Binary file added test/data/breg-dcat-example.hdt
Binary file not shown.
155 changes: 155 additions & 0 deletions test/data/breg-dcat-example.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dct: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix cpsv: <http://purl.org/vocab/cpsv#> .
@prefix cv: <http://data.europa.eu/m8g/> .

# An example of a Public Organisation:
# "A Public Organisation is the responsible Agent for the delivery of a Public Service.
# This specification uses the class from the Core Public Organization Vocabulary,
# based also on the W3C Organization Ontology"
# All Public Registry Services must be related to a Public Organisation.

<https://joinup.ec.europa.eu/collection/access-base-registries/solution/abr-bregdcat-ap#example-public-organisation>
a cv:PublicOrganisation ;
dct:identifier "ae1d152b-57b7-4e78-bf6f-fbe8ceb0af06" ;
skos:prefLabel "Administración Pública del Gobierno de España"@es ;
dct:spatial <http://publications.europa.eu/resource/authority/country/ESP> ;
dct:title "Administración pública"@es .

# An example of a Public Registry Service:
# "A Registry Service is a public service that creates, maintains
# and/or manages Base Registries or a Registry of Base Registries.
# This service, provided by public administrations, or by other organisations on their behalf,
# stores and provides basic information on authoritative data items such as
# people, companies, vehicles, licences, buildings, locations and roads."

<https://joinup.ec.europa.eu/collection/access-base-registries/solution/abr-bregdcat-ap#example-public-service>
a cpsv:PublicService ;
dct:description "National vehicles registration service provided by the Public Administration"@en ;
dct:description "Servicio nacional de registro de vehículos proporcionado por la Administración Pública"@es ;
dct:identifier "<https://example.com/vehicles-service>" ;
dct:title "Vehicles registration service"@en ;
dct:title "Servicio de registro de vehículos"@es ;
cv:hasCompetentAuthority <https://joinup.ec.europa.eu/collection/access-base-registries/solution/abr-bregdcat-ap#example-public-organisation> .

# An example of a Registry Catalog:
# "A catalogue or repository that lists Datasets or Data Services
# managed and provided by a Base Registry Service.
# The Registry Catalogue is the main output produced by a Register,
# so this specification considers both, DCAT-AP and CPSV-AP to model services and content."
# This example in particular represents a base registry of vehicles that exposes two distinct datasets.

<https://joinup.ec.europa.eu/collection/access-base-registries/solution/abr-bregdcat-ap#example-catalog>
a dcat:Catalog ;
dct:description "This is a literal that contains an extended catalog description"@en ;
dct:identifier "9fc50d4e-d099-4139-b99f-75ccc18265b4" ;
dct:title "Vehicles Register"@en ;
dct:title "Registro de vehículos"@es ;
dct:publisher <https://government.example.com> ;
foaf:homepage <https://example.com/vehicles-register> ;
dct:issued "2019-06-18T10:30:00"^^xsd:dateTime ;
dct:LinguisticSystem <http://id.loc.gov/vocabulary/iso639-1/en> ;
dcat:themeTaxonomy <http://eurovoc.europa.eu/100141> ;
dct:spatial <http://publications.europa.eu/resource/authority/country/ESP> ;
dcat:dataset [
a dcat:Dataset ;
dct:description "Dataset of legal owners of registered vehicles."@en ;
dct:identifier "<https://example.com/vehicles-register/owners>" ;
dct:title "Vehicle owners"@en ;
dcat:theme <http://eurovoc.europa.eu/100154> ;
dcat:distribution [
a dcat:Distribution ;
dct:description "Unstructured owners distribution (PDF)"@en ;
dcat:accessURL <https://example.com/vehicles-register/owners.pdf> ;
dcat:mediaType <https://www.w3.org/ns/iana/media-types/application/pdf#Resource>
] ;
dcat:distribution [
a dcat:Distribution ;
dct:description "Structured owners distribution (JSON)"@en ;
dcat:accessURL <https://example.com/vehicles-register/owners.json> ;
dcat:mediaType <https://www.w3.org/ns/iana/media-types/application/json#Resource>
] ;
dct:spatial <http://publications.europa.eu/resource/authority/country/ESP> ;
dcat:keyword "Vehicle"@en ;
dcat:keyword "Transport"@en ;
dcat:keyword "Vehículo"@es ;
dcat:keyword "Transporte"@es
] ;
dcat:dataset [
a dcat:Dataset ;
dct:description "Dataset of vehicle transactions in the Principality of Asturias."@en ;
dct:identifier "<https://example.com/vehicles-register/transactions>" ;
dct:title "Vehicle transactions (Asturias)"@en ;
dcat:theme <http://eurovoc.europa.eu/100154> ;
dcat:theme <http://eurovoc.europa.eu/100147> ;
dcat:distribution [
a dcat:Distribution ;
dcat:accessURL <https://example.com/vehicles-register/transactions.json> ;
dcat:mediaType <https://www.w3.org/ns/iana/media-types/application/json#Resource>
] ;
dct:spatial <https://sws.geonames.org/3114710/> ;
dct:temporal [
a dct:PeriodOfTime ;
dcat:startDate "2019-01-01T00:00:00"^^xsd:dateTime ;
dcat:endDate "2020-01-01T00:00:00"^^xsd:dateTime
] ;
dcat:keyword "Vehicle"@en ;
dcat:keyword "Transport"@en ;
dcat:keyword "Transaction"@en ;
] .

# An example of an Agent:
# "An entity that is associated with Public Services, Registries and/or Datasets.
# Agents may be either organisations or persons."

<https://government.example.com>
a foaf:Agent ;
foaf:name "Public Administration" ;
dct:identifier "public-admin" ;
skos:inScheme <http://publications.europa.eu/resource/authority/corporate-body> ;
dct:type <http://purl.org/adms/publishertype/NationalAuthority> .

# These assertions are somewhat redundant but are necessary to ensure
# that the BRegDCAT validator at the following URL does not produce error violations:
# https://www.itb.ec.europa.eu/shacl/bregdcat-ap/upload

<https://example.com/vehicles-register>
a foaf:Document .

<http://eurovoc.europa.eu/100141>
a skos:ConceptScheme ;
dct:title "EuroVoc" .

<http://purl.org/adms/publishertype/NationalAuthority>
a skos:Concept ;
skos:prefLabel "National authority" ;
skos:inScheme <http://purl.org/adms/publishertype/1.0> .

<http://purl.org/adms/publishertype/1.0>
a skos:ConceptScheme ;
dct:title "Publisher Type" .

<http://eurovoc.europa.eu/100154>
a skos:Concept ;
skos:prefLabel "Transport" .

<http://eurovoc.europa.eu/100147>
a skos:Concept ;
skos:prefLabel "Trade" .

<http://publications.europa.eu/resource/authority/country/ESP>
a dct:Location ;
skos:inScheme <http://publications.europa.eu/resource/authority/country> .

<https://sws.geonames.org/3114710/>
a dct:Location ;
skos:inScheme <http://sws.geonames.org> .

<https://www.w3.org/ns/iana/media-types/application/pdf#Resource>
a dct:MediaType .

<https://www.w3.org/ns/iana/media-types/application/json#Resource>
a dct:MediaType .
Binary file added test/data/hdt-cpp-libhdt-data-literals.hdt
Binary file not shown.
41 changes: 41 additions & 0 deletions test/test_hdt.pl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@

/* These tests use the data in data/hdt-cpp-libhdt-data-literals.hdt
See the comments in data/README.md about where this comes from.
TODO: Test hdt_create_from_file/3. One possible way:
(cd data && ../../hdt-cpp/libhdt/tools/rdf2hdt -f turtle breg-dcat-example.ttl breg-dcat-example.hdt)
hdt_create_from_file('/tmp/breg-dcat-example.hdt', 'data/breg-dcat-example.ttl', [format(ttl),base_uri('file://breg-dcat-example.ttl')]).
hdt_open(H, '/tmp/breg-dcat-example.hdt'), foreach(hdt_search(H, S, P, O), (writeq(S-P-O),nl)), hdt_close(H).
... and compare with data/breg-dcat-example.hdt
echo '? ? ?'|./hdt-cpp/libhdt/tools/hdtSearch /tmp/breg-dcat-example.hdt
*/

:- module(test_hdt,
Expand Down Expand Up @@ -30,6 +37,40 @@
1-1-9]),
hdt_close(H).

/*
TBD: hdtSearch gives a slightly different result than hdt_search/4.
This seems to be because TripleString::setObject()
removes suffix "^^http://www.w3.org/2001/XMLSchema#string"
Note that '^^' and '@' are binary operators (exported from library(hdt)).
echo '? ? ?'|./hdt-cpp/libhdt/tools/hdtSearch ./test/data/hdt-cpp-libhdt-data-literals.hdt
s p "a"
s p "a"@en
s p "a"^^<bcd>
s p "abc"
s p "abc"@en
s p "abc"^^<bcd>
s p "bc"
s p "bc"@en
s p "bc"^^<bcd>
9 results in 51 us
*/

% Enumerate every triple reachable through the HDT handle returned by
% hdt_open_literals/1 using hdt_search/4 with all arguments unbound, and
% check that the complete solution list (including its order) is
% identical to the expected list below. Plain literals are expected to
% carry an explicit ^^'http://www.w3.org/2001/XMLSchema#string' datatype;
% language-tagged literals appear as Lexical@Lang.
test(hdt_search) :-
hdt_open_literals(H),
% Collect all S-P-O solutions; bagof/3 fails if there are none.
bagof(S-P-O, hdt_search(H, S, P, O), SPOs),
assertion(SPOs ==
[s-p-("a"^^'http://www.w3.org/2001/XMLSchema#string'),
s-p-"a"@en,
s-p-("a"^^bcd),
s-p-("abc"^^'http://www.w3.org/2001/XMLSchema#string'),
s-p-"abc"@en,
s-p-("abc"^^bcd),
s-p-("bc"^^'http://www.w3.org/2001/XMLSchema#string'),
s-p-"bc"@en,
s-p-("bc"^^bcd)
]),
% Release the handle once the comparison has been made.
hdt_close(H).

hdt_open_literals(H) :-
predicate_property(test_hdt, file(TestHdtFile)),
directory_file_path(TestHdtDir, _, TestHdtFile),
Expand Down

0 comments on commit 878feb1

Please sign in to comment.