Skip to content

Commit

Permalink
Minor cleanup, comments, documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
kamahen authored and JanWielemaker committed Sep 24, 2024
1 parent f6a5f74 commit 6ec58e4
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 49 deletions.
36 changes: 21 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
# Build HDT library for SWI-Prolog

# TODO: hdt-cpp/Makefile has a lot of undefined variables
# MAKEFLAGS=--warn-undefined-variables

HDTHOME=hdt-cpp
LIBHDT=$(HDTHOME)/libhdt
LIBCDS=$(HDTHOME)/libcds
HDTLIB=$(LIBHDT)/.libs
CDSLIB=$(LIBCDS)/.libs
SOBJ= $(SWIPL_MODULE_DIR)/hdt4pl.$(SWIPL_MODULE_EXT)
OBJ= c/hdt4pl.o
NPROC:=$(shell expr $$(nproc) + 1)
MAKE_J=-j$(NPROC)
COFLAGS=-O2
COFLAGS=-O2 -Wall
CXXFLAGS=$(SWIPL_CFLAGS) -I$(LIBHDT)/include -std=c++17 $(COFLAGS)
# This doesn't work because the *.so files get picked first:
# LIBS= -L$(HDTLIB) -L$(CDSLIB) -lhdt -lcds
# Instead, we copy the *.a files into the same directory as $(OBJ)
# - see the rules for $(OBJ2).
OBJ= c/hdt4pl.o
LIBS= $(HDTLIB)/libhdt.a $(CDSLIB)/libcds.a
# WARNING: A previous version of this Makefile set LD=g++
# ... this confuses hdt-cpp's use of libtool.
Expand All @@ -24,21 +23,29 @@ LIBS= $(HDTLIB)/libhdt.a $(CDSLIB)/libcds.a
# "swipl pack install ." creates ./buildenv.sh, which
# defines the SWIPL_* environment variables

# The following variables should be set by Make, but in case they're
# not, get the values that swipl sets (also in buildenv.sh)
CC?=$(SWIPL_CC)
CXX?=$(SWIPL_CXX)

# The following should be set by buildenv.sh:
SWIPL?=swipl

# A dummy (sentinel) file, which is created when the build in $(HDTHOME) succeeds
HDT_CPP_SENTINEL=.hdt-cpp-sentinel

all: $(SOBJ)

$(SOBJ): $(OBJ) .hdt-cpp-sentinel
$(SOBJ): $(OBJ) $(HDT_CPP_SENTINEL)
mkdir -p $(SWIPL_MODULE_DIR)
$(CXX) $(SWIPL_MODULE_LDFLAGS) -o $@ $(OBJ) $(LIBS) $(SWIPL_MODULE_LIB) -lserd-0

c/hdt4pl.o: c/hdt4pl.cpp .hdt-cpp-sentinel
c/hdt4pl.o: c/hdt4pl.cpp $(HDT_CPP_SENTINEL)
$(CXX) $(CXXFLAGS) -c -o $@ c/hdt4pl.cpp

.hdt-cpp-sentinel: $(HDTHOME)/Makefile
$(HDT_CPP_SENTINEL): $(HDTHOME)/Makefile
set -x -e && $(MAKE) -C $(HDTHOME) $(MAKE_J)
touch .hdt-cpp-sentinel
touch $(HDT_CPP_SENTINEL)

$(HDTHOME)/Makefile:
./configure
Expand All @@ -54,13 +61,12 @@ check::
install::

clean:
rm -f $(OBJ) $(OBJ2)
$(RM) -f $(OBJ) $(OBJ2) $(HDT_CPP_SENTINEL)
[ ! -f $(HDTHOME)/Makefile ] || (cd $(HDTHOME) && git reset --hard)
[ ! -f $(HDTHOME)/Makefile ] || $(MAKE) -C $(HDTHOME) clean
rm -f .hdt-cpp-sentinel

distclean: clean
rm -f $(SOBJ)
$(RM) -f $(SOBJ)
[ ! -f $(HDTHOME)/Makefile ] || $(MAKE) -C $(HDTHOME) distclean
cd $(HDTHOME) && git clean -d -f -x

Expand All @@ -71,5 +77,5 @@ distclean: clean
# lines in configure and the "clean" rule of this Makefile

dev-build:
. ./buildenv.sh && $(MAKE)
swipl -g run_tests -t halt test/test_hdt.pl
. ./buildenv.sh && $(MAKE) all check
$(SWIPL) -g run_tests -t halt test/test_hdt.pl
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,23 @@ Installation on Windows requires more creativity though.

### Installing dependencies

In addition to the usual development tools such `make` and a C compiler
In addition to the usual development tools such as `make` and a C compiler,
we need GNU automake and related tools and the RDF base libraries `serd`
and `raptor2`. Below are the dependencies for `apt` based Linux systems
and `rpm` based systems.

For Debian/Ubuntu based systems
For Debian/Ubuntu based systems:

apt-get install libtool automake autoconf libserd-dev libraptor2-dev

For Fedora:

dnf install aclocal automake libtool serd-devel raptor2-devel

The minimum version of Serd is 0.28.0 (see `hdt-cpp/README.md`), which
can be checked by the command `serdi -v` (requires installing the
`serdi` package). This package has been tested with Serd 0.30.16.

3. After the prerequisites are installed, the HDT library can be
installed from within Prolog using the following command:

Expand Down
60 changes: 34 additions & 26 deletions c/hdt4pl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,9 @@ hdt_error(const char *e)


PREDICATE(hdt_open_, 3)
{ static PlAtom ATOM_map("map");
{ // Called by hdt_open/3, where FileAbs has been expanded by absolute_file_name/3
// A1 = HDT, A2 = FileAbs, A3 = Options
static PlAtom ATOM_map("map");
PlAtom access(ATOM_map); // default
int indexed = true;
PlTerm_tail options(A3);
Expand All @@ -139,8 +141,19 @@ PREDICATE(hdt_open_, 3)
throw PlDomainError("option", opt);
}

symb->file_name = A2.as_string();
const char* fn = symb->file_name.c_str();
// This predicate is called by hdt_open/3, which has already called
// absolute_file_name/3, so the call to PL_get_file_name() here is
// to get the file name into the appropriate OS style (Windows vs
// Unix), although that might not be needed (it's not clear where
// Windows accepts "/" as a path separator; but get_file_name() also
// deals with Unicode issues, which Windows will eventually
// support). PL_FILE_SEARCH is *not* specified because hdt_open/3
// has already done that.
// TODO: new version of PlTerm::get_file_name() that returns std::string.
// (requires SWI-Prolog 9.3.12)
char *fn;
PlCheckFail(A2.get_file_name(&fn, PL_FILE_OSPATH|PL_FILE_ABSOLUTE|PL_FILE_READ));
symb->file_name = fn;

try
{ if ( access == ATOM_map )
Expand Down Expand Up @@ -206,28 +219,21 @@ unify_object(PlTerm t, const char *s)
e--;
if ( e > s )
{ if ( e[1] == '\0' ) /* No type nor lang?? In header ... */
{ int rc;

s++;
rc = t.unify_chars(PL_STRING|REP_UTF8, e-s, s);
return rc;
{ s++;
return t.unify_chars(PL_STRING|REP_UTF8, e-s, s);
} else if ( strncmp(e+1, "^^<", 3) == 0 )
{ PlTermv av(2);
int rc;

s++;
rc = av[0].unify_chars(PL_STRING|REP_UTF8, e-s, s);
int rc = av[0].unify_chars(PL_STRING|REP_UTF8, e-s, s);
e += 4;
rc = rc && av[1].unify_chars(PL_ATOM|REP_UTF8, strlen(e)-1, e);
rc = rc && PL_cons_functor_v(av[0].C_, FUNCTOR_rdftype2.C_, av[0].C_); // TODO: av[0].cons_functor_v()
rc = rc && t.unify_term(av[0]);
return rc;
} else if ( strncmp(e+1, "@", 1) == 0 )
{ PlTermv av(2);
int rc;

s++;
rc = av[0].unify_chars(PL_STRING|REP_UTF8, e-s, s);
int rc = av[0].unify_chars(PL_STRING|REP_UTF8, e-s, s);
e += 2;
rc = rc && av[1].unify_chars(PL_ATOM|REP_UTF8, (size_t)-1, e);
rc = rc && PL_cons_functor_v(av[0].C_, FUNCTOR_rdflang2.C_, av[0].C_); // TODO: av[0].cons_functor_v()
Expand All @@ -248,8 +254,7 @@ unify_object(PlTerm t, const char *s)
*/

PREDICATE_NONDET(hdt_search, 5)
{ int rc;
auto ctx = handle.context_unique_ptr<search_it>();
{ auto ctx = handle.context_unique_ptr<search_it>();

static PlAtom ATOM_content("content");
static PlAtom ATOM_header("header");
Expand All @@ -274,7 +279,8 @@ PREDICATE_NONDET(hdt_search, 5)
else
throw PlDomainError("hdt_where", A2);
} CATCH_HDT;
} // TODO: [[fallthrough]]
}
[[fallthrough]];
case PL_REDO:
{ if ( ctx->it->hasNext() )
{ TripleString *t = ctx->it->next();
Expand Down Expand Up @@ -421,7 +427,8 @@ PREDICATE_NONDET(hdt_column_, 3)
throw PlDomainError("hdt_column", A2);
} CATCH_HDT;

} // TODO: [[fallthrough]]
}
[[fallthrough]];
case PL_REDO:
if ( ctx->it->hasNext() )
{ unsigned char *s = ctx->it->next();
Expand All @@ -445,21 +452,18 @@ PREDICATE_NONDET(hdt_column_, 3)

PREDICATE_NONDET(hdt_object_, 2)
{ auto ctx = handle.context_unique_ptr<IteratorUCharString_ctx>();
uintptr_t mask = 0;

switch(handle.foreign_control())
{ case PL_FIRST_CALL:
{ ctx.reset(new IteratorUCharString_ctx());
hdt_wrapper *symb = PlBlobV<hdt_wrapper>::cast_ex(A1, hdt_blob);

try
{ ctx->it.reset(symb->hdt->getDictionary()->getObjects());
} CATCH_HDT;
} // TODO: [[fallthrough]]
}
[[fallthrough]];
case PL_REDO:
if ( ctx->it->hasNext() )
{ unsigned char *s = ctx->it->next();

int rc = unify_object(A2, reinterpret_cast<const char*>(s));
ctx->it->freeStr(s);
if ( rc )
Expand Down Expand Up @@ -564,7 +568,8 @@ PREDICATE_NONDET(hdt_search_id, 4)
{ TripleID t(s,p,o);
ctx->it.reset(symb->hdt->getTriples()->search(t));
} CATCH_HDT;
} // TODO: [[fallthrough]]
}
[[fallthrough]];
case PL_REDO:
{ if ( ctx->it->hasNext() )
{ TripleID *t = ctx->it->next();
Expand Down Expand Up @@ -630,8 +635,11 @@ PREDICATE(hdt_create_from_file, 3)
std::string base_uri("http://example.org/base");
RDFNotation notation = NTRIPLES;

if ( !A1.get_file_name(&hdt_file, PL_FILE_OSPATH) ||
!A2.get_file_name(&rdf_file, PL_FILE_OSPATH|PL_FILE_READ) )
// TODO: call absolute_file_name/3 in Prolog, to allow default
// extension ".hdt" (also ".rdf"?)
// See comment in hdt_open_/3 with the call A2.get_file_name(...)
if ( !A1.get_file_name(&hdt_file, PL_FILE_OSPATH|PL_FILE_ABSOLUTE|PL_FILE_SEARCH) ||
!A2.get_file_name(&rdf_file, PL_FILE_OSPATH|PL_FILE_ABSOLUTE|PL_FILE_SEARCH|PL_FILE_READ) )
return false;

PlTerm_tail options(A3);
Expand Down
2 changes: 1 addition & 1 deletion configure
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ git submodule update --init ${HDTHOME}

cd ${HDTHOME}
./autogen.sh
./configure --prefix=${HOME}/.local CXXFLAGS='-fPIC -O2 -g' CFLAGS='-fPIC -O2 -g'
./configure --prefix=${HOME}/.local CXXFLAGS='-fPIC -O2 -Wall' CFLAGS='-fPIC -O2 -Wall'
4 changes: 2 additions & 2 deletions pack.pl
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
name(hdt).
version('0.5.4').
pack_version(2).
% swipl_version([90302]). % TODO: add stable version
title('Access RDF HDT files').
keywords(['RDF']).
author( 'Jan Wielemaker', '[email protected]' ).
packager( 'Jan Wielemaker', '[email protected]' ).
maintainer( 'Jan Wielemaker', '[email protected]' ).
home( 'https://github.com/JanWielemaker/hdt' ).
download( 'https://github.com/JanWielemaker/hdt/archive/V*.zip' ).
download( 'https://github.com/JanWielemaker/hdt.git' ).
requires(prolog:c_cxx(_)).
11 changes: 8 additions & 3 deletions prolog/hdt.pl
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,15 @@
% How the file is accessed. One of `map` (map the file
% into memory, default) or `load` (load the content of the
% file).
%
% - indexed(+Boolean)
% Whether an index is created. Default is `true`. Such an index
% is needed for partially instantiated calls to hdt_search/4.
% The index is maintained in a file with extension `.index.v1-1`
% in the same directory as the HDT file. An index is not needed
% if you only want to extract _all_ triples.
% in the same directory as the HDT file. (When the index is
% created, some statistics are printed to standard error.)
% An index is not needed if you only want to extract _all_
% triples.

hdt_open(HDT, File) :-
hdt_open(HDT, File, []).
Expand Down Expand Up @@ -394,7 +397,9 @@
%! hdt_create_from_file(+HDTFile, +RDFFile, +Options)
%
% Create a HDT file from an RDF file. The format of RDFFile
% defaults to `ntriples` format. Options:
% defaults to `ntriples` format. The file names are expanded using
% absolute_file_name/3, but without any default extension (this
% is different from how hdt_open/3 works). Options:
%
% * base_uri(+URI)
% URI is used for generating the header properties (see
Expand Down

0 comments on commit 6ec58e4

Please sign in to comment.