diff --git a/pom.xml b/pom.xml index b1ff7bb..04cca57 100644 --- a/pom.xml +++ b/pom.xml @@ -476,7 +476,7 @@ com.google.cloud.tools jib-maven-plugin - 3.2.0 + 3.4.4 jdeb diff --git a/rdf-processing-toolkit-cli/src/main/java/org/aksw/sparql_integrate/cli/cmd/CmdSparqlIntegrateMain.java b/rdf-processing-toolkit-cli/src/main/java/org/aksw/sparql_integrate/cli/cmd/CmdSparqlIntegrateMain.java index ccdc69e..274bb94 100644 --- a/rdf-processing-toolkit-cli/src/main/java/org/aksw/sparql_integrate/cli/cmd/CmdSparqlIntegrateMain.java +++ b/rdf-processing-toolkit-cli/src/main/java/org/aksw/sparql_integrate/cli/cmd/CmdSparqlIntegrateMain.java @@ -11,6 +11,7 @@ import java.util.concurrent.Callable; import org.aksw.jenax.arq.picocli.CmdMixinArq; +import org.aksw.jenax.arq.picocli.CmdMixinSparqlPaginate; import org.aksw.rdf_processing_toolkit.cli.cmd.CmdCommonBase; import org.aksw.rdf_processing_toolkit.cli.cmd.VersionProviderRdfProcessingToolkit; import org.aksw.sparql_integrate.cli.main.SparqlIntegrateCmdImpls; @@ -75,8 +76,6 @@ public class CmdSparqlIntegrateMain @Option(names = { "--cache-rewrite-groupby" }, description="Cache GROUP BY operations individually. Ignored if no cache engine is specified.") //, defaultValue = "false", fallbackValue = "true") public boolean cacheRewriteGroupBy = false; - - @Option(names = { "--tmpdir" }, description="Temporary directory") public String tempPath = StandardSystemProperty.JAVA_IO_TMPDIR.value(); @@ -96,6 +95,10 @@ public class CmdSparqlIntegrateMain @Mixin public CmdMixinArq arqConfig = new CmdMixinArq(); + /** Mixin for result set limit and pagination */ + @Mixin + public CmdMixinSparqlPaginate paginationConfig = new CmdMixinSparqlPaginate(); + @Option(names= {"--bnp", "--bnode-profile"}, description="Blank node profile, empty string ('') to disable; 'auto' to autodetect, defaults to ${DEFAULT-VALUE}", defaultValue = "") public String bnodeProfile = null; diff --git a/rdf-processing-toolkit-cli/src/main/java/org/aksw/sparql_integrate/cli/main/SparqlIntegrateCmdImpls.java b/rdf-processing-toolkit-cli/src/main/java/org/aksw/sparql_integrate/cli/main/SparqlIntegrateCmdImpls.java index 8a68ff8..956ee35 100644 --- a/rdf-processing-toolkit-cli/src/main/java/org/aksw/sparql_integrate/cli/main/SparqlIntegrateCmdImpls.java +++ b/rdf-processing-toolkit-cli/src/main/java/org/aksw/sparql_integrate/cli/main/SparqlIntegrateCmdImpls.java @@ -484,6 +484,16 @@ public static int sparqlIntegrate(CmdSparqlIntegrateMain cmd) throws Exception { } } + Long resultSetPageSize = cmd.paginationConfig.queryPageSize; + if (resultSetPageSize != null && resultSetPageSize > 0) { + dataSourceTmp = RdfDataEngines.wrapWithDataSourceTransform(dataSourceTmp, ds -> RdfDataSources.withPagination(ds, resultSetPageSize)); + } + + Long queryLimit = cmd.paginationConfig.queryLimit; + if (queryLimit != null && queryLimit > 0) { + dataSourceTmp = RdfDataEngines.wrapWithDataSourceTransform(dataSourceTmp, ds -> RdfDataSources.withLimit(ds, queryLimit)); + } + dataSourceTmp = RdfDataEngines.wrapWithQueryTransform(dataSourceTmp, null, QueryExecs::withDetailedHttpMessages); if (cmd.cachePath != null) { @@ -582,16 +592,19 @@ public static int sparqlIntegrate(CmdSparqlIntegrateMain cmd) throws Exception { // Load function macros (run sparql inferences first) Map udfRegistry = new LinkedHashMap<>(); + + // XXX There should be a separate registry for default macros to load. + loadMacros(macroProfiles, udfRegistry, "macros/ollama.ttl"); + for (String macroSource : cmd.macroSources) { - Model model = RDFDataMgr.loadModel(macroSource); - SparqlStmtMgr.execSparql(model, "udf-inferences.rq"); - Map contrib = UserDefinedFunctions.load(model, macroProfiles); - udfRegistry.putAll(contrib); + loadMacros(macroProfiles, udfRegistry, macroSource); } if (!cmd.macroSources.isEmpty()) { - logger.info("Loaded functions: {}", udfRegistry.keySet()); - logger.info("Loaded {} function definitions from {} macro sources.", udfRegistry.size(), cmd.macroSources.size()); + if (logger.isInfoEnabled()) { + logger.info("Loaded functions: {}", udfRegistry.keySet()); + logger.info("Loaded {} function definitions from {} macro sources.", udfRegistry.size(), cmd.macroSources.size()); + } // ExprTransform eform = new ExprTransformExpand(udfRegistry); ExprTransform eform = new ExprTransformCopy() { @Override @@ -602,8 +615,6 @@ public Expr transform(ExprFunctionN func, ExprList args) { }; SparqlStmtTransform stmtTransform = SparqlStmtTransforms.ofExprTransform(eform); dataSourceTmp = RdfDataEngines.wrapWithStmtTransform(dataSourceTmp, stmtTransform); - // QueryTransform qform = q -> QueryUtils.rewrite(q, op -> Transformer.transform(null, eform, op)); - // dataSourceTmp = RdfDataEngines.wrapWithQueryTransform(dataSourceTmp, qform, null); } RdfDataEngine datasetAndDelete = dataSourceTmp; @@ -848,10 +859,14 @@ public void afterExec() { server.start(); + // Try to get the host address from a network device (e.g. within a docker container) String hostAddress; try(final DatagramSocket socket = new DatagramSocket()){ socket.connect(InetAddress.getByName("1.1.1.1"), 53); hostAddress = socket.getLocalAddress().getHostAddress(); + } catch (Exception e) { + // Fall back to localhost + hostAddress = "localhost"; } URI browseUri = new URI("http://"+hostAddress+":" + port + "/"); if (Desktop.isDesktopSupported()) { @@ -928,6 +943,14 @@ public void afterExec() { return exitCode; } + private static void loadMacros(Set macroProfiles, Map udfRegistry, + String macroSource) { + Model model = RDFDataMgr.loadModel(macroSource); + SparqlStmtMgr.execSparql(model, "udf-inferences.rq"); + Map contrib = UserDefinedFunctions.load(model, macroProfiles); + udfRegistry.putAll(contrib); + } + /** Be careful not to call within a read transaction! */ public static void updateSpatialIndex(Dataset dataset) { Context cxt = dataset.getContext();