From ac44f7047827f61ec6b46a622d8f00ff2c1a9ef8 Mon Sep 17 00:00:00 2001 From: qaate47 Date: Thu, 12 Jan 2023 15:58:37 +0100 Subject: [PATCH] Use KWayMerger for HDT co-index creation to reduce random accesses and allow --- .../java/org/rdfhdt/hdt/hdt/HDTManager.java | 339 ++++++++++++++--- .../hdt/listener/MultiThreadListener.java | 26 ++ .../rdfhdt/hdt/listener/ProgressListener.java | 33 +- .../org/rdfhdt/hdt/options/HDTOptions.java | 222 ++++++++++-- .../rdfhdt/hdt/options/HDTOptionsKeys.java | 149 +++++++- .../main/java/org/rdfhdt/hdt/options/Key.java | 3 + .../java/org/rdfhdt/hdt/tools/HDTCat.java | 10 +- .../java/org/rdfhdt/hdt/tools/HdtSearch.java | 51 ++- .../java/org/rdfhdt/hdt/tools/RDF2HDT.java | 9 +- .../hdt/compact/bitmap/AdjacencyList.java | 2 +- .../hdt/compact/bitmap/Bitmap375Big.java | 3 +- .../hdt/compact/bitmap/Bitmap64Big.java | 12 +- .../rdfhdt/hdt/compact/bitmap/SyncBitmap.java | 141 ++++++++ .../org/rdfhdt/hdt/compact/integer/VByte.java | 40 +- .../rdfhdt/hdt/compact/sequence/Sequence.java | 4 + .../hdt/compact/sequence/SequenceInt32.java | 8 +- .../hdt/compact/sequence/SequenceInt64.java | 6 + .../hdt/compact/sequence/SequenceLog64.java | 5 + .../compact/sequence/SequenceLog64Big.java | 52 +-- .../sequence/SequenceLog64BigDisk.java | 35 +- .../compact/sequence/SequenceLog64Map.java | 7 +- .../org/rdfhdt/hdt/hdt/HDTManagerImpl.java | 2 +- .../java/org/rdfhdt/hdt/hdt/impl/HDTBase.java | 6 +- .../rdfhdt/hdt/hdt/impl/HDTDiskImporter.java | 10 +- .../java/org/rdfhdt/hdt/hdt/impl/HDTImpl.java | 2 +- .../impl/diskimport/AsyncCatTreeWorker.java | 2 +- .../hdt/hdt/impl/diskindex/DiskIndexSort.java | 160 ++++++++ .../hdt/impl/diskindex/ObjectAdjReader.java | 37 ++ .../hdt/iterator/utils/ExceptionIterator.java | 8 + .../hdt/iterator/utils/FetcherIterator.java | 59 +++ .../iterator/utils/MapExceptionIterator.java | 5 + .../utils/MergeExceptionIterator.java | 10 + .../utils/NotificationExceptionIterator.java | 5 + .../hdt/iterator/utils/SizeFetcher.java | 8 +- .../rdfhdt/hdt/options/HDTSpecification.java | 39 +- .../rdfhdt/hdt/options/HideHDTOptions.java | 8 +- .../rdfhdt/hdt/triples/TriplesPrivate.java | 3 +- .../hdt/triples/impl/BitmapTriples.java | 245 +++++++++++-- .../hdt/triples/impl/OneReadTempTriples.java | 3 +- .../hdt/triples/impl/PredicateIndex.java | 3 +- .../hdt/triples/impl/PredicateIndexArray.java | 26 +- .../rdfhdt/hdt/triples/impl/TriplesList.java | 3 +- .../hdt/triples/impl/TriplesListLong.java | 3 +- .../hdt/triples/impl/WriteBitmapTriples.java | 3 +- .../rdfhdt/hdt/util/disk/LargeLongArray.java | 13 +- .../org/rdfhdt/hdt/util/disk/LongArray.java | 14 +- .../rdfhdt/hdt/util/disk/LongArrayDisk.java | 26 +- .../hdt/util/disk/SimpleSplitLongArray.java | 7 +- .../rdfhdt/hdt/util/disk/SyncLongArray.java | 59 +++ .../java/org/rdfhdt/hdt/util/io/IOUtil.java | 103 +++++- .../util/io/compress/CompressNodeReader.java | 1 + .../org/rdfhdt/hdt/util/io/compress/Pair.java | 34 ++ .../util/io/compress/PairMergeIterator.java | 19 + .../hdt/util/io/compress/PairReader.java | 86 +++++ .../hdt/util/io/compress/PairWriter.java | 63 ++++ .../io/compress/WriteLongArrayBuffer.java | 1 + .../hdt/compact/array/LongArrayTest.java | 5 +- .../hdt/compact/sequence/LargeArrayTest.java | 3 +- .../dictionary/impl/kcat/KCatMergerTest.java | 13 +- .../hdt/triples/impl/BitmapTriplesTest.java | 342 +++++++++++++----- .../org/rdfhdt/hdt/util/ProfilerTest.java | 4 +- .../hdt/util/disk/LongArrayDiskTest.java | 27 ++ .../util/disk/SimpleSplitLongArrayTest.java | 5 +- .../hdt/util/io/compress/CompressTest.java | 36 +- hdt-java-package/bin/hdtSearch.ps1 | 6 + .../org/rdfhdt/hdtjena/HDTGraphAssembler.java | 4 +- .../org/rdfhdt/hdtjena/cmd/HDTSparql.java | 2 +- 67 files changed, 2303 insertions(+), 377 deletions(-) create mode 100644 hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/SyncBitmap.java create mode 100644 hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/diskindex/DiskIndexSort.java create mode 100644 hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/diskindex/ObjectAdjReader.java create mode 100644 hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/FetcherIterator.java create mode 100644 hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/SyncLongArray.java create mode 100644 hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/Pair.java create mode 100644 hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/PairMergeIterator.java create mode 100644 hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/PairReader.java create mode 100644 hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/PairWriter.java diff --git a/hdt-api/src/main/java/org/rdfhdt/hdt/hdt/HDTManager.java b/hdt-api/src/main/java/org/rdfhdt/hdt/hdt/HDTManager.java index c0d24c51..3e15e78b 100644 --- a/hdt-api/src/main/java/org/rdfhdt/hdt/hdt/HDTManager.java +++ b/hdt-api/src/main/java/org/rdfhdt/hdt/hdt/HDTManager.java @@ -3,8 +3,11 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.lang.reflect.InvocationTargetException; +import java.nio.file.Path; import java.util.Iterator; import java.util.List; +import java.util.stream.Collectors; import org.rdfhdt.hdt.compact.bitmap.Bitmap; import org.rdfhdt.hdt.enums.CompressionType; @@ -25,16 +28,18 @@ private static HDTManager getInstance() { try { // Try to instantiate pro Class managerImplClass = Class.forName("org.rdfhdt.hdt.pro.HDTManagerProImpl"); - instance = (HDTManager) managerImplClass.newInstance(); + instance = (HDTManager) managerImplClass.getDeclaredConstructor().newInstance(); } catch (Exception e1) { try { // Pro not found, instantiate normal Class managerImplClass = Class.forName("org.rdfhdt.hdt.hdt.HDTManagerImpl"); - instance = (HDTManager) managerImplClass.newInstance(); + instance = (HDTManager) managerImplClass.getDeclaredConstructor().newInstance(); } catch (ClassNotFoundException e) { - throw new RuntimeException("Class org.rdfhdt.hdt.hdt.HDTManagerImpl not found. Did you include the HDT implementation jar?"); - } catch (InstantiationException e) { - throw new RuntimeException("Cannot create implementation for HDTManager. Does the class org.rdfhdt.hdt.hdt.HDTManagerImpl inherit from HDTManager?"); + throw new RuntimeException("Class org.rdfhdt.hdt.hdt.HDTManagerImpl not found. Did you include the HDT implementation jar?", e); + } catch (InstantiationException | InvocationTargetException e) { + throw new RuntimeException("Cannot create implementation for HDTManager. Does the class org.rdfhdt.hdt.hdt.HDTManagerImpl inherit from HDTManager?", e); + } catch (NoSuchMethodException e) { + throw new RuntimeException("Cannot create implementation for HDTManager. Does the class org.rdfhdt.hdt.hdt.HDTManagerImpl has a default empty constructor?", e); } catch (IllegalAccessException e) { throw new RuntimeException(e); } @@ -43,10 +48,27 @@ private static HDTManager getInstance() { return instance; } + public static HDTOptions readOptions(Path file) throws IOException { + return readOptions(file.toAbsolutePath().toString()); + } + public static HDTOptions readOptions(String file) throws IOException { return HDTManager.getInstance().doReadOptions(file); } + /** + * Load an HDT file into memory to use it. NOTE: Use this method to go through all elements. If you plan + * to do queries, use loadIndexedHDT() instead. + * @param hdtFileName file path to load + * @param listener Listener to get notified of loading progress. Can be null if no notifications needed. + * @param hdtFormat Parameters to tune the loaded HDT index. Can be null for default options. + * @throws IOException when the file cannot be found + * @return HDT + */ + public static HDT loadHDT(Path hdtFileName, ProgressListener listener, HDTOptions hdtFormat) throws IOException { + return loadHDT(hdtFileName.toAbsolutePath().toString(), listener, hdtFormat); + } + /** * Load an HDT file into memory to use it. NOTE: Use this method to go through all elements. If you plan * to do queries, use loadIndexedHDT() instead. @@ -57,9 +79,20 @@ public static HDTOptions readOptions(String file) throws IOException { * @return HDT */ public static HDT loadHDT(String hdtFileName, ProgressListener listener, HDTOptions hdtFormat) throws IOException { - return HDTManager.getInstance().doLoadHDT(hdtFileName, listener, hdtFormat); + return HDTManager.getInstance().doLoadHDT(hdtFileName, ProgressListener.ofNullable(listener), HDTOptions.ofNullable(hdtFormat)); } + /** + * Load an HDT file into memory to use it. NOTE: Use this method to go through all elements. If you plan + * to do queries, use loadIndexedHDT() instead. + * @param hdtFileName file path to load + * @param listener Listener to get notified of loading progress. Can be null if no notifications needed. + * @throws IOException when the file cannot be found + * @return HDT + */ + public static HDT loadHDT(Path hdtFileName, ProgressListener listener) throws IOException { + return loadHDT(hdtFileName.toAbsolutePath().toString(), listener); + } /** * Load an HDT file into memory to use it. NOTE: Use this method to go through all elements. If you plan * to do queries, use loadIndexedHDT() instead. @@ -69,9 +102,19 @@ public static HDT loadHDT(String hdtFileName, ProgressListener listener, HDTOpti * @return HDT */ public static HDT loadHDT(String hdtFileName, ProgressListener listener) throws IOException { - return HDTManager.getInstance().doLoadHDT(hdtFileName, listener, null); + return loadHDT(hdtFileName, listener, null); } + /** + * Load an HDT file into memory to use it. NOTE: Use this method to go through all elements. If you plan + * to do queries, use loadIndexedHDT() instead. + * @param hdtFileName file path to load + * @throws IOException when the file cannot be found + * @return HDT + */ + public static HDT loadHDT(Path hdtFileName) throws IOException { + return loadHDT(hdtFileName.toAbsolutePath().toString()); + } /** * Load an HDT file into memory to use it. NOTE: Use this method to go through all elements. If you plan * to do queries, use loadIndexedHDT() instead. @@ -80,9 +123,23 @@ public static HDT loadHDT(String hdtFileName, ProgressListener listener) throws * @return HDT */ public static HDT loadHDT(String hdtFileName) throws IOException { - return HDTManager.getInstance().doLoadHDT(hdtFileName, null, null); + return loadHDT(hdtFileName, null, null); } + /** + * Map an HDT file into memory to use it. This method does not load the whole file into memory, + * it lets the OS to handle memory pages as desired. Therefore it uses less memory but can be slower + * for querying because it needs to load those blocks from disk. + * NOTE: Use this method to go through all elements. If you plan to do queries, use mapIndexedHDT() instead. + * @param hdtFileName file path to map + * @param listener Listener to get notified of loading progress. Can be null if no notifications needed. + * @param hdtFormat Parameters to tune the loaded HDT index. Can be null for default options. + * @throws IOException when the file cannot be found + * @return HDT + */ + public static HDT mapHDT(Path hdtFileName, ProgressListener listener, HDTOptions hdtFormat) throws IOException { + return mapHDT(hdtFileName.toAbsolutePath().toString(), listener, hdtFormat); + } /** * Map an HDT file into memory to use it. This method does not load the whole file into memory, * it lets the OS to handle memory pages as desired. Therefore it uses less memory but can be slower @@ -95,7 +152,20 @@ public static HDT loadHDT(String hdtFileName) throws IOException { * @return HDT */ public static HDT mapHDT(String hdtFileName, ProgressListener listener, HDTOptions hdtFormat) throws IOException { - return HDTManager.getInstance().doMapHDT(hdtFileName, listener, hdtFormat); + return HDTManager.getInstance().doMapHDT(hdtFileName, ProgressListener.ofNullable(listener), HDTOptions.ofNullable(hdtFormat)); + } + /** + * Map an HDT file into memory to use it. This method does not load the whole file into memory, + * it lets the OS to handle memory pages as desired. Therefore it uses less memory but can be slower + * for querying because it needs to load those blocks from disk. + * NOTE: Use this method to go through all elements. If you plan to do queries, use mapIndexedHDT() instead. + * @param hdtFileName file path to map + * @param listener Listener to get notified of loading progress. Can be null if no notifications needed. + * @throws IOException when the file cannot be found + * @return HDT + */ + public static HDT mapHDT(Path hdtFileName, ProgressListener listener) throws IOException { + return mapHDT(hdtFileName.toAbsolutePath().toString(), listener); } /** * Map an HDT file into memory to use it. This method does not load the whole file into memory, @@ -108,7 +178,19 @@ public static HDT mapHDT(String hdtFileName, ProgressListener listener, HDTOptio * @return HDT */ public static HDT mapHDT(String hdtFileName, ProgressListener listener) throws IOException { - return HDTManager.getInstance().doMapHDT(hdtFileName, listener, null); + return mapHDT(hdtFileName, listener, null); + } + /** + * Map an HDT file into memory to use it. This method does not load the whole file into memory, + * it lets the OS to handle memory pages as desired. Therefore it uses less memory but can be slower + * for querying because it needs to load those blocks from disk. + * NOTE: Use this method to go through all elements. If you plan to do queries, use mapIndexedHDT() instead. + * @param hdtFileName file path to map + * @throws IOException when the file cannot be found + * @return HDT + */ + public static HDT mapHDT(Path hdtFileName) throws IOException { + return mapHDT(hdtFileName.toAbsolutePath().toString()); } /** * Map an HDT file into memory to use it. This method does not load the whole file into memory, @@ -120,7 +202,7 @@ public static HDT mapHDT(String hdtFileName, ProgressListener listener) throws I * @return HDT */ public static HDT mapHDT(String hdtFileName) throws IOException { - return HDTManager.getInstance().doMapHDT(hdtFileName, null, null); + return mapHDT(hdtFileName, null, null); } /** @@ -133,7 +215,7 @@ public static HDT mapHDT(String hdtFileName) throws IOException { * @return HDT */ public static HDT loadHDT(InputStream hdtFile, ProgressListener listener, HDTOptions hdtFormat) throws IOException { - return HDTManager.getInstance().doLoadHDT(hdtFile, listener, hdtFormat); + return HDTManager.getInstance().doLoadHDT(hdtFile, ProgressListener.ofNullable(listener), HDTOptions.ofNullable(hdtFormat)); } /** * Load an HDT from an InputStream (File, socket...). NOTE: Use this method to go through all elements. If you plan @@ -144,7 +226,7 @@ public static HDT loadHDT(InputStream hdtFile, ProgressListener listener, HDTOpt * @return HDT */ public static HDT loadHDT(InputStream hdtFile, ProgressListener listener) throws IOException { - return HDTManager.getInstance().doLoadHDT(hdtFile, listener, null); + return loadHDT(hdtFile, listener, null); } /** * Load an HDT from an InputStream (File, socket...). NOTE: Use this method to go through all elements. If you plan @@ -154,7 +236,40 @@ public static HDT loadHDT(InputStream hdtFile, ProgressListener listener) throws * @return HDT */ public static HDT loadHDT(InputStream hdtFile) throws IOException { - return HDTManager.getInstance().doLoadHDT(hdtFile, null, null); + return loadHDT(hdtFile, null, null); + } + + /** + * Load an HDT File, and load/create additional indexes to support all kind of queries efficiently. + * @param hdtFileName file path to load + * @param listener Listener to get notified of loading progress. Can be null if no notifications needed. + * @param hdtFormat Parameters to tune the loaded HDT index. Can be null for default options. + * @throws IOException when the file cannot be found + * @return HDT + */ + public static HDT loadIndexedHDT(Path hdtFileName, ProgressListener listener, HDTOptions hdtFormat) throws IOException { + return loadIndexedHDT(hdtFileName.toAbsolutePath().toString(), listener, hdtFormat); + } + + /** + * Load an HDT File, and load/create additional indexes to support all kind of queries efficiently. + * @param hdtFileName file path to load + * @param listener Listener to get notified of loading progress. Can be null if no notifications needed. + * @throws IOException when the file cannot be found + * @return HDT + */ + public static HDT loadIndexedHDT(Path hdtFileName, ProgressListener listener) throws IOException { + return loadIndexedHDT(hdtFileName.toAbsolutePath().toString(), listener); + } + + /** + * Load an HDT File, and load/create additional indexes to support all kind of queries efficiently. + * @param hdtFileName file path to load + * @throws IOException when the file cannot be found + * @return HDT + */ + public static HDT loadIndexedHDT(Path hdtFileName) throws IOException { + return loadIndexedHDT(hdtFileName.toAbsolutePath().toString()); } /** @@ -166,7 +281,7 @@ public static HDT loadHDT(InputStream hdtFile) throws IOException { * @return HDT */ public static HDT loadIndexedHDT(String hdtFileName, ProgressListener listener, HDTOptions hdtFormat) throws IOException { - return HDTManager.getInstance().doLoadIndexedHDT(hdtFileName, listener, hdtFormat); + return HDTManager.getInstance().doLoadIndexedHDT(hdtFileName, ProgressListener.ofNullable(listener), HDTOptions.ofNullable(hdtFormat)); } /** @@ -177,7 +292,7 @@ public static HDT loadIndexedHDT(String hdtFileName, ProgressListener listener, * @return HDT */ public static HDT loadIndexedHDT(String hdtFileName, ProgressListener listener) throws IOException { - return HDTManager.getInstance().doLoadIndexedHDT(hdtFileName, listener, null); + return loadIndexedHDT(hdtFileName, listener, null); } /** * Load an HDT File, and load/create additional indexes to support all kind of queries efficiently. @@ -186,7 +301,38 @@ public static HDT loadIndexedHDT(String hdtFileName, ProgressListener listener) * @return HDT */ public static HDT loadIndexedHDT(String hdtFileName) throws IOException { - return HDTManager.getInstance().doLoadIndexedHDT(hdtFileName, null, null); + return loadIndexedHDT(hdtFileName, null, null); + } + + /** + * Maps an HDT File into virtual memory, and load/create additional indexes to support all kind of queries efficiently. + * @param hdtFileName file path to map + * @param spec HDTOptions to the new mapped HDT. Can be null for default options. + * @param listener Listener to get notified of loading progress. Can be null if no notifications needed. + * @throws IOException when the file cannot be found + * @return HDT + */ + public static HDT mapIndexedHDT(Path hdtFileName, HDTOptions spec, ProgressListener listener) throws IOException { + return mapIndexedHDT(hdtFileName.toAbsolutePath().toString(), spec, listener); + } + /** + * Maps an HDT File into virtual memory, and load/create additional indexes to support all kind of queries efficiently. + * @param hdtFileName file path to map + * @param listener Listener to get notified of loading progress. Can be null if no notifications needed. + * @throws IOException when the file cannot be found + * @return HDT + */ + public static HDT mapIndexedHDT(Path hdtFileName, ProgressListener listener) throws IOException { + return mapIndexedHDT(hdtFileName.toAbsolutePath().toString(), ProgressListener.ofNullable(listener)); + } + /** + * Maps an HDT File into virtual memory, and load/create additional indexes to support all kind of queries efficiently. + * @param hdtFileName file path to map + * @throws IOException when the file cannot be found + * @return HDT + */ + public static HDT mapIndexedHDT(Path hdtFileName) throws IOException { + return mapIndexedHDT(hdtFileName.toAbsolutePath().toString()); } /** @@ -198,7 +344,7 @@ public static HDT loadIndexedHDT(String hdtFileName) throws IOException { * @return HDT */ public static HDT mapIndexedHDT(String hdtFileName, HDTOptions spec, ProgressListener listener) throws IOException { - return HDTManager.getInstance().doMapIndexedHDT(hdtFileName, listener, spec); + return HDTManager.getInstance().doMapIndexedHDT(hdtFileName, ProgressListener.ofNullable(listener), HDTOptions.ofNullable(spec)); } /** @@ -209,7 +355,7 @@ public static HDT mapIndexedHDT(String hdtFileName, HDTOptions spec, ProgressLis * @return HDT */ public static HDT mapIndexedHDT(String hdtFileName, ProgressListener listener) throws IOException { - return HDTManager.getInstance().doMapIndexedHDT(hdtFileName, listener, null); + return mapIndexedHDT(hdtFileName, null, listener); } /** @@ -219,7 +365,7 @@ public static HDT mapIndexedHDT(String hdtFileName, ProgressListener listener) t * @return HDT */ public static HDT mapIndexedHDT(String hdtFileName) throws IOException { - return HDTManager.getInstance().doMapIndexedHDT(hdtFileName, null, null); + return mapIndexedHDT(hdtFileName, null, null); } /** @@ -230,7 +376,7 @@ public static HDT mapIndexedHDT(String hdtFileName) throws IOException { * @return HDT */ public static HDT loadIndexedHDT(InputStream hdtFileName, ProgressListener listener) throws IOException { - return HDTManager.getInstance().doLoadIndexedHDT(hdtFileName, listener, null); + return loadIndexedHDT(hdtFileName, listener, null); } /** @@ -240,7 +386,7 @@ public static HDT loadIndexedHDT(InputStream hdtFileName, ProgressListener liste * @return HDT */ public static HDT loadIndexedHDT(InputStream hdtFileName) throws IOException { - return HDTManager.getInstance().doLoadIndexedHDT(hdtFileName, null, null); + return loadIndexedHDT(hdtFileName, null, null); } /** * Load an HDT file from InputStream, and create additional indexes to support all kind of queries efficiently. @@ -250,7 +396,7 @@ public static HDT loadIndexedHDT(InputStream hdtFileName) throws IOException { * @return HDT */ public static HDT loadIndexedHDT(InputStream hdtFileName, ProgressListener listener, HDTOptions hdtFormat) throws IOException { - return HDTManager.getInstance().doLoadIndexedHDT(hdtFileName, listener, hdtFormat); + return HDTManager.getInstance().doLoadIndexedHDT(hdtFileName, ProgressListener.ofNullable(listener), HDTOptions.ofNullable(hdtFormat)); } /** @@ -261,9 +407,24 @@ public static HDT loadIndexedHDT(InputStream hdtFileName, ProgressListener liste * @return HDT */ public static HDT indexedHDT(HDT hdt, ProgressListener listener) throws IOException { - return HDTManager.getInstance().doIndexedHDT(hdt, listener); + return HDTManager.getInstance().doIndexedHDT(hdt, ProgressListener.ofNullable(listener)); } + /** + * Create an HDT file from an RDF file. + * @param rdfFileName File name. + * @param baseURI Base URI for the dataset. + * @param rdfNotation Format of the source RDF File (NTriples, N3, RDF-XML...) + * @param hdtFormat Parameters to tune the generated HDT. + * @param listener Listener to get notified of loading progress. Can be null if no notifications needed. + * + * @throws IOException when the file cannot be found + * @throws ParserException when the file cannot be parsed + * @return HDT + */ + public static HDT generateHDT(Path rdfFileName, String baseURI, RDFNotation rdfNotation, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { + return generateHDT(rdfFileName.toAbsolutePath().toString(), baseURI, rdfNotation, hdtFormat, listener); + } /** * Create an HDT file from an RDF file. * @param rdfFileName File name. @@ -277,7 +438,7 @@ public static HDT indexedHDT(HDT hdt, ProgressListener listener) throws IOExcept * @return HDT */ public static HDT generateHDT(String rdfFileName, String baseURI, RDFNotation rdfNotation, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { - return HDTManager.getInstance().doGenerateHDT(rdfFileName, baseURI, rdfNotation, hdtFormat, listener); + return HDTManager.getInstance().doGenerateHDT(rdfFileName, baseURI, rdfNotation, HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } /** @@ -291,7 +452,7 @@ public static HDT generateHDT(String rdfFileName, String baseURI, RDFNotation rd * @return HDT */ public static HDT generateHDT(Iterator iterator, String baseURI, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { - return HDTManager.getInstance().doGenerateHDT(iterator, baseURI, hdtFormat, listener); + return HDTManager.getInstance().doGenerateHDT(iterator, baseURI, HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } /** * Create an HDT file from a RDF stream. @@ -305,7 +466,7 @@ public static HDT generateHDT(Iterator iterator, String baseURI, H * @throws ParserException when the RDF stream can't be parsed */ public static HDT generateHDT(InputStream fileStream, String baseURI, String filename, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { - return HDTManager.getInstance().doGenerateHDT(fileStream, baseURI, RDFNotation.guess(filename), CompressionType.guess(filename), hdtFormat, listener); + return HDTManager.getInstance().doGenerateHDT(fileStream, baseURI, RDFNotation.guess(filename), CompressionType.guess(filename), HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } /** * Create an HDT file from a RDF stream. @@ -320,7 +481,7 @@ public static HDT generateHDT(InputStream fileStream, String baseURI, String fil * @throws ParserException when the RDF stream can't be parsed */ public static HDT generateHDT(InputStream fileStream, String baseURI, RDFNotation rdfNotation, CompressionType compressionType, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { - return HDTManager.getInstance().doGenerateHDT(fileStream, baseURI, rdfNotation, compressionType, hdtFormat, listener); + return HDTManager.getInstance().doGenerateHDT(fileStream, baseURI, rdfNotation, compressionType, HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } /** * Create an HDT file from a RDF stream. @@ -334,7 +495,7 @@ public static HDT generateHDT(InputStream fileStream, String baseURI, RDFNotatio * @throws ParserException when the RDF stream can't be parsed */ public static HDT generateHDT(InputStream fileStream, String baseURI, RDFNotation rdfNotation, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { - return HDTManager.getInstance().doGenerateHDT(fileStream, baseURI, rdfNotation, CompressionType.NONE, hdtFormat, listener); + return HDTManager.getInstance().doGenerateHDT(fileStream, baseURI, rdfNotation, CompressionType.NONE, HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } /** @@ -351,7 +512,7 @@ public static HDT generateHDT(InputStream fileStream, String baseURI, RDFNotatio * @throws ParserException when the RDF file can't be parsed */ public static HDT generateHDTDisk(String rdfFileName, String baseURI, RDFNotation rdfNotation, CompressionType compressionType, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { - return HDTManager.getInstance().doGenerateHDTDisk(rdfFileName, baseURI, rdfNotation, compressionType, hdtFormat, listener); + return HDTManager.getInstance().doGenerateHDTDisk(rdfFileName, baseURI, rdfNotation, compressionType, HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } /** * Create an HDT file from an RDF file without compression by sorting the triples on disk, reduce the memory @@ -366,7 +527,7 @@ public static HDT generateHDTDisk(String rdfFileName, String baseURI, RDFNotatio * @throws ParserException when the RDF file can't be parsed */ public static HDT generateHDTDisk(String rdfFileName, String baseURI, RDFNotation rdfNotation, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { - return HDTManager.getInstance().doGenerateHDTDisk(rdfFileName, baseURI, rdfNotation, CompressionType.NONE, hdtFormat, listener); + return HDTManager.getInstance().doGenerateHDTDisk(rdfFileName, baseURI, rdfNotation, CompressionType.NONE, HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } /** * Create an HDT file from an RDF file by sorting the triples on disk, reduce the memory required by increasing the @@ -380,7 +541,7 @@ public static HDT generateHDTDisk(String rdfFileName, String baseURI, RDFNotatio * @throws ParserException when the RDF file can't be parsed */ public static HDT generateHDTDisk(String rdfFileName, String baseURI, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { - return HDTManager.getInstance().doGenerateHDTDisk(rdfFileName, baseURI, RDFNotation.guess(rdfFileName), CompressionType.guess(rdfFileName), hdtFormat, listener); + return HDTManager.getInstance().doGenerateHDTDisk(rdfFileName, baseURI, RDFNotation.guess(rdfFileName), CompressionType.guess(rdfFileName), HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } /** * Create an HDT file from an RDF stream by sorting the triples on disk, reduce the memory required by increasing @@ -395,7 +556,7 @@ public static HDT generateHDTDisk(String rdfFileName, String baseURI, HDTOptions * @throws ParserException when the RDF stream can't be parsed */ public static HDT generateHDTDisk(InputStream fileStream, String baseURI, String filename, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { - return HDTManager.getInstance().doGenerateHDTDisk(fileStream, baseURI, RDFNotation.guess(filename), CompressionType.guess(filename), hdtFormat, listener); + return HDTManager.getInstance().doGenerateHDTDisk(fileStream, baseURI, RDFNotation.guess(filename), CompressionType.guess(filename), HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } /** * Create an HDT file from an RDF stream by sorting the triples on disk, reduce the memory required by increasing @@ -411,7 +572,7 @@ public static HDT generateHDTDisk(InputStream fileStream, String baseURI, String * @throws ParserException when the RDF stream can't be parsed */ public static HDT generateHDTDisk(InputStream fileStream, String baseURI, RDFNotation rdfNotation, CompressionType compressionType, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { - return HDTManager.getInstance().doGenerateHDTDisk(fileStream, baseURI, rdfNotation, compressionType, hdtFormat, listener); + return HDTManager.getInstance().doGenerateHDTDisk(fileStream, baseURI, rdfNotation, compressionType, HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } /** * Create an HDT file from an RDF stream by sorting the triples on disk, reduce the memory required by increasing @@ -426,7 +587,7 @@ public static HDT generateHDTDisk(InputStream fileStream, String baseURI, RDFNot * @throws ParserException when the RDF stream can't be parsed */ public static HDT generateHDTDisk(InputStream fileStream, String baseURI, RDFNotation rdfNotation, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { - return HDTManager.getInstance().doGenerateHDTDisk(fileStream, baseURI, rdfNotation, CompressionType.NONE, hdtFormat, listener); + return HDTManager.getInstance().doGenerateHDTDisk(fileStream, baseURI, rdfNotation, CompressionType.NONE, HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } /** * Create an HDT file from an RDF stream by sorting the triples on disk, reduce the memory required by increasing @@ -438,15 +599,33 @@ public static HDT generateHDTDisk(InputStream fileStream, String baseURI, RDFNot * @throws IOException when the stream cannot be used */ public static HDT generateHDTDisk(Iterator iterator, String baseURI, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { - return HDTManager.getInstance().doGenerateHDTDisk(iterator, baseURI, hdtFormat, listener); + return HDTManager.getInstance().doGenerateHDTDisk(iterator, baseURI, HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } public static TripleWriter getHDTWriter(OutputStream out, String baseURI, HDTOptions hdtFormat) throws IOException { - return HDTManager.getInstance().doGetHDTWriter(out, baseURI, hdtFormat); + return HDTManager.getInstance().doGetHDTWriter(out, baseURI, HDTOptions.ofNullable(hdtFormat)); + } + + public static TripleWriter getHDTWriter(Path outFile, String baseURI, HDTOptions hdtFormat) throws IOException { + return getHDTWriter(outFile.toAbsolutePath().toString(), baseURI, hdtFormat); } public static TripleWriter getHDTWriter(String outFile, String baseURI, HDTOptions hdtFormat) throws IOException { - return HDTManager.getInstance().doGetHDTWriter(outFile, baseURI, hdtFormat); + return HDTManager.getInstance().doGetHDTWriter(outFile, baseURI, HDTOptions.ofNullable(hdtFormat)); + } + + /** + * Create an HDT file from two HDT files by joining the triples. + * @param location where the new HDT file is stored + * @param hdtFileName1 First hdt file name + * @param hdtFileName2 Second hdt file name + * @param hdtFormat Parameters to tune the generated HDT. + * @param listener Listener to get notified of loading progress. Can be null if no notifications needed. + * @throws IOException when the file cannot be found + * @return HDT + */ + public static HDT catHDT(Path location, Path hdtFileName1, Path hdtFileName2, HDTOptions hdtFormat, ProgressListener listener) throws IOException { + return catHDT(location.toAbsolutePath().toString(), hdtFileName1.toAbsolutePath().toString(), hdtFileName2.toAbsolutePath().toString(), hdtFormat, listener); } /** @@ -460,9 +639,20 @@ public static TripleWriter getHDTWriter(String outFile, String baseURI, HDTOptio * @return HDT */ public static HDT catHDT(String location, String hdtFileName1, String hdtFileName2, HDTOptions hdtFormat, ProgressListener listener) throws IOException { - return HDTManager.getInstance().doHDTCat(location, hdtFileName1, hdtFileName2, hdtFormat, listener); + return HDTManager.getInstance().doHDTCat(location, hdtFileName1, hdtFileName2, HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } + /** + * Create an HDT file from HDT files by joining the triples. + * @param hdtFileNames hdt file names + * @param hdtFormat Parameters to tune the generated HDT. + * @param listener Listener to get notified of loading progress. Can be null if no notifications needed. + * @throws IOException when the file cannot be found + * @return HDT + */ + public static HDT catHDTPath(List hdtFileNames, HDTOptions hdtFormat, ProgressListener listener) throws IOException { + return catHDT(hdtFileNames.stream().map(p -> p.toAbsolutePath().toString()).collect(Collectors.toList()), hdtFormat, listener); + } /** * Create an HDT file from HDT files by joining the triples. * @param hdtFileNames hdt file names @@ -472,7 +662,19 @@ public static HDT catHDT(String location, String hdtFileName1, String hdtFileNam * @return HDT */ public static HDT catHDT(List hdtFileNames, HDTOptions hdtFormat, ProgressListener listener) throws IOException { - return HDTManager.getInstance().doHDTCat(hdtFileNames, hdtFormat, listener); + return HDTManager.getInstance().doHDTCat(hdtFileNames, HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); + } + /** + * Create a new HDT by removing from hdt1 the triples of hdt2. + * @param hdtFileName1 First hdt file name + * @param hdtFileName2 Second hdt file name + * @param hdtFormat Parameters to tune the generated HDT. + * @param listener Listener to get notified of loading progress. Can be null if no notifications needed. + * @return HDT + * @throws IOException when the file cannot be found + */ + public static HDT diffHDT(Path hdtFileName1, Path hdtFileName2, HDTOptions hdtFormat, ProgressListener listener) throws IOException { + return diffHDT(hdtFileName1.toAbsolutePath().toString(), hdtFileName2.toAbsolutePath().toString(), hdtFormat, listener); } /** * Create a new HDT by removing from hdt1 the triples of hdt2. @@ -484,7 +686,21 @@ public static HDT catHDT(List hdtFileNames, HDTOptions hdtFormat, Progre * @throws IOException when the file cannot be found */ public static HDT diffHDT(String hdtFileName1, String hdtFileName2, HDTOptions hdtFormat, ProgressListener listener) throws IOException { - return HDTManager.getInstance().doHDTDiff(hdtFileName1, hdtFileName2, hdtFormat, listener); + return HDTManager.getInstance().doHDTDiff(hdtFileName1, hdtFileName2, HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); + } + + /** + * Create a new HDT by removing from a hdt all the triples marked to delete in a bitmap + * @param location where the new HDT file is stored + * @param hdtFileName hdt file name + * @param deleteBitmap delete bitmap + * @param hdtFormat Parameters to tune the generated HDT. + * @param listener Listener to get notified of loading progress. Can be null if no notifications needed. + * @return HDT + * @throws IOException when the file cannot be found + */ + public static HDT diffHDTBit(Path location, String hdtFileName, Bitmap deleteBitmap, HDTOptions hdtFormat, ProgressListener listener) throws IOException { + return diffHDTBit(location.toAbsolutePath().toString(), hdtFileName, deleteBitmap, hdtFormat, listener); } /** @@ -498,9 +714,21 @@ public static HDT diffHDT(String hdtFileName1, String hdtFileName2, HDTOptions h * @throws IOException when the file cannot be found */ public static HDT diffHDTBit(String location, String hdtFileName, Bitmap deleteBitmap, HDTOptions hdtFormat, ProgressListener listener) throws IOException { - return HDTManager.getInstance().doHDTDiffBit(location, hdtFileName, deleteBitmap, hdtFormat, listener); + return HDTManager.getInstance().doHDTDiffBit(location, hdtFileName, deleteBitmap, HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } + /** + * Create an HDT file from HDT files by joining the triples and removing some triples with delete bitmaps + * @param hdtFileNames hdt file names + * @param deleteBitmaps the bitmaps for each HDT in hdtFileNames, should be the same size as hdtFileNames, see {@link org.rdfhdt.hdt.compact.bitmap.BitmapFactory#empty()} + * @param hdtFormat Parameters to tune the generated HDT. + * @param listener Listener to get notified of loading progress. Can be null if no notifications needed. + * @throws IOException when the file cannot be found + * @return HDT + */ + public static HDT diffBitCatHDTPath(List hdtFileNames, List deleteBitmaps, HDTOptions hdtFormat, ProgressListener listener) throws IOException { + return diffBitCatHDT(hdtFileNames.stream().map(p -> p.toAbsolutePath().toString()).collect(Collectors.toList()), deleteBitmaps, hdtFormat, listener); + } /** * Create an HDT file from HDT files by joining the triples and removing some triples with delete bitmaps * @param hdtFileNames hdt file names @@ -511,10 +739,29 @@ public static HDT diffHDTBit(String location, String hdtFileName, Bitmap deleteB * @return HDT */ public static HDT diffBitCatHDT(List hdtFileNames, List deleteBitmaps, HDTOptions hdtFormat, ProgressListener listener) throws IOException { - return HDTManager.getInstance().doHDTDiffBitCat(hdtFileNames, deleteBitmaps, hdtFormat, listener); + return HDTManager.getInstance().doHDTDiffBitCat(hdtFileNames, deleteBitmaps, HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } + /** + * Create an HDT file from an RDF file in a tree, stop the chunk creation with the fluxStop + * + * @param fluxStop Flux stopper + * @param supplier HDT supplier to create initial HDT before cat + * @param rdfFileName File name. + * @param baseURI Base URI for the dataset. + * @param rdfNotation Format of the source RDF File (NTriples, N3, RDF-XML...) + * @param hdtFormat Parameters to tune the generated HDT. + * @param listener Listener to get notified of loading progress. Can be null if no notifications needed. + * + * @throws IOException when the file cannot be found + * @throws ParserException when the file cannot be parsed + * @return HDT + */ + public static HDT catTree(RDFFluxStop fluxStop, HDTSupplier supplier, Path rdfFileName, String baseURI, RDFNotation rdfNotation, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { + return catTree(fluxStop, supplier, rdfFileName.toAbsolutePath().toString(), baseURI, rdfNotation, hdtFormat, listener); + } + /** * Create an HDT file from an RDF file in a tree, stop the chunk creation with the fluxStop * @@ -531,7 +778,7 @@ public static HDT diffBitCatHDT(List hdtFileNames, List iterator, String baseURI, HDTOptions hdtFormat, ProgressListener listener) throws IOException, ParserException { - return HDTManager.getInstance().doHDTCatTree(fluxStop, supplier, iterator, baseURI, hdtFormat, listener); + return HDTManager.getInstance().doHDTCatTree(fluxStop, supplier, iterator, baseURI, HDTOptions.ofNullable(hdtFormat), ProgressListener.ofNullable(listener)); } // Abstract methods for the current implementation diff --git a/hdt-api/src/main/java/org/rdfhdt/hdt/listener/MultiThreadListener.java b/hdt-api/src/main/java/org/rdfhdt/hdt/listener/MultiThreadListener.java index a9014b80..fd998275 100644 --- a/hdt-api/src/main/java/org/rdfhdt/hdt/listener/MultiThreadListener.java +++ b/hdt-api/src/main/java/org/rdfhdt/hdt/listener/MultiThreadListener.java @@ -5,6 +5,32 @@ */ @FunctionalInterface public interface MultiThreadListener extends ProgressListener { + /** + * empty progress listener + * + * @return progress listener + */ + static MultiThreadListener ignore() { + return ((thread, level, message) -> { + }); + } + + /** + * @return progress listener returning to sdtout + */ + static MultiThreadListener sout() { + return ((thread, level, message) -> System.out.println(level + " - " + message)); + } + + /** + * progress listener of a nullable listener + * + * @param listener listener + * @return listener or ignore listener + */ + static MultiThreadListener ofNullable(MultiThreadListener listener) { + return listener == null ? ignore() : listener; + } /** * Send progress notification diff --git a/hdt-api/src/main/java/org/rdfhdt/hdt/listener/ProgressListener.java b/hdt-api/src/main/java/org/rdfhdt/hdt/listener/ProgressListener.java index 124ad0fd..eec2e9a1 100644 --- a/hdt-api/src/main/java/org/rdfhdt/hdt/listener/ProgressListener.java +++ b/hdt-api/src/main/java/org/rdfhdt/hdt/listener/ProgressListener.java @@ -29,14 +29,41 @@ /** * Interface for notifying the progress of an operation. - * - * @author mario.arias * + * @author mario.arias */ public interface ProgressListener { + /** + * empty progress listener + * + * @return progress listener + */ + static ProgressListener ignore() { + return ((level, message) -> { + }); + } + + /** + * @return progress listener returning to sdtout + */ + static ProgressListener sout() { + return ((level, message) -> System.out.println(level + " - " + message)); + } + + /** + * progress listener of a nullable listener + * + * @param listener listener + * @return listener or ignore listener + */ + static ProgressListener ofNullable(ProgressListener listener) { + return listener == null ? ignore() : listener; + } + /** * Send progress notification - * @param level percent of the task accomplished + * + * @param level percent of the task accomplished * @param message Description of the operation */ void notifyProgress(float level, String message); diff --git a/hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptions.java b/hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptions.java index 13dbbfe1..b8637175 100644 --- a/hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptions.java +++ b/hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptions.java @@ -28,24 +28,28 @@ package org.rdfhdt.hdt.options; import org.rdfhdt.hdt.exceptions.NotImplementedException; +import org.rdfhdt.hdt.hdt.HDTManager; import org.rdfhdt.hdt.rdf.RDFFluxStop; import org.rdfhdt.hdt.util.Profiler; -import java.util.HashMap; -import java.util.Map; -import java.util.Objects; -import java.util.Set; +import java.io.IOException; +import java.io.InputStream; +import java.io.Writer; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; import java.util.function.DoubleSupplier; import java.util.function.LongSupplier; import java.util.function.Supplier; /** * Options storage, see {@link org.rdfhdt.hdt.options.HDTOptionsKeys} for more information. + * * @author mario.arias */ public interface HDTOptions { /** - * empty option, can be used to set values + * empty option, can't be used to set values */ HDTOptions EMPTY = new HDTOptions() { @Override @@ -63,22 +67,20 @@ public String get(String key) { public void set(String key, String value) { throw new NotImplementedException("set"); } - }; - /** - * @return create empty, modifiable options - */ - static HDTOptions of() { - return of(Map.of()); - } + @Override + public Set getKeys() { + return Collections.emptySet(); + } + }; /** - * create modifiable options starting from the copy of the data map - * @param data data map + * create modifiable options + * * @return options */ - static HDTOptions of(Map data) { - Map map = new HashMap<>(data); + static HDTOptions of() { + Map map = new TreeMap<>(); return new HDTOptions() { @Override public void clear() { @@ -94,10 +96,78 @@ public String get(String key) { public void set(String key, String value) { map.put(key, value); } + + @Override + public Set getKeys() { + return Collections.unmodifiableSet(map.keySet()); + } }; } + /** + * create modifiable options starting from the copy of the data map + * + * @param data data map + * @return options + */ + static HDTOptions of(Map data) { + Objects.requireNonNull(data, "data map can't be null!"); + HDTOptions opt = of(); + opt.setOptions(data); + return opt; + } + + + /** + * create modifiable options starting from initial config, each param should be in the format (key, value)* + * + * @param data data map + * @return options + * @throws IllegalArgumentException if the number of param isn't even + */ + static HDTOptions of(Object... data) { + Objects.requireNonNull(data, "data can't be null!"); + HDTOptions opt = of(); + opt.setOptions(data); + return opt; + } + + /** + * get options or {@link #EMPTY} + * @param options options + * @return options or {@link #EMPTY}, this result has no guaranty or mutability + */ + static HDTOptions ofNullable(HDTOptions options) { + return Objects.requireNonNullElse(options, EMPTY); + } + + /** + * create modifiable options from a file configuration + * + * @param filename file containing the options, see {@link #load(Path)} + * @return options + */ + static HDTOptions readFromFile(Path filename) throws IOException { + return HDTManager.readOptions( + Objects.requireNonNull(filename, "filename can't be null!") + ); + } + + /** + * create modifiable options from a file configuration + * + * @param filename file containing the options, see {@link #load(String)} + * @return options + */ + static HDTOptions readFromFile(String filename) throws IOException { + // use readOptions to have access to HTTP(s) files + return HDTManager.readOptions( + Objects.requireNonNull(filename, "filename can't be null!") + ); + } + + /** * clear all the options */ @@ -111,8 +181,12 @@ public void set(String key, String value) { */ String get(String key); - default Set getKeys() { - throw new NotImplementedException(); + /** + * @return the keys of the options + * @throws NotImplementedException if the implemented class do not implement this method (backward compatibility) + */ + default Set getKeys() { + throw new NotImplementedException("getKeys"); } /** @@ -146,10 +220,11 @@ default String get(String key, Supplier defaultValue) { default boolean getBoolean(String key) { return "true".equalsIgnoreCase(get(key)); } + /** * get a boolean * - * @param key key + * @param key key * @param defaultValue default value * @return boolean or false if the value isn't defined */ @@ -265,6 +340,34 @@ default long getInt(String key, long defaultValue) { return getInt(key, () -> defaultValue); } + + /** + * load properties from a path, see {@link Properties#load(InputStream)} for the format + * + * @param filename file + * @throws IOException load io exception + */ + default void load(Path filename) throws IOException { + Objects.requireNonNull(filename, "filename can't be null"); + Properties properties = new Properties(); + + try (InputStream is = Files.newInputStream(filename)) { + properties.load(is); + } + + properties.forEach((k, v) -> set(String.valueOf(k), v)); + } + + /** + * load properties from a file, see {@link Properties#load(InputStream)} for the format + * + * @param filename file + * @throws IOException load io exception + */ + default void load(String filename) throws IOException { + load(Path.of(Objects.requireNonNull(filename, "filename can't be null"))); + } + /** * set an option value * @@ -280,7 +383,11 @@ default long getInt(String key, long defaultValue) { * @param value value */ default void set(String key, Object value) { - set(key, String.valueOf(value)); + if (value instanceof RDFFluxStop) { + set(key, (RDFFluxStop) value); + } else { + set(key, String.valueOf(value)); + } } /** @@ -295,7 +402,8 @@ default void set(String key, RDFFluxStop fluxStop) { /** * set a profiler id - * @param key key + * + * @param key key * @param profiler profiler */ default void set(String key, Profiler profiler) { @@ -322,9 +430,79 @@ default void setOptions(String options) { int pos = item.indexOf('='); if (pos != -1) { String property = item.substring(0, pos); - String value = item.substring(pos+1); + String value = item.substring(pos + 1); set(property, value); } } } + + /** + * add options + * + * @param options options + */ + default void setOptions(Map options) { + options.forEach((k, v) -> set(String.valueOf(k), v)); + } + + /** + * add options, each param should be in the format (key, value)* + * + * @param options options + */ + default void setOptions(Object... options) { + if ((options.length & 1) != 0) { + throw new IllegalArgumentException("options.length should be even!"); + } + + int len = options.length >> 1; + for (int i = 0; i < len; i++) { + String key = String.valueOf(options[(i << 1)]); + Object value = options[(i << 1) | 1]; + set(key, value); + } + } + + /** + * Write this options into a config file + * + * @param file file + * @throws IOException io exception + */ + default void write(Path file) throws IOException { + write(file, true); + } + /** + * Write this options into a config file + * @param file file + * @param withComment write comments + * @throws IOException io exception + */ + default void write(Path file, boolean withComment) throws IOException { + try (Writer w = Files.newBufferedWriter(file)){ + write(w, withComment); + } + } + /** + * Write this options into a config file + * @param w writer + * @param withComment write comments + * @throws IOException io exception + */ + default void write(Writer w, boolean withComment) throws IOException { + Map optionMap = HDTOptionsKeys.getOptionMap(); + + for (Object okey : getKeys()) { + String key = String.valueOf(okey); + String value = get(key); + + if (withComment) { + HDTOptionsKeys.Option opt = optionMap.get(key); + if (opt != null) { + w.write("# " + opt.getKeyInfo().desc() + "\n# Type: " + opt.getKeyInfo().type().getTitle() + "\n"); + } + } + w.write(key + "=" + value + "\n"); + } + } } diff --git a/hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptionsKeys.java b/hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptionsKeys.java index c0d98b2e..e825676e 100644 --- a/hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptionsKeys.java +++ b/hdt-api/src/main/java/org/rdfhdt/hdt/options/HDTOptionsKeys.java @@ -4,6 +4,7 @@ import org.rdfhdt.hdt.rdf.RDFFluxStop; import java.lang.reflect.Field; +import java.lang.reflect.Modifier; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -315,37 +316,147 @@ public class HDTOptionsKeys { @Key(type = Key.Type.BOOLEAN, desc = "Delete the HDTCat temp files directory after HDTCat, default to true") public static final String HDTCAT_DELETE_LOCATION = "hdtcat.deleteLocation"; - @Key(type = Key.Type.BOOLEAN, desc = "Use disk implementation to generate the hdt sub-index") + /** + * Use disk implementation to generate the hdt sub-index, default false + */ + @Key(type = Key.Type.BOOLEAN, desc = "Use disk implementation to generate the hdt sub-index, default false") public static final String BITMAPTRIPLES_SEQUENCE_DISK = "bitmaptriples.sequence.disk"; - @Key(type = Key.Type.BOOLEAN, desc = "Use disk 375 subindex implementation to generate the hdt sub-index") + + /** + * Use disk 375 bitmap subindex implementation to generate the HDT sub index, default false + */ + @Key(type = Key.Type.BOOLEAN, desc = "Use disk 375 subindex implementation to generate the hdt sub-index, default false") public static final String BITMAPTRIPLES_SEQUENCE_DISK_SUBINDEX = "bitmaptriples.sequence.disk.subindex"; - @Key(type = Key.Type.BOOLEAN, desc = "Disk location for the " + BITMAPTRIPLES_SEQUENCE_DISK + " option") + /** + * disk location for the {@link #BITMAPTRIPLES_SEQUENCE_DISK} option + */ + @Key(type = Key.Type.PATH, desc = "Disk location for the " + BITMAPTRIPLES_SEQUENCE_DISK + " option") public static final String BITMAPTRIPLES_SEQUENCE_DISK_LOCATION = "bitmaptriples.sequence.disk.location"; + + /** + * Bitmap type for the Y bitmap, default {@link HDTVocabulary#BITMAP_TYPE_PLAIN} + */ + @Key(type = Key.Type.STRING, desc = "Bitmap type for the Y bitmap, default " + HDTVocabulary.BITMAP_TYPE_PLAIN) + public static final String BITMAPTRIPLES_BITMAP_Y = "bitmap.y"; + + + /** + * Bitmap type for the Z bitmap, default {@link HDTVocabulary#BITMAP_TYPE_PLAIN} + */ + @Key(type = Key.Type.STRING, desc = "Bitmap type for the Z bitmap, default " + HDTVocabulary.BITMAP_TYPE_PLAIN) + public static final String BITMAPTRIPLES_BITMAP_Z = "bitmap.z"; + + /** + * Sequence type for the Y sequence, default {@link HDTVocabulary#SEQ_TYPE_LOG} + */ + @Key(type = Key.Type.STRING, desc = "Sequence type for the Y sequence, default " + HDTVocabulary.SEQ_TYPE_LOG) + public static final String BITMAPTRIPLES_SEQ_Y = "seq.y"; + + /** + * Sequence type for the Z sequence, default {@link HDTVocabulary#SEQ_TYPE_LOG} + */ + @Key(type = Key.Type.STRING, desc = "Sequence type for the Z sequence, default " + HDTVocabulary.SEQ_TYPE_LOG) + public static final String BITMAPTRIPLES_SEQ_Z = "seq.z"; + + /** + * Indexing method for the bitmap triples, default {@link #BITMAPTRIPLES_INDEX_METHOD_VALUE_RECOMMENDED} + */ + @Key(type = Key.Type.ENUM, desc = "Indexing method for the bitmap triples") + public static final String BITMAPTRIPLES_INDEX_METHOD_KEY = "bitmaptriples.indexmethod"; + + /** + * value for {@link #BITMAPTRIPLES_INDEX_METHOD_KEY}. Recommended implementation, default value + */ + @Value(key = BITMAPTRIPLES_INDEX_METHOD_KEY, desc = "Recommended implementation, default value") + public static final String BITMAPTRIPLES_INDEX_METHOD_VALUE_RECOMMENDED = "recommended"; + + /** + * value for {@link #BITMAPTRIPLES_INDEX_METHOD_KEY}. Legacy implementation, fast, but memory inefficient + */ + @Value(key = BITMAPTRIPLES_INDEX_METHOD_KEY, desc = "Legacy implementation, fast, but memory inefficient") + public static final String BITMAPTRIPLES_INDEX_METHOD_VALUE_LEGACY = "legacy"; + + /** + * value for {@link #BITMAPTRIPLES_INDEX_METHOD_KEY}. Disk option, handle the indexing on disk to reduce usage + */ + @Value(key = BITMAPTRIPLES_INDEX_METHOD_KEY, desc = "Disk option, handle the indexing on disk to reduce usage") + public static final String BITMAPTRIPLES_INDEX_METHOD_VALUE_DISK = "disk"; + /** + * value for {@link #BITMAPTRIPLES_INDEX_METHOD_KEY}. Memory optimized option + */ + @Value(key = BITMAPTRIPLES_INDEX_METHOD_KEY, desc = "Memory optimized option") + public static final String BITMAPTRIPLES_INDEX_METHOD_VALUE_OPTIMIZED = "optimized"; + + /** + * Key for the {@link org.rdfhdt.hdt.hdt.HDTManager} loadIndexed methods, + * say the number of workers to merge the data. default to the number of processor. long value. + */ + @Key(type = Key.Type.NUMBER, desc = "Number of core used to index the HDT with " + BITMAPTRIPLES_INDEX_METHOD_VALUE_DISK + " index method.") + public static final String BITMAPTRIPLES_DISK_WORKER_KEY = "bitmaptriples.indexmethod.disk.compressWorker"; + /** + * Key for the maximum size of a chunk on disk for the {@link org.rdfhdt.hdt.hdt.HDTManager} generateHDTDisk + * methods, the chunk should be in RAM before writing it on disk and should be sorted. long value. + */ + @Key(type = Key.Type.NUMBER, desc = "Maximum size of a chunk") + public static final String BITMAPTRIPLES_DISK_CHUNK_SIZE_KEY = "bitmaptriples.indexmethod.disk.chunkSize"; + /** + * Key for the size of the buffers when opening a file + */ + @Key(type = Key.Type.NUMBER, desc = "Size of the file buffers") + public static final String BITMAPTRIPLES_DISK_BUFFER_SIZE_KEY = "bitmaptriples.indexmethod.disk.fileBufferSize"; + /** + * Key for the maximum number of file opened at the same time, should be greater than {@link #BITMAPTRIPLES_DISK_KWAY_KEY}, + * 1024 by default + */ + @Key(type = Key.Type.NUMBER, desc = "Maximum number of file " + BITMAPTRIPLES_INDEX_METHOD_VALUE_DISK + " index method can open at the same time") + public static final String BITMAPTRIPLES_DISK_MAX_FILE_OPEN_KEY = "bitmaptriples.indexmethod.disk.maxFileOpen"; + /** + * Key for the number of chunk layers opened at the same time, by default + *

min(log2(maxFileOpen), chunkSize / (fileBufferSize * compressWorker))

+ */ + @Key(type = Key.Type.NUMBER, desc = "log of the number of way the system can merge in " + BITMAPTRIPLES_INDEX_METHOD_VALUE_DISK + " index method") + public static final String BITMAPTRIPLES_DISK_KWAY_KEY = "bitmaptriples.indexmethod.disk.kway"; + // use tree-map to have a better order private static final Map OPTION_MAP = new TreeMap<>(); static { try { - for (Field f : HDTOptionsKeys.class.getDeclaredFields()) { - Key key = f.getAnnotation(Key.class); - if (key != null) { - String keyValue = (String) f.get(null); - - OPTION_MAP.put(keyValue, new Option(keyValue, key)); - } else { - Value value = f.getAnnotation(Value.class); - if (value != null) { - String valueValue = (String) f.get(null); - Option opt = OPTION_MAP.get(value.key()); - if (opt != null) { - opt.values.add(new OptionValue(valueValue, value)); - } + registerOptionsClass(HDTOptionsKeys.class); + } catch (Exception e) { + throw new Error("Can't load option keys", e); + } + } + + /** + * register an options class for the {@link #getOptionMap()} method, + * will read all the public static fields with {@link Key} and {@link Value} + * + * @param cls class + * @throws Exception register exception + */ + public static void registerOptionsClass(Class cls) throws Exception { + for (Field f : cls.getDeclaredFields()) { + if ((f.getModifiers() & Modifier.STATIC) == 0 + || (f.getModifiers() & Modifier.PUBLIC) == 0) { + continue; // no static modifier + } + Key key = f.getAnnotation(Key.class); + if (key != null) { + String keyValue = String.valueOf(f.get(null)); + + OPTION_MAP.put(keyValue, new Option(keyValue, key)); + } else { + Value value = f.getAnnotation(Value.class); + if (value != null) { + String valueValue = String.valueOf(f.get(null)); + Option opt = OPTION_MAP.get(value.key()); + if (opt != null) { + opt.values.add(new OptionValue(valueValue, value)); } } } - } catch (Exception e) { - throw new Error("Can't load option keys", e); } } diff --git a/hdt-api/src/main/java/org/rdfhdt/hdt/options/Key.java b/hdt-api/src/main/java/org/rdfhdt/hdt/options/Key.java index 03d73d38..af203a40 100644 --- a/hdt-api/src/main/java/org/rdfhdt/hdt/options/Key.java +++ b/hdt-api/src/main/java/org/rdfhdt/hdt/options/Key.java @@ -1,7 +1,9 @@ package org.rdfhdt.hdt.options; +import java.lang.annotation.ElementType; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; /** * define a key in the HDTOptionsKey class @@ -9,6 +11,7 @@ * @author Antoine Willerval */ @Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.FIELD) public @interface Key { /** * Type enum for a key diff --git a/hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/HDTCat.java b/hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/HDTCat.java index ba0736b8..a257911d 100644 --- a/hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/HDTCat.java +++ b/hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/HDTCat.java @@ -85,13 +85,13 @@ private HDT cat(String location, HDTOptions spec, ProgressListener listener) thr public void execute() throws IOException { - HDTSpecification spec; - if(configFile!=null) { - spec = new HDTSpecification(configFile); + HDTOptions spec; + if(configFile != null) { + spec = HDTOptions.readFromFile(configFile); } else { - spec = new HDTSpecification(); + spec = HDTOptions.of(); } - if(options!=null) { + if (options != null) { spec.setOptions(options); } diff --git a/hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/HdtSearch.java b/hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/HdtSearch.java index cf9970fc..8aab9063 100644 --- a/hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/HdtSearch.java +++ b/hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/HdtSearch.java @@ -38,6 +38,7 @@ import org.rdfhdt.hdt.hdt.HDTManager; import org.rdfhdt.hdt.hdt.HDTVersion; import org.rdfhdt.hdt.listener.ProgressListener; +import org.rdfhdt.hdt.options.HDTOptions; import org.rdfhdt.hdt.triples.IteratorTripleString; import org.rdfhdt.hdt.triples.TripleString; import org.rdfhdt.hdt.util.StopWatch; @@ -45,6 +46,7 @@ import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; +import org.rdfhdt.hdt.util.listener.MultiThreadListenerConsole; import static java.nio.charset.StandardCharsets.UTF_8; @@ -53,10 +55,22 @@ * @author mario.arias * */ -public class HdtSearch implements ProgressListener { +public class HdtSearch { @Parameter(description = "") public List parameters = new ArrayList<>(); + @Parameter(names = "-options", description = "HDT Conversion options (override those of config file)") + public String options; + + @Parameter(names = "-config", description = "Conversion config file") + public String configFile; + + @Parameter(names = "-color", description = "Print using color (if available)") + public boolean color; + + @Parameter(names = "-quiet", description = "Do not show progress of the conversion") + public boolean quiet; + @Parameter(names = "-version", description = "Prints the HDT version number") public static boolean showVersion; @@ -67,7 +81,7 @@ public class HdtSearch implements ProgressListener { protected static void iterate(HDT hdt, CharSequence subject, CharSequence predicate, CharSequence object) throws NotFoundException { StopWatch iterateTime = new StopWatch(); - int count = 0; + int count; subject = subject.length()==1 && subject.charAt(0)=='?' ? "" : subject; predicate = predicate.length()==1 && predicate.charAt(0)=='?' ? "" : predicate; @@ -102,7 +116,7 @@ private void help() { /** * Read from a line, where each component is separated by space. - * @param line + * @param line line to parse */ private static void parseTriplePattern(TripleString dest, String line) throws ParserException { int split, posa, posb; @@ -135,11 +149,25 @@ private static void parseTriplePattern(TripleString dest, String line) throws Pa } public void execute() throws IOException { + HDTOptions spec; + if(configFile != null) { + spec = HDTOptions.readFromFile(configFile); + } else { + spec = HDTOptions.of(); + } + if (options != null) { + spec.setOptions(options); + } + + ProgressListener listenerConsole = + !quiet ? new MultiThreadListenerConsole(color) + : ProgressListener.ignore(); + HDT hdt; if(loadInMemory) { - hdt = HDTManager.loadIndexedHDT(hdtInput, this); + hdt = HDTManager.loadIndexedHDT(hdtInput, listenerConsole, spec); } else { - hdt= HDTManager.mapIndexedHDT(hdtInput, this); + hdt= HDTManager.mapIndexedHDT(hdtInput, spec, listenerConsole); } BufferedReader in = new BufferedReader(new InputStreamReader(System.in, UTF_8)); @@ -176,18 +204,11 @@ public void execute() throws IOException { in.close(); } } - - /* (non-Javadoc) - * @see hdt.ProgressListener#notifyProgress(float, java.lang.String) - */ - @Override - public void notifyProgress(float level, String message) { - //System.out.println(message + "\t"+ Float.toString(level)); - } - + public static void main(String[] args) throws Throwable { HdtSearch hdtSearch = new HdtSearch(); - JCommander com = new JCommander(hdtSearch, args); + JCommander com = new JCommander(hdtSearch); + com.parse(args); com.setProgramName("hdtSearch"); if (showVersion) { diff --git a/hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/RDF2HDT.java b/hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/RDF2HDT.java index 0fb09a54..64f6f49b 100644 --- a/hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/RDF2HDT.java +++ b/hdt-java-cli/src/main/java/org/rdfhdt/hdt/tools/RDF2HDT.java @@ -40,6 +40,7 @@ import org.rdfhdt.hdt.hdt.HDTSupplier; import org.rdfhdt.hdt.hdt.HDTVersion; import org.rdfhdt.hdt.listener.ProgressListener; +import org.rdfhdt.hdt.options.HDTOptions; import org.rdfhdt.hdt.options.HDTOptionsKeys; import org.rdfhdt.hdt.options.HDTSpecification; import org.rdfhdt.hdt.rdf.RDFFluxStop; @@ -135,11 +136,11 @@ private static long findBestMemoryChunkDiskMapTreeCat() { } public void execute() throws ParserException, IOException { - HDTSpecification spec; - if (configFile != null) { - spec = new HDTSpecification(configFile); + HDTOptions spec; + if(configFile != null) { + spec = HDTOptions.readFromFile(configFile); } else { - spec = new HDTSpecification(); + spec = HDTOptions.of(); } if (options != null) { spec.setOptions(options); diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/AdjacencyList.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/AdjacencyList.java index a469d5b2..ddb158b8 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/AdjacencyList.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/AdjacencyList.java @@ -47,7 +47,7 @@ public AdjacencyList(Sequence array, Bitmap bitmap) { this.array = array; this.bitmap = bitmap; if (array.getNumberOfElements() != bitmap.getNumBits()) { - throw new IllegalArgumentException("Adjacency list bitmap and array should have the same size"); + throw new IllegalArgumentException("Adjacency list bitmap and array should have the same size " + array.getNumberOfElements() + "!=" + bitmap.getNumBits()); } } diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap375Big.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap375Big.java index 3209da47..aec75ccf 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap375Big.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap375Big.java @@ -26,7 +26,6 @@ import org.rdfhdt.hdt.util.io.CloseSuppressPath; import org.rdfhdt.hdt.util.io.Closer; import org.rdfhdt.hdt.util.io.IOUtil; -import org.visnow.jlargearrays.LongLargeArray; import java.io.Closeable; import java.io.IOException; @@ -86,7 +85,7 @@ public static Bitmap375Big memory(long nbits) { * @return bitmap */ public static Bitmap375Big memory(long nbits, Path location) { - return new Bitmap375Big(new LargeLongArray(new LongLargeArray(numWords(nbits))), location, location != null); + return new Bitmap375Big(new LargeLongArray(IOUtil.createLargeArray(numWords(nbits))), location, location != null); } // Constants diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap64Big.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap64Big.java index fcaf9ef6..9666ae1b 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap64Big.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/Bitmap64Big.java @@ -68,7 +68,7 @@ public static Bitmap64Big disk(Path location, long nbits) { * @return bitmap */ public static Bitmap64Big memory(long nbits) { - return new Bitmap64Big(new LargeLongArray(new LongLargeArray(numWords(nbits)))); + return new Bitmap64Big(new LargeLongArray(IOUtil.createLargeArray(numWords(nbits)))); } // Constants @@ -221,8 +221,9 @@ public void append(boolean value) { } public void set(long bitIndex, boolean value) { - if (bitIndex < 0) + if (bitIndex < 0) { throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex); + } long wordIndex = wordIndex(bitIndex); try { @@ -354,4 +355,11 @@ public Closer getCloser() { public void close() throws IOException { closer.close(); } + + /** + * @return sync version of this bitmap + */ + public ModifiableBitmap asSync() { + return SyncBitmap.of(this); + } } diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/SyncBitmap.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/SyncBitmap.java new file mode 100644 index 00000000..08ede526 --- /dev/null +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/bitmap/SyncBitmap.java @@ -0,0 +1,141 @@ +package org.rdfhdt.hdt.compact.bitmap; + +import org.rdfhdt.hdt.listener.ProgressListener; +import org.rdfhdt.hdt.util.io.IOUtil; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * sync version of a bitmap + * @param bitmap + */ +public class SyncBitmap implements Bitmap, Closeable { + /** + * create sync bitmap from a bitmap + * @param bitmap bitmap + * @return sync bitmap + */ + public static Bitmap of(Bitmap bitmap) { + if (bitmap instanceof SyncBitmap) { + return bitmap; + } + if (bitmap instanceof ModifiableBitmap) { + return new SyncModBitmap<>((ModifiableBitmap) bitmap); + } + + return new SyncBitmap<>(bitmap); + } + + /** + * create sync mod bitmap from a mod bitmap + * @param bitmap bitmap + * @return sync mod bitmap + */ + public static ModifiableBitmap of(ModifiableBitmap bitmap) { + if (bitmap instanceof SyncBitmap) { + return bitmap; + } + return new SyncModBitmap<>(bitmap); + } + + protected final T bitmap; + + private SyncBitmap(T bitmap) { + this.bitmap = bitmap; + } + + @Override + public synchronized boolean access(long position) { + return bitmap.access(position); + } + + @Override + public synchronized long rank1(long position) { + return bitmap.rank1(position); + } + + @Override + public synchronized long rank0(long position) { + return bitmap.rank0(position); + } + + @Override + public synchronized long selectPrev1(long start) { + return bitmap.selectPrev1(start); + } + + @Override + public synchronized long selectNext1(long start) { + return bitmap.selectNext1(start); + } + + @Override + public synchronized long select0(long n) { + return bitmap.select0(n); + } + + @Override + public synchronized long select1(long n) { + return bitmap.select1(n); + } + + @Override + public synchronized long getNumBits() { + return bitmap.getNumBits(); + } + + @Override + public synchronized long countOnes() { + return bitmap.countOnes(); + } + + @Override + public synchronized long countZeros() { + return bitmap.countZeros(); + } + + @Override + public synchronized long getSizeBytes() { + return bitmap.getSizeBytes(); + } + + @Override + public synchronized void save(OutputStream output, ProgressListener listener) throws IOException { + bitmap.save(output, listener); + } + + @Override + public synchronized void load(InputStream input, ProgressListener listener) throws IOException { + bitmap.load(input, listener); + } + + @Override + public synchronized String getType() { + return bitmap.getType(); + } + + @Override + public void close() throws IOException { + IOUtil.closeObject(bitmap); + } + + private static class SyncModBitmap extends SyncBitmap implements ModifiableBitmap { + + protected SyncModBitmap(T bitmap) { + super(bitmap); + } + + @Override + public synchronized void set(long position, boolean value) { + bitmap.set(position, value); + } + + @Override + public synchronized void append(boolean value) { + bitmap.append(value); + } + } +} diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/integer/VByte.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/integer/VByte.java index dd5d5b7e..f46a2ca1 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/integer/VByte.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/integer/VByte.java @@ -39,20 +39,52 @@ /** * Typical implementation of Variable-Byte encoding for integers. - * http://nlp.stanford.edu/IR-book/html/htmledition/variable-byte-codes-1.html - * + * variable-byte-codes + * * The first bit of each byte specifies whether there are more bytes available. * Numbers from 0 to 126 are encoded using just one byte. * Numbers from 127 to 16383 are encoded using two bytes. * Numbers from 16384 to 2097151 are encoded using three bytes. - * + * * @author mario.arias * */ public class VByte { private VByte() {} - + + /** + * encode a Variable-Byte adding a bit for the sign, should be decoded with {@link #decodeSigned(InputStream)} + * @param out out stream + * @param value value to encode + * @throws IOException write exception + */ + public static void encodeSigned(OutputStream out, long value) throws IOException { + if (value < 0) { + // set the 1st bit to 1 + encode(out, ~(value << 1)); + } else { + encode(out, value << 1); + } + } + + /** + * decode a signed Variable-Byte, should be encoded with {@link #encodeSigned(OutputStream, long)} + * @param in in stream + * @return decoded value + * @throws IOException write exception + */ + public static long decodeSigned(InputStream in) throws IOException { + long decode = decode(in); + if ((decode & 1) == 0) { + // + + return decode >>> 1; + } else { + // - + return ~(decode >>> 1); + } + } + public static void encode(OutputStream out, long value) throws IOException { if(value<0) { throw new IllegalArgumentException("Only can encode VByte of positive values"); diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/Sequence.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/Sequence.java index 2bd8b5df..d0e907a6 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/Sequence.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/Sequence.java @@ -70,6 +70,10 @@ public interface Sequence extends Closeable { */ long size(); + /** + * @return size of the components (in bits) + */ + int sizeOf(); /** * Saves the array to an OutputStream * diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt32.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt32.java index fa31730f..69943a5a 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt32.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt32.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.Arrays; import java.util.List; import java.util.Iterator; @@ -71,6 +72,11 @@ public void resize(long numentries) { this.numelements = (int) numentries; } + @Override + public void clear() { + Arrays.fill(data, 0); + } + private void resizeArray(int size) { int [] newData = new int[size]; System.arraycopy(data, 0, newData, 0, Math.min(newData.length, data.length)); @@ -214,7 +220,7 @@ public String toString() { */ @Override public long size() { - return 4*numelements; + return 4L*numelements; } /* (non-Javadoc) diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt64.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt64.java index 43f09026..2fb76518 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt64.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceInt64.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.Arrays; import java.util.Iterator; import org.rdfhdt.hdt.exceptions.IllegalFormatException; @@ -72,6 +73,11 @@ public void resize(long numentries) { this.numelements = numentries; } + @Override + public void clear() { + Arrays.fill(data, 0); + } + private void resizeArray(int size) { long [] newData = new long[size]; System.arraycopy(data, 0, newData, 0, Math.min(newData.length, data.length)); diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64.java index 85846639..b05a3dc2 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64.java @@ -293,6 +293,11 @@ public void resize(long numentries) { resizeArray((int)numWordsFor(numbits, numentries)); } + @Override + public void clear() { + Arrays.fill(data, 0); + } + /* (non-Javadoc) * @see hdt.triples.array.Stream#getNumberOfElements() */ diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Big.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Big.java index 16666969..43deec59 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Big.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Big.java @@ -57,7 +57,7 @@ public class SequenceLog64Big implements DynamicSequence { LongLargeArray data; private int numbits; - private long numentries=0; + private long numentries; private long maxvalue; public SequenceLog64Big() { @@ -76,7 +76,7 @@ public SequenceLog64Big(int numbits, long capacity) { long size = numWordsFor(numbits, capacity); LongLargeArray.setMaxSizeOf32bitArray(SequenceLog64Big.INDEX); - data = new LongLargeArray(Math.max(size,1)); + data = IOUtil.createLargeArray(Math.max(size,1)); } public SequenceLog64Big(int numbits, long capacity, boolean initialize) { @@ -157,7 +157,7 @@ private void resizeArray(long size) { //data = Arrays.copyOf(data, size); if(size > 0) { if (data.length() != size) { - LongLargeArray a = new LongLargeArray(size); + LongLargeArray a = IOUtil.createLargeArray(size, false); LargeArrayUtils.arraycopy(data, 0, a, 0, Math.min(size, data.length())); data = a; } @@ -186,7 +186,7 @@ public void add(Iterator elements) { // Prepare array numbits = BitUtil.log2(max); long size = numWordsFor(numbits, numentries); - data = new LongLargeArray(size); + data = IOUtil.createLargeArray(size); // Save int count = 0; @@ -198,32 +198,6 @@ public void add(Iterator elements) { } } - public void addIntegers(ArrayList elements) { - long max = 0; - numentries = 0; - - // Count and calculate number of bits needed per element. - for (int i=0;imaxvalue) { - //throw new IllegalArgumentException("Value exceeds the maximum for this data structure"); - //} + if (value < 0 || value > maxvalue) { + throw new IllegalArgumentException("Value exceeds the maximum for this data structure " + value + " > " + maxvalue); + } setField(data, numbits, position, value); } @@ -275,7 +249,9 @@ public void aggressiveTrimToSize() { // Count and calculate number of bits needed per element. for(long i=0; imax ? value : max; + if (value > max) { + max = value; + } } int newbits = BitUtil.log2(max); @@ -309,6 +285,11 @@ public void resize(long numentries) { resizeArray(numWordsFor(numbits, numentries)); } + @Override + public void clear() { + IOUtil.fillLargeArray(data, 0); + } + /* (non-Javadoc) * @see hdt.triples.array.Stream#getNumberOfElements() */ @@ -350,7 +331,6 @@ public void save(OutputStream output, ProgressListener listener) throws IOExcept /* (non-Javadoc) * @see hdt.triples.array.Stream#load(java.io.InputStream, hdt.ProgressListener) */ - @SuppressWarnings("resource") @Override public void load(InputStream input, ProgressListener listener) throws IOException { CRCInputStream in = new CRCInputStream(input, new CRC8()); @@ -373,7 +353,7 @@ public void load(InputStream input, ProgressListener listener) throws IOExceptio in.setCRC(new CRC32()); long numwords = numWordsFor(numbits, numentries); - data = new LongLargeArray(numwords); + data = IOUtil.createLargeArray(numwords); for(long i=0;iW)) { mask = ~0L << (bitsField+j-W); @@ -175,14 +176,19 @@ public long get(long position) { //throw new IndexOutOfBoundsException(); // } + if (position < 0 || numWordsFor(numbits, position) > data.length()) { + throw new IndexOutOfBoundsException(position + " < 0 || " + position + " > " + data.length() * 64 / numbits); + } + return getField(data, numbits, position); } @Override public void set(long position, long value) { - if (value<0 || value>maxvalue) { - throw new IllegalArgumentException("Value exceeds the maximum for this data structure"); + if (value < 0 || value > maxvalue) { + throw new IllegalArgumentException("Value exceeds the maximum for this data structure " + value + " > " + maxvalue); } + //System.out.println("numbits "+this.numbits); setField(data, numbits, position, value); } @@ -219,7 +225,9 @@ public void aggressiveTrimToSize() { // Count and calculate number of bits needed per element. for(long i=0; imax ? value : max; + if (value > max) { + max = value; + } } int newbits = BitUtil.log2(max); @@ -264,6 +272,11 @@ public void resize(long numentries) { } } + @Override + public void clear() { + data.clear(); + } + /* (non-Javadoc) * @see hdt.triples.array.Stream#getNumberOfElements() */ @@ -290,7 +303,7 @@ public void save(OutputStream output, ProgressListener listener) throws IOExcept long numwords = numWordsFor(numbits, numentries); for(long i=0;i0) { @@ -331,9 +344,7 @@ public String getType() { @Override public void close() throws IOException { - if (data != null) { - data.close(); - } + IOUtil.closeObject(data); data=null; } } diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Map.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Map.java index 3bdb5d2c..caa82896 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Map.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/sequence/SequenceLog64Map.java @@ -123,7 +123,12 @@ public SequenceLog64Map(int numbits, long numentries, File f) throws IOException mapFiles(f, 0); } - + + @Override + public int sizeOf() { + return numbits; + } + private void mapFiles(File f, long base) throws IOException { // Read packed data ch = FileChannel.open(Paths.get(f.toString())); diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/HDTManagerImpl.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/HDTManagerImpl.java index 84921307..ee4e5930 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/HDTManagerImpl.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/HDTManagerImpl.java @@ -114,7 +114,7 @@ public HDT doLoadIndexedHDT(InputStream hdtFile, ProgressListener listener, HDTO @Override public HDT doIndexedHDT(HDT hdt, ProgressListener listener) throws IOException { - ((HDTPrivate)hdt).loadOrCreateIndex(listener, new HDTSpecification()); + ((HDTPrivate)hdt).loadOrCreateIndex(listener, HDTOptions.of()); return hdt; } diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/HDTBase.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/HDTBase.java index 2c2befe0..7bff64d5 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/HDTBase.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/HDTBase.java @@ -33,11 +33,7 @@ public abstract class HDTBase> { + private final CloseSuppressPath baseFileName; + private final AsyncIteratorFetcher source; + private final MultiThreadListener listener; + private final int bufferSize; + private final long chunkSize; + private final int k; + private final Comparator comparator; + private final AtomicLong read = new AtomicLong(); + + public DiskIndexSort(CloseSuppressPath baseFileName, AsyncIteratorFetcher source, MultiThreadListener listener, int bufferSize, long chunkSize, int k, Comparator comparator) { + this.source = source; + this.listener = MultiThreadListener.ofNullable(listener); + this.baseFileName = baseFileName; + this.bufferSize = bufferSize; + this.chunkSize = chunkSize; + this.k = k; + this.comparator = comparator; + } + + @Override + public void createChunk(SizeFetcher flux, CloseSuppressPath output) throws KWayMerger.KWayMergerException { + ParallelSortableArrayList pairs = new ParallelSortableArrayList<>(Pair[].class); + + Pair pair; + // loading the pairs + listener.notifyProgress(10, "reading pairs part 0"); + while ((pair = flux.get()) != null) { + pairs.add(pair); + long r = read.incrementAndGet(); + if (r % 1_000_000 == 0) { + listener.notifyProgress(10, "reading pairs part " + r); + } + } + + // sort the pairs + pairs.parallelSort(comparator); + + // write the result on disk + int count = 0; + int block = pairs.size() < 10 ? 1 : pairs.size() / 10; + IntermediateListener il = new IntermediateListener(listener); + il.setRange(70, 100); + il.notifyProgress(0, "creating file"); + try (PairWriter w = new PairWriter(output.openOutputStream(bufferSize), pairs.size())) { + // encode the size of the chunk + for (int i = 0; i < pairs.size(); i++) { + if (i % block == 0) { + il.notifyProgress(i / (block / 10f), "writing pair " + count + "/" + pairs.size()); + } + w.append(pairs.get(i)); + } + listener.notifyProgress(100, "writing completed " + pairs.size() + " " + output.getFileName()); + } catch (IOException e) { + throw new KWayMerger.KWayMergerException("Can't write chunk", e); + } + } + + @Override + public void mergeChunks(List inputs, CloseSuppressPath output) throws KWayMerger.KWayMergerException { + try { + listener.notifyProgress(0, "merging pairs " + output.getFileName()); + PairReader[] readers = new PairReader[inputs.size()]; + long count = 0; + try { + for (int i = 0; i < inputs.size(); i++) { + readers[i] = new PairReader(inputs.get(i).openInputStream(bufferSize)); + } + + ExceptionIterator it = PairMergeIterator.buildOfTree(readers, comparator); + // at least one + long rSize = it.getSize(); + long size = Math.max(rSize, 1); + long block = size < 10 ? 1 : size / 10; + try (PairWriter w = new PairWriter(output.openOutputStream(bufferSize), rSize)) { + while (it.hasNext()) { + w.append(it.next()); + if (count % block == 0) { + listener.notifyProgress(count / (block / 10f), "merging pairs " + count + "/" + size); + } + count++; + } + } + } finally { + IOUtil.closeAll(readers); + } + listener.notifyProgress(100, "pairs merged " + output.getFileName() + " " + count); + // delete old pairs + IOUtil.closeAll(inputs); + } catch (IOException e) { + throw new KWayMerger.KWayMergerException(e); + } + } + + @Override + public SizeFetcher newStopFlux(Supplier flux) { + return SizeFetcher.of(flux, p -> 3 * Long.BYTES, chunkSize); + } + + /** + * sort the pairs + * + * @param workers number of workers to handle the kway merge + * @return exception iterator, might implement {@link java.io.Closeable}, use {@link IOUtil#closeObject(Object)} if required + * @throws InterruptedException thread interruption + * @throws IOException io exception + * @throws KWayMerger.KWayMergerException exception during the kway merge + */ + public ExceptionIterator sort(int workers) throws InterruptedException, IOException, KWayMerger.KWayMergerException { + listener.notifyProgress(0, "Pair sort asked in " + baseFileName.toAbsolutePath()); + // force to create the first file + KWayMerger> merger = new KWayMerger<>(baseFileName, source, this, Math.max(1, workers - 1), k); + merger.start(); + // wait for the workers to merge the sections and create the triples + Optional sections = merger.waitResult(); + if (sections.isEmpty()) { + return ExceptionIterator.empty(); + } + CloseSuppressPath path = sections.get(); + return new PairReader(path.openInputStream(bufferSize)) { + @Override + public void close() throws IOException { + try { + super.close(); + } finally { + IOUtil.closeObject(path); + } + } + }; + } + +} diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/diskindex/ObjectAdjReader.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/diskindex/ObjectAdjReader.java new file mode 100644 index 00000000..1c289bc9 --- /dev/null +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/hdt/impl/diskindex/ObjectAdjReader.java @@ -0,0 +1,37 @@ +package org.rdfhdt.hdt.hdt.impl.diskindex; + +import org.rdfhdt.hdt.compact.bitmap.Bitmap; +import org.rdfhdt.hdt.compact.sequence.Sequence; +import org.rdfhdt.hdt.iterator.utils.FetcherIterator; +import org.rdfhdt.hdt.util.io.compress.Pair; + +public class ObjectAdjReader extends FetcherIterator { + private final Sequence seqZ, seqY; + private final Bitmap bitmapZ; + private long indexY, indexZ; + + public ObjectAdjReader(Sequence seqZ, Sequence seqY, Bitmap bitmapZ) { + this.seqZ = seqZ; + this.seqY = seqY; + this.bitmapZ = bitmapZ; + } + + @Override + protected Pair getNext() { + if (indexZ >= seqZ.getNumberOfElements()) { + return null; + } + + Pair pair = new Pair(); + // create a pair object + pair.object = seqZ.get(indexZ); + pair.predicatePosition = indexY; + pair.predicate = seqY.get(indexY); + + // shift to the next predicate if required + if (bitmapZ.access(indexZ++)) { + indexY++; + } + return pair; + } +} diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/ExceptionIterator.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/ExceptionIterator.java index be1a8799..e23567f9 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/ExceptionIterator.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/ExceptionIterator.java @@ -1,5 +1,6 @@ package org.rdfhdt.hdt.iterator.utils; +import org.rdfhdt.hdt.exceptions.NotImplementedException; import org.rdfhdt.hdt.listener.ProgressListener; import java.util.Iterator; @@ -220,4 +221,11 @@ public void remove() { } }; } + + /** + * @return -1 if undefined, the size of the iterator otherwise + */ + default long getSize() { + return -1; + } } diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/FetcherIterator.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/FetcherIterator.java new file mode 100644 index 00000000..1ec0c3f6 --- /dev/null +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/FetcherIterator.java @@ -0,0 +1,59 @@ +package org.rdfhdt.hdt.iterator.utils; + +import java.util.Iterator; +import java.util.function.Function; + +/** + * Iterator implementation without the next element fetching method + * @param iterator type + */ +public abstract class FetcherIterator implements Iterator { + private T next; + + protected FetcherIterator() { + } + + /** + * @return the next element, or null if it is the end + */ + protected abstract T getNext(); + + @Override + public boolean hasNext() { + if (next != null) { + return true; + } + next = getNext(); + return next != null; + } + + @Override + public T next() { + try { + return peek(); + } finally { + next = null; + } + } + + /** + * @return peek the element without passing to the next element + */ + public T peek() { + if (hasNext()) { + return next; + } + return null; + } + + /** + * map this iterator + * + * @param mappingFunction func + * @param new type + * @return iterator + */ + public Iterator map(Function mappingFunction) { + return new MapIterator<>(this, mappingFunction); + } +} diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/MapExceptionIterator.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/MapExceptionIterator.java index 5eef7e71..ac91171d 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/MapExceptionIterator.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/MapExceptionIterator.java @@ -35,6 +35,11 @@ public void remove() throws E{ base.remove(); } + @Override + public long getSize() { + return base.getSize(); + } + @FunctionalInterface public interface MapWithIdFunction { N apply(M element, long index) throws E; diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/MergeExceptionIterator.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/MergeExceptionIterator.java index 0074c5a3..ea418604 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/MergeExceptionIterator.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/MergeExceptionIterator.java @@ -187,6 +187,16 @@ public boolean hasNext() throws E { return false; } + @Override + public long getSize() { + long s1 = in1.getSize(); + long s2 = in2.getSize(); + if (s1 == -1 || s2 == -1) { + return -1; + } + return s2 + s1; + } + @Override public T next() throws E { if (!hasNext()) { diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/NotificationExceptionIterator.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/NotificationExceptionIterator.java index ad201859..c3fa523e 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/NotificationExceptionIterator.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/NotificationExceptionIterator.java @@ -54,4 +54,9 @@ public T next() throws E { public void remove() throws E { it.remove(); } + + @Override + public long getSize() { + return it.getSize(); + } } diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/SizeFetcher.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/SizeFetcher.java index e91bca60..0141a43a 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/SizeFetcher.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/iterator/utils/SizeFetcher.java @@ -13,11 +13,15 @@ */ public class SizeFetcher implements Supplier { public static SizeFetcher ofTripleString(Supplier supplier, long maxSize) { - return new SizeFetcher<>(supplier, FileTripleIterator::estimateSize, maxSize); + return of(supplier, FileTripleIterator::estimateSize, maxSize); } public static SizeFetcher ofTripleLong(Supplier supplier, long maxSize) { - return new SizeFetcher<>(supplier, tripleID -> 4L * Long.BYTES, maxSize); + return of(supplier, tripleID -> 4L * Long.BYTES, maxSize); + } + + public static SizeFetcher of(Supplier supplier, ToLongFunction sizeGetter, long maxSize) { + return new SizeFetcher<>(supplier, sizeGetter, maxSize); } private final Supplier supplier; diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/options/HDTSpecification.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/options/HDTSpecification.java index 8cb77ac1..628967e8 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/options/HDTSpecification.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/options/HDTSpecification.java @@ -27,8 +27,12 @@ package org.rdfhdt.hdt.options; -import java.io.FileInputStream; +import org.rdfhdt.hdt.util.io.IOUtil; + import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; /** * Holds the properties of HDT from a configuration file @@ -38,8 +42,6 @@ public class HDTSpecification extends HDTOptionsBase { /** * Default constructor, reads the file config.properties - * - * */ public HDTSpecification() { super(); @@ -47,23 +49,38 @@ public HDTSpecification() { /** * Constructor that reads a specific filename - * - * @param filename - * @throws IOException + * + * @param filename file + * @throws IOException load io exception */ public HDTSpecification(String filename) throws IOException { super(); load(filename); } + /** + * Constructor that reads a specific filename + * + * @param filename file + * @throws IOException load io exception + */ + public HDTSpecification(Path filename) throws IOException { + super(); + load(filename); + } + + @Override public void load(String filename) throws IOException { - FileInputStream fin = new FileInputStream(filename); - try { - properties.load(fin); - } finally { - fin.close(); + try (InputStream is = IOUtil.getFileInputStream(filename)) { + properties.load(is); } + } + @Override + public void load(Path filename) throws IOException { + try (InputStream is = Files.newInputStream(filename)) { + properties.load(is); + } } } diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/options/HideHDTOptions.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/options/HideHDTOptions.java index d6728717..a66e82ef 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/options/HideHDTOptions.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/options/HideHDTOptions.java @@ -13,6 +13,12 @@ public class HideHDTOptions implements HDTOptions { private final Function mapper; private final Map customOptions = new HashMap<>(); + /** + * @param spec wrapped options + */ + public HideHDTOptions(HDTOptions spec) { + this(spec, Function.identity()); + } /** * @param spec wrapped options * @param mapper mapping function (key) {@literal ->} newKey? @@ -37,7 +43,7 @@ public void overrideValue(String key, Object value) { } @Override - public Set getKeys() { + public Set getKeys() { return spec.getKeys(); } diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/TriplesPrivate.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/TriplesPrivate.java index 531727f1..e935b323 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/TriplesPrivate.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/TriplesPrivate.java @@ -5,6 +5,7 @@ import java.io.InputStream; import java.io.OutputStream; +import org.rdfhdt.hdt.dictionary.Dictionary; import org.rdfhdt.hdt.enums.TripleComponentOrder; import org.rdfhdt.hdt.iterator.SuppliableIteratorTripleID; import org.rdfhdt.hdt.listener.ProgressListener; @@ -46,7 +47,7 @@ public interface TriplesPrivate extends Triples { * Generates the associated Index * @param listener */ - void generateIndex(ProgressListener listener, HDTOptions spec) throws IOException; + void generateIndex(ProgressListener listener, HDTOptions spec, Dictionary dictionary) throws IOException; /** * Loads the associated Index from an InputStream diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriples.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriples.java index c2a9e481..fadef165 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriples.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/triples/impl/BitmapTriples.java @@ -35,12 +35,19 @@ import org.rdfhdt.hdt.compact.sequence.SequenceLog64; import org.rdfhdt.hdt.compact.sequence.SequenceLog64Big; import org.rdfhdt.hdt.compact.sequence.SequenceLog64BigDisk; +import org.rdfhdt.hdt.dictionary.Dictionary; import org.rdfhdt.hdt.enums.TripleComponentOrder; import org.rdfhdt.hdt.exceptions.IllegalFormatException; import org.rdfhdt.hdt.hdt.HDTVocabulary; +import org.rdfhdt.hdt.hdt.impl.HDTDiskImporter; +import org.rdfhdt.hdt.hdt.impl.diskindex.DiskIndexSort; +import org.rdfhdt.hdt.hdt.impl.diskindex.ObjectAdjReader; import org.rdfhdt.hdt.header.Header; import org.rdfhdt.hdt.iterator.SequentialSearchIteratorTripleID; import org.rdfhdt.hdt.iterator.SuppliableIteratorTripleID; +import org.rdfhdt.hdt.iterator.utils.AsyncIteratorFetcher; +import org.rdfhdt.hdt.iterator.utils.ExceptionIterator; +import org.rdfhdt.hdt.listener.MultiThreadListener; import org.rdfhdt.hdt.listener.ProgressListener; import org.rdfhdt.hdt.options.*; import org.rdfhdt.hdt.triples.IteratorTripleID; @@ -49,23 +56,21 @@ import org.rdfhdt.hdt.triples.TriplesPrivate; import org.rdfhdt.hdt.util.BitUtil; import org.rdfhdt.hdt.util.StopWatch; +import org.rdfhdt.hdt.util.concurrent.KWayMerger; import org.rdfhdt.hdt.util.io.CloseSuppressPath; +import org.rdfhdt.hdt.util.io.Closer; import org.rdfhdt.hdt.util.io.CountInputStream; import org.rdfhdt.hdt.util.io.IOUtil; +import org.rdfhdt.hdt.util.io.compress.Pair; import org.rdfhdt.hdt.util.listener.IntermediateListener; import org.rdfhdt.hdt.util.listener.ListenerUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.Closeable; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; +import java.io.*; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; -import java.util.Collections; import java.util.Comparator; import java.util.List; @@ -106,11 +111,11 @@ public BitmapTriples(HDTOptions spec) throws IOException { loadDiskSequence(spec); - bitmapY = BitmapFactory.createBitmap(spec.get("bitmap.y")); - bitmapZ = BitmapFactory.createBitmap(spec.get("bitmap.z")); + bitmapY = BitmapFactory.createBitmap(spec.get(HDTOptionsKeys.BITMAPTRIPLES_BITMAP_Y)); + bitmapZ = BitmapFactory.createBitmap(spec.get(HDTOptionsKeys.BITMAPTRIPLES_BITMAP_Z)); - seqY = SequenceFactory.createStream(spec.get("seq.y")); - seqZ = SequenceFactory.createStream(spec.get("seq.z")); + seqY = SequenceFactory.createStream(spec.get(HDTOptionsKeys.BITMAPTRIPLES_SEQ_Y)); + seqZ = SequenceFactory.createStream(spec.get(HDTOptionsKeys.BITMAPTRIPLES_SEQ_Z)); adjY = new AdjacencyList(seqY, bitmapY); adjZ = new AdjacencyList(seqZ, bitmapZ); @@ -174,8 +179,8 @@ public void load(IteratorTripleID it, ProgressListener listener) { long number = it.estimatedNumResults(); - DynamicSequence vectorY = new SequenceLog64Big(BitUtil.log2(number), number); - DynamicSequence vectorZ = new SequenceLog64Big(BitUtil.log2(number), number); + DynamicSequence vectorY = new SequenceLog64Big(BitUtil.log2(number), number + 1); + DynamicSequence vectorZ = new SequenceLog64Big(BitUtil.log2(number), number + 1); ModifiableBitmap bitY = Bitmap375Big.memory(number); ModifiableBitmap bitZ = Bitmap375Big.memory(number); @@ -473,7 +478,7 @@ public void close() throws IOException { } } - public ModifiableBitmap createBitmap375(Path baseDir, String name, long size) { + public Bitmap375Big createBitmap375(Path baseDir, String name, long size) { if (diskSequence) { Path path = baseDir.resolve(name); Bitmap375Big bm = Bitmap375Big.disk(path, size, diskSubIndex); @@ -484,7 +489,174 @@ public ModifiableBitmap createBitmap375(Path baseDir, String name, long size) { } } - private void createIndexObjectMemoryEfficient(HDTOptions specIndex) throws IOException { + static long getMaxChunkSizeDiskIndex(int workers) { + return (long) (HDTDiskImporter.getAvailableMemory() * 0.85 / (3L * workers)); + } + + private void createIndexObjectDisk(HDTOptions spec, Dictionary dictionary, ProgressListener plistener) throws IOException { + MultiThreadListener listener = ListenerUtil.multiThreadListener(plistener); + StopWatch global = new StopWatch(); + // load the config + Path diskLocation; + if (diskSequence) { + diskLocation = diskSequenceLocation.createOrGetPath(); + } else { + diskLocation = Files.createTempDirectory("bitmapTriples"); + } + int workers = (int) spec.getInt( + HDTOptionsKeys.BITMAPTRIPLES_DISK_WORKER_KEY, + Runtime.getRuntime()::availableProcessors + ); + // check and set default values if required + if (workers <= 0) { + throw new IllegalArgumentException("Number of workers should be positive!"); + } + long chunkSize = spec.getInt( + HDTOptionsKeys.BITMAPTRIPLES_DISK_CHUNK_SIZE_KEY, + () -> getMaxChunkSizeDiskIndex(workers) + ); + if (chunkSize < 0) { + throw new IllegalArgumentException("Negative chunk size!"); + } + long maxFileOpenedLong = spec.getInt( + HDTOptionsKeys.BITMAPTRIPLES_DISK_MAX_FILE_OPEN_KEY, + 1024 + ); + int maxFileOpened; + if (maxFileOpenedLong < 0 || maxFileOpenedLong > Integer.MAX_VALUE) { + throw new IllegalArgumentException("maxFileOpened should be positive!"); + } else { + maxFileOpened = (int) maxFileOpenedLong; + } + long kwayLong = spec.getInt( + HDTOptionsKeys.BITMAPTRIPLES_DISK_KWAY_KEY, + () -> Math.max(1, BitUtil.log2(maxFileOpened / workers)) + ); + int k; + if (kwayLong <= 0 || kwayLong > Integer.MAX_VALUE) { + throw new IllegalArgumentException("kway can't be negative!"); + } else { + k = 1 << ((int) kwayLong); + } + long bufferSizeLong = spec.getInt(HDTOptionsKeys.BITMAPTRIPLES_DISK_BUFFER_SIZE_KEY, CloseSuppressPath.BUFFER_SIZE); + int bufferSize; + if (bufferSizeLong > Integer.MAX_VALUE - 5L || bufferSizeLong <= 0) { + throw new IllegalArgumentException("Buffer size can't be negative or bigger than the size of an array!"); + } else { + bufferSize = (int) bufferSizeLong; + } + + // start the indexing + DiskIndexSort sort = new DiskIndexSort( + CloseSuppressPath.of(diskLocation).resolve("chunks"), + new AsyncIteratorFetcher<>(new ObjectAdjReader(seqZ, seqY, bitmapZ)), + listener, + bufferSize, + chunkSize, + k, + Comparator.comparingLong(p -> p.object).thenComparingLong(p -> p.predicate) + ); + + // Serialize + DynamicSequence indexZ = null; + ModifiableBitmap bitmapIndexZ = null; + DynamicSequence predCount = null; + + global.reset(); + + try { + try { + ExceptionIterator sortedPairs = sort.sort(workers); + + log.info("Pair sorted in {}", global.stopAndShow()); + + global.reset(); + indexZ = createSequence64(diskLocation, "indexZ", BitUtil.log2(seqY.getNumberOfElements()), seqZ.getNumberOfElements()); + bitmapIndexZ = createBitmap375(diskLocation, "bitmapIndexZ", seqZ.getNumberOfElements()); + try { + long lastObj = -2; + long index = 0; + + long size = Math.max(seqZ.getNumberOfElements(), 1); + long block = size < 10 ? 1 : size / 10; + + while (sortedPairs.hasNext()) { + Pair pair = sortedPairs.next(); + + long object = pair.object; + long y = pair.predicatePosition; + + if (lastObj == object - 1) { + // fill the bitmap index to denote the new object + bitmapIndexZ.set(index - 1, true); + } else if (!(lastObj == object)) { + // non increasing Z? + if (lastObj == -2) { + if (object != 1) { + throw new IllegalArgumentException("Pair object start after 1! " + object); + } + // start, ignore + } else { + throw new IllegalArgumentException("Non 1 increasing object! lastObj: " + lastObj + ", object: " + object); + } + } + lastObj = object; + + // fill the sequence with the predicate id + if (index % block == 0) { + listener.notifyProgress(index / (block / 10f), "writing bitmapIndexZ/indexZ " + index + "/" + size); + } + indexZ.set(index, y); + index++; + } + listener.notifyProgress(100, "indexZ completed " + index); + bitmapIndexZ.set(index - 1, true); + } finally { + IOUtil.closeObject(sortedPairs); + } + + log.info("indexZ/bitmapIndexZ completed in {}", global.stopAndShow()); + } catch (KWayMerger.KWayMergerException | InterruptedException e) { + if (e.getCause() != null) { + IOUtil.throwIOOrRuntime(e.getCause()); + } + throw new RuntimeException("Can't sort pairs", e); + } + + global.reset(); + predCount = createSequence64(diskLocation, "predCount", BitUtil.log2(seqY.getNumberOfElements()), dictionary.getNpredicates()); + + long size = Math.max(seqY.getNumberOfElements(), 1); + long block = size < 10 ? 1 : size / 10; + + for (long i = 0; i < seqY.getNumberOfElements(); i++) { + // Read value + long val = seqY.get(i); + + if (i % block == 0) { + listener.notifyProgress(i / (block / 10f), "writing predCount " + i + "/" + size); + } + // Increment + predCount.set(val - 1, predCount.get(val - 1) + 1); + } + predCount.trimToSize(); + listener.notifyProgress(100, "predCount completed " + seqY.getNumberOfElements()); + log.info("Predicate count completed in {}", global.stopAndShow()); + } catch (Throwable t) { + try { + throw t; + } finally { + Closer.closeAll(indexZ, bitmapIndexZ, predCount); + } + } + this.predicateCount = predCount; + this.indexZ = indexZ; + this.bitmapIndexZ = bitmapIndexZ; + this.adjIndex = new AdjacencyList(this.indexZ, this.bitmapIndexZ); + log.info("Index generated in {}", global.stopAndShow()); + } + + private void createIndexObjectMemoryEfficient() throws IOException { Path diskLocation; if (diskSequence) { diskLocation = diskSequenceLocation.createOrGetPath(); @@ -645,9 +817,7 @@ public String toString() { throw t; } finally { try { - if (bitmapIndex instanceof Closeable) { - ((Closeable) bitmapIndex).close(); - } + IOUtil.closeObject(bitmapIndex); } finally { try { if (objectArray != null) { @@ -673,15 +843,13 @@ public String toString() { log.info("Index generated in {}", global.stopAndShow()); } - @SuppressWarnings("unused") private void createIndexObjects() { - // FIXME: Fast but very memory inefficient. class Pair { int valueY; int positionY; } - ArrayList> list=new ArrayList>(); + ArrayList> list=new ArrayList<>(); System.out.println("Generating HDT Index for ?PO, and ??O queries."); // Generate lists @@ -696,7 +864,7 @@ class Pair { if(list.size()<=(int)valueZ) { list.ensureCapacity((int)valueZ); while(list.size()(1)); + list.add(new ArrayList<>(1)); } } @@ -724,12 +892,7 @@ class Pair { List inner = list.get(i); // Sort by Y - Collections.sort(inner, new Comparator() { - @Override - public int compare(Pair o1, Pair o2) { - return o1.valueY-o2.valueY; - } - }); + inner.sort(Comparator.comparingInt(o -> o.valueY)); // Serialize for(int j=0;j 0) { if (array.length() != newSize) { - LongLargeArray a = new LongLargeArray(newSize); + LongLargeArray a = IOUtil.createLargeArray(newSize, false); LargeArrayUtils.arraycopy(array, 0, a, 0, Math.min(newSize, array.length())); array = a; } } } + + @Override + public void clear() { + IOUtil.fillLargeArray(array, 0); + } } diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/LongArray.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/LongArray.java index 37e614a4..bb4109aa 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/LongArray.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/LongArray.java @@ -5,7 +5,7 @@ /** * Describe a large array of longs */ -public interface LongArray{ +public interface LongArray { /** * get an element at a particular index * @@ -39,4 +39,16 @@ public interface LongArray{ * @throws IOException io exception */ void resize(long newSize) throws IOException; + + /** + * clear the long array, ie: set 0s + */ + void clear(); + + /** + * @return sync version of this long array, might return this if this LongArray is already a sync array + */ + default LongArray asSync() { + return SyncLongArray.of(this); + } } diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/LongArrayDisk.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/LongArrayDisk.java index f3c41023..2032d6ca 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/LongArrayDisk.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/LongArrayDisk.java @@ -20,21 +20,16 @@ package org.rdfhdt.hdt.util.disk; -import org.rdfhdt.hdt.util.BitUtil; import org.rdfhdt.hdt.util.io.CloseMappedByteBuffer; import org.rdfhdt.hdt.util.io.IOUtil; import java.io.Closeable; import java.io.IOException; import java.io.RandomAccessFile; -import java.nio.ByteBuffer; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.nio.file.Path; import java.nio.file.StandardOpenOption; -import java.util.Arrays; -import java.util.Objects; -import java.util.stream.Collectors; //Implementing an array of longs that is backed up on disk. Following this: http://vanillajava.blogspot.fr/2011/12/using-memory-mapped-file-for-huge.html @@ -46,7 +41,7 @@ public class LongArrayDisk implements Closeable, LongArray { private final Path location; public LongArrayDisk(String location, long size) { - this(Path.of(location), size); + this(location, size, true); } public LongArrayDisk(String location, long size, boolean overwrite) { @@ -81,7 +76,7 @@ public LongArrayDisk(Path location, long size, boolean overwrite) { mappings[block] = IOUtil.mapChannel(location.toAbsolutePath().toString(), channel, FileChannel.MapMode.READ_WRITE, block * MAPPING_SIZE, sizeMapping); } if (overwrite) { - set0(0, this.size); + clear(); } } catch (IOException e) { try { @@ -121,10 +116,6 @@ public long get(long x) { return mappings[block].getLong(offset); } - public long getLong(long x) { - return this.get(x); - } - @Override public void set(long index, long value) { if (index >= size || index < 0) { @@ -147,8 +138,8 @@ public int sizeOf() { } public void set0(long startIndex, long endIndex) { - long start = startIndex * 8L; - long end = endIndex * 8L; + long start = startIndex * 8; + long end = endIndex * 8; if (start >= end) { return; @@ -188,12 +179,14 @@ public void set0(long startIndex, long endIndex) { int toWrite = endBuffer - startBuffer; + int shift = 0; while (toWrite > 0) { - mapping.position(startBuffer); + mapping.position(startBuffer + shift); int w = Math.min(toWrite, zeros.length); mapping.put(zeros, 0, w); + shift += w; toWrite -= w; c += w; if (c > 10000000) { @@ -256,6 +249,11 @@ public void resize(long newSize) throws IOException { } } + @Override + public void clear() { + set0(0, length()); + } + /** * @return the location of the array disk */ diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/SimpleSplitLongArray.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/SimpleSplitLongArray.java index 8de8831c..416cc5c8 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/SimpleSplitLongArray.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/SimpleSplitLongArray.java @@ -49,7 +49,7 @@ public static SimpleSplitLongArray int64Array(long size) { } public static SimpleSplitLongArray intXArray(long size, int x) { - return new SimpleSplitLongArray(new LargeLongArray(new LongLargeArray(1 + size / (64 / x))), x, size); + return new SimpleSplitLongArray(new LargeLongArray(IOUtil.createLargeArray(1 + size / (64 / x))), x, size); } public static SimpleSplitLongArray int8ArrayDisk(Path location, long size) { @@ -108,6 +108,11 @@ public void resize(long newSize) throws IOException { array.resize(newSize / (64 / numbits)); } + @Override + public void clear() { + array.clear(); + } + @Override public void close() throws IOException { IOUtil.closeObject(array); diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/SyncLongArray.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/SyncLongArray.java new file mode 100644 index 00000000..a1173aa6 --- /dev/null +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/disk/SyncLongArray.java @@ -0,0 +1,59 @@ +package org.rdfhdt.hdt.util.disk; + +import java.io.IOException; + +/** + * sync a long array + * + * @author Antoine Willerval + */ +public class SyncLongArray implements LongArray { + /** + * Sync a long array + * + * @param other the array + * @return sync version of the long array, might return the array if already sync + */ + public static SyncLongArray of(LongArray other) { + if (other instanceof SyncLongArray) { + return (SyncLongArray) other; + } + return new SyncLongArray(other); + } + + private final LongArray array; + + private SyncLongArray(LongArray array) { + this.array = array; + } + + @Override + public synchronized long get(long index) { + return array.get(index); + } + + @Override + public synchronized void set(long index, long value) { + array.set(index, value); + } + + @Override + public synchronized long length() { + return array.length(); + } + + @Override + public synchronized int sizeOf() { + return array.sizeOf(); + } + + @Override + public synchronized void resize(long newSize) throws IOException { + array.resize(newSize); + } + + @Override + public synchronized void clear() { + array.clear(); + } +} diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/IOUtil.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/IOUtil.java index 3bf15ae7..42de8942 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/IOUtil.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/IOUtil.java @@ -28,15 +28,17 @@ import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; import org.apache.commons.compress.compressors.xz.XZCompressorInputStream; +import org.apache.commons.math3.util.FastMath; import org.rdfhdt.hdt.compact.integer.VByte; import org.rdfhdt.hdt.enums.CompressionType; import org.rdfhdt.hdt.listener.ProgressListener; import org.rdfhdt.hdt.util.string.ByteString; import org.rdfhdt.hdt.util.string.ByteStringUtil; +import org.visnow.jlargearrays.ConcurrencyUtils; import org.visnow.jlargearrays.LargeArrayUtils; +import org.visnow.jlargearrays.LongLargeArray; import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.ByteArrayOutputStream; import java.io.Closeable; @@ -59,6 +61,8 @@ import java.util.Arrays; import java.util.Comparator; import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; import java.util.zip.GZIPInputStream; import java.nio.ByteBuffer; @@ -108,6 +112,86 @@ public static CloseMappedByteBuffer mapChannel(String filename, FileChannel ch, return new CloseMappedByteBuffer(filename, ch.map(mode, position, size), false); } + /** + * create a large array filled with 0 + * @param size size + * @return array + */ + public static LongLargeArray createLargeArray(long size) { + return createLargeArray(size, true); + } + /** + * create a large array + * @param size size + * @param init is the array filled with 0 or not + * @return array + */ + public static LongLargeArray createLargeArray(long size, boolean init) { + if (init) { + return createLargeArray(size, 0); + } + return new LongLargeArray(size, false); + } + + /** + * create a large array with an initial value + * @param size size + * @param initialValue initial value to fill the array + * @return array + */ + public static LongLargeArray createLargeArray(long size, long initialValue) { + LongLargeArray array = new LongLargeArray(size, false); + fillLargeArray(array, initialValue); + return array; + } + + /** + * Set long large array all values, faster than default implementation because there is a bug. + * + * @param array array + * @param initValue initialization value + */ + public static void fillLargeArray(LongLargeArray array, long initValue) { + fillLargeArray(array, 0, array.length(), initValue); + } + + /** + * Set long large array all values, faster than default implementation because there is a bug. + * + * @param array array + * @param start start (inclusive) + * @param end end index (exclusive) + * @param initValue initialization value + */ + public static void fillLargeArray(LongLargeArray array, long start, long end, long initValue) { + if (start >= end) { + return; + } + long length = end - start; + final int nthreads = (int) FastMath.min(length, ConcurrencyUtils.getNumberOfThreads()); + if (nthreads <= 2 || length < ConcurrencyUtils.getConcurrentThreshold() || !array.isLarge()) { + for (long k = 0; k < length; k++) { + array.setLong(k, initValue); + } + } else { + final long perThreadElem = length / nthreads; + final Future[] threads = new Future[nthreads]; + for (int thread = 0; thread < nthreads; thread++) { + final long firstIdx = start + thread * perThreadElem; + final long lastIdx = (thread == nthreads - 1) ? end : (firstIdx + perThreadElem); + threads[thread] = ConcurrencyUtils.submit(() -> { + for (long k1 = firstIdx; k1 < lastIdx; k1++) { + array.setLong(k1, initValue); + } + }); + } + try { + ConcurrencyUtils.waitForCompletion(threads); + } catch (InterruptedException | ExecutionException ex) { + throw new IllegalStateException(ex); + } + } + } /** * call all the close method and merge the exceptions by suppressing them (if multiple) * @@ -186,7 +270,12 @@ public static void closeAll(Iterable closeables) throws IOE throwIOOrRuntime(main); } - private static void throwIOOrRuntime(Throwable t) throws IOException { + /** + * throw this throwable as a {@link IOException} or as a {@link RuntimeException} + * @param t throwable + * @throws IOException t + */ + public static void throwIOOrRuntime(Throwable t) throws IOException { if (t instanceof IOException) { throw (IOException) t; } @@ -410,7 +499,7 @@ public static byte[] intToByteArray(int value) { * * @param in input * @return integer - * @throws IOException + * @throws IOException io exception */ public static int readInt(InputStream in) throws IOException { int ch1 = in.read(); @@ -419,16 +508,16 @@ public static int readInt(InputStream in) throws IOException { int ch4 = in.read(); if ((ch1 | ch2 | ch3 | ch4) < 0) throw new EOFException(); - return (ch4 << 24) + (ch3 << 16) + (ch2 << 8) + (ch1 << 0); + return (ch4 << 24) + (ch3 << 16) + (ch2 << 8) + (ch1); } /** * Convert byte array to int, little endian * - * @param value + * @param value io exception */ public static int byteArrayToInt(byte[] value) { - return (value[3] << 24) + (value[2] << 16) + (value[1] << 8) + (value[0] << 0); + return (value[3] << 24) + (value[2] << 16) + (value[1] << 8) + (value[0]); } public static byte[] readSizedBuffer(InputStream input, ProgressListener listener) throws IOException { @@ -441,7 +530,7 @@ public static byte[] readSizedBuffer(InputStream input, ProgressListener listene /** * @param input din * @param length bytes - * @param listener + * @param listener listener */ public static byte[] readBuffer(InputStream input, int length, ProgressListener listener) throws IOException { int nRead; diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/CompressNodeReader.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/CompressNodeReader.java index 8008ad70..0a0cf4bc 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/CompressNodeReader.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/CompressNodeReader.java @@ -43,6 +43,7 @@ public CompressNodeReader(InputStream stream) throws IOException { consumer = DebugOrderNodeIterator.of("stream", true); } + @Override public long getSize() { return size; } diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/Pair.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/Pair.java new file mode 100644 index 00000000..bd7e04dc --- /dev/null +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/Pair.java @@ -0,0 +1,34 @@ +package org.rdfhdt.hdt.util.io.compress; + +public class Pair implements Cloneable { + public long predicatePosition; + public long object; + public long predicate; + + public void setAll(long predicatePosition, long object, long predicate) { + this.predicatePosition = predicatePosition; + this.object = object; + this.predicate = predicate; + } + + public void setAll(Pair pair) { + predicatePosition = pair.predicatePosition; + object = pair.object; + predicate = pair.predicate; + } + + public void increaseAll(long predicatePosition, long object, long predicate) { + this.predicatePosition += predicatePosition; + this.object += object; + this.predicate += predicate; + } + + @Override + public Pair clone() { + try { + return (Pair) super.clone(); + } catch (CloneNotSupportedException e) { + throw new AssertionError(); + } + } +} diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/PairMergeIterator.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/PairMergeIterator.java new file mode 100644 index 00000000..5878543f --- /dev/null +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/PairMergeIterator.java @@ -0,0 +1,19 @@ +package org.rdfhdt.hdt.util.io.compress; + +import org.rdfhdt.hdt.iterator.utils.ExceptionIterator; +import org.rdfhdt.hdt.iterator.utils.MergeExceptionIterator; + +import java.io.IOException; +import java.util.Comparator; + +public class PairMergeIterator extends MergeExceptionIterator { + + public PairMergeIterator(ExceptionIterator in1, ExceptionIterator in2, Comparator comparator) { + super(in1, in2, comparator); + } + + public static > ExceptionIterator buildOfTree( + T[] lst, Comparator comparator) { + return buildOfTree(it -> it, comparator, lst, 0, lst.length); + } +} diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/PairReader.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/PairReader.java new file mode 100644 index 00000000..35d7ceef --- /dev/null +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/PairReader.java @@ -0,0 +1,86 @@ +package org.rdfhdt.hdt.util.io.compress; + +import org.rdfhdt.hdt.compact.integer.VByte; +import org.rdfhdt.hdt.exceptions.CRCException; +import org.rdfhdt.hdt.iterator.utils.ExceptionIterator; +import org.rdfhdt.hdt.util.crc.CRC32; +import org.rdfhdt.hdt.util.crc.CRCInputStream; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; + +/** + * Reader for file wrote with {@link PairWriter} + * + * @author Antoine Willerval + */ +public class PairReader implements ExceptionIterator, Closeable { + private final CRCInputStream stream; + private final Pair next = new Pair(); + private boolean read = false, end = false; + private final long size; + private long index; + + public PairReader(InputStream stream) throws IOException { + this.stream = new CRCInputStream(stream, new CRC32()); + size = VByte.decode(this.stream); + } + + public long getSize() { + return size; + } + + @Override + public boolean hasNext() throws IOException { + if (read) { + return true; + } + + // the reader is empty, null end triple + if (end) { + return false; + } + + if (index == size) { + end = true; + if (!stream.readCRCAndCheck()) { + throw new CRCException("CRC Error while reading PreMapped pairs."); + } + return false; + } + + index++; + + long p = VByte.decodeSigned(stream); + long v = VByte.decodeSigned(stream); + long pred = VByte.decodeSigned(stream); + + return !setAllOrEnd(p, v, pred); + } + + private boolean setAllOrEnd(long p, long v, long pred) { + if (end) { + // already completed + return true; + } + // map the triples to the end id, compute the shared with the end shared size + next.increaseAll(p, v, pred); + read = true; + return false; + } + + @Override + public Pair next() throws IOException { + if (!hasNext()) { + return null; + } + read = false; + return next; + } + + @Override + public void close() throws IOException { + stream.close(); + } +} diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/PairWriter.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/PairWriter.java new file mode 100644 index 00000000..29cdc9a0 --- /dev/null +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/PairWriter.java @@ -0,0 +1,63 @@ +package org.rdfhdt.hdt.util.io.compress; + +import org.rdfhdt.hdt.compact.integer.VByte; +import org.rdfhdt.hdt.util.crc.CRC32; +import org.rdfhdt.hdt.util.crc.CRCOutputStream; + +import java.io.Closeable; +import java.io.IOException; +import java.io.OutputStream; + +/** + * Writer for {@link Pair}, if the stream is stored, the file size will be reduced + * + * @author Antoine Willerval + */ +public class PairWriter implements Closeable { + private final CRCOutputStream out; + private final long size; + private long index; + private final Pair lastValue = new Pair(); + + public PairWriter(OutputStream writer, long size) throws IOException { + if (size < 0) { + throw new IllegalArgumentException("Negative size!"); + } + this.out = new CRCOutputStream(writer, new CRC32()); + VByte.encode(this.out, size); + this.size = size; + } + + public void append(Pair pair) throws IOException { + if (index >= size) { + throw new IllegalArgumentException("add more elements than size!"); + } + // encode the delta with the previous pair to reduce the size used by the pair on disk + // if the stream isn't sorted, it will add at worse 3 bits / pair, if the stream is sorted, + // the file's size will be drastically reduced + VByte.encodeSigned(out, pair.predicatePosition - lastValue.predicatePosition); + VByte.encodeSigned(out, pair.object - lastValue.object); + VByte.encodeSigned(out, pair.predicate - lastValue.predicate); + + // save previous value + lastValue.setAll(pair); + + index++; + } + + public void writeCRC() throws IOException { + out.writeCRC(); + } + + @Override + public void close() throws IOException { + try { + if (index != size) { + throw new IllegalArgumentException("less elements than size were added!"); + } + writeCRC(); + } finally { + out.close(); + } + } +} diff --git a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/WriteLongArrayBuffer.java b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/WriteLongArrayBuffer.java index 79d1bdc8..5585bbef 100644 --- a/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/WriteLongArrayBuffer.java +++ b/hdt-java-core/src/main/java/org/rdfhdt/hdt/util/io/compress/WriteLongArrayBuffer.java @@ -43,6 +43,7 @@ public WriteLongArrayBuffer(LongArray array, long maxValue, int maxElement) { /** * clear all the elements */ + @Override public void clear() { index = 0; } diff --git a/hdt-java-core/src/test/java/org/rdfhdt/hdt/compact/array/LongArrayTest.java b/hdt-java-core/src/test/java/org/rdfhdt/hdt/compact/array/LongArrayTest.java index 29232ea5..d04fd442 100644 --- a/hdt-java-core/src/test/java/org/rdfhdt/hdt/compact/array/LongArrayTest.java +++ b/hdt-java-core/src/test/java/org/rdfhdt/hdt/compact/array/LongArrayTest.java @@ -9,11 +9,14 @@ import java.util.Random; import org.junit.Before; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; import org.rdfhdt.hdt.compact.sequence.SequenceInt64; +import org.rdfhdt.hdt.util.disk.LongArrayDisk; public class LongArrayTest { - + private static final int numentries = 10000; SequenceInt64 array; long [] plain; diff --git a/hdt-java-core/src/test/java/org/rdfhdt/hdt/compact/sequence/LargeArrayTest.java b/hdt-java-core/src/test/java/org/rdfhdt/hdt/compact/sequence/LargeArrayTest.java index 4eeaee88..510bcbc2 100644 --- a/hdt-java-core/src/test/java/org/rdfhdt/hdt/compact/sequence/LargeArrayTest.java +++ b/hdt-java-core/src/test/java/org/rdfhdt/hdt/compact/sequence/LargeArrayTest.java @@ -1,6 +1,7 @@ package org.rdfhdt.hdt.compact.sequence; import org.junit.Test; +import org.rdfhdt.hdt.util.io.IOUtil; import org.visnow.jlargearrays.LargeArray; import org.visnow.jlargearrays.LongLargeArray; @@ -12,7 +13,7 @@ public void allocationTest() { try { LargeArray.setMaxSizeOf32bitArray(100); long size = LargeArray.getMaxSizeOf32bitArray() + 2L; - new LongLargeArray(size); + IOUtil.createLargeArray(size, false); } finally { LargeArray.setMaxSizeOf32bitArray(old); } diff --git a/hdt-java-core/src/test/java/org/rdfhdt/hdt/dictionary/impl/kcat/KCatMergerTest.java b/hdt-java-core/src/test/java/org/rdfhdt/hdt/dictionary/impl/kcat/KCatMergerTest.java index bc6c21b9..44ee1834 100644 --- a/hdt-java-core/src/test/java/org/rdfhdt/hdt/dictionary/impl/kcat/KCatMergerTest.java +++ b/hdt-java-core/src/test/java/org/rdfhdt/hdt/dictionary/impl/kcat/KCatMergerTest.java @@ -21,7 +21,6 @@ import org.rdfhdt.hdt.hdt.HDTManager; import org.rdfhdt.hdt.hdt.HDTManagerTest; import org.rdfhdt.hdt.options.HDTOptions; -import org.rdfhdt.hdt.options.HDTOptionsBase; import org.rdfhdt.hdt.options.HDTOptionsKeys; import org.rdfhdt.hdt.util.LargeFakeDataSetStreamSupplier; import org.rdfhdt.hdt.util.concurrent.SyncSeq; @@ -84,7 +83,7 @@ private void writeSection(DictionarySection sec, OutputStream stream) throws IOE } private DictionarySection loadSection(InputStream stream) throws IOException { - PFCDictionarySection section = new PFCDictionarySection(new HDTOptionsBase()); + PFCDictionarySection section = new PFCDictionarySection(HDTOptions.EMPTY); section.load(stream, null); return section; } @@ -92,7 +91,7 @@ private DictionarySection loadSection(InputStream stream) throws IOException { private Map loadMultiSection(List seq, InputStream stream) throws IOException { Map sectionMap = new TreeMap<>(); for (CharSequence key : seq) { - PFCDictionarySection section = new PFCDictionarySection(new HDTOptionsBase()); + PFCDictionarySection section = new PFCDictionarySection(HDTOptions.EMPTY); section.load(stream, null); sectionMap.put(ByteString.of(key), section); } @@ -103,7 +102,7 @@ private Map loadMultiSection(List params() { + return List.of( + HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_DISK, + HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_OPTIMIZED + ); + } - BitmapTriples bitmapTriples = (BitmapTriples) triples; + @Parameterized.Parameter + public String indexMethod; - if (disk) { - assertNotNull(bitmapTriples.diskSequenceLocation); - assertTrue(bitmapTriples.diskSequence); - } + public void diskBitmapIndexTest(boolean map, boolean disk) throws IOException, ParserException { + Path root = tempDir.newFolder().toPath(); - try (HDT hdt2 = loadOrMap(hdt2Path, optDefault, map)) { - Triples triples2 = hdt2.getTriples(); + Path hdt1Path = root.resolve("hdt1.hdt"); + Path hdt2Path = root.resolve("hdt2.hdt"); - assertTrue(triples2 instanceof BitmapTriples); + try { + // create 1 one fake HDT + long maxTriples = 10_000L; + try (HDT hdt = LargeFakeDataSetStreamSupplier + .createSupplierWithMaxTriples(maxTriples, 42) + .createFakeHDT(new HDTSpecification())) { + hdt.saveToHDT(hdt1Path.toAbsolutePath().toString(), null); + } + // copy this fake HDT to another file (faster than creating the same twice) + Files.copy(hdt1Path, hdt2Path); + + // optDisk = DISK, optDefault = OLD IMPLEMENTATION + HDTOptions optDisk = HDTOptions.of( + HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_KEY, + indexMethod + ); + HDTOptions optDefault = HDTOptions.of( + HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_KEY, + HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_LEGACY + ); + + // set config + if (disk) { + optDisk.setOptions( + HDTOptionsKeys.BITMAPTRIPLES_SEQUENCE_DISK, true, + HDTOptionsKeys.BITMAPTRIPLES_SEQUENCE_DISK_SUBINDEX, true, + HDTOptionsKeys.BITMAPTRIPLES_SEQUENCE_DISK_LOCATION, root.resolve("indexdir").toAbsolutePath() + ); + } + + try ( + ByteArrayOutputStream indexDisk = new ByteArrayOutputStream(); + ByteArrayOutputStream indexDefault = new ByteArrayOutputStream() + ) { + + // create the index for both HDTs + try (HDT hdt = loadOrMapIndexed(hdt1Path, optDisk, map && disk)) { + Triples triples = hdt.getTriples(); - BitmapTriples bitmapTriples2 = (BitmapTriples) triples2; + assertTrue(triples instanceof BitmapTriples); + + BitmapTriples bitmapTriples = (BitmapTriples) triples; if (disk) { - assertNull(bitmapTriples2.diskSequenceLocation); - assertFalse(bitmapTriples2.diskSequence); + assertNotNull(bitmapTriples.diskSequenceLocation); + assertTrue(bitmapTriples.diskSequence); } - bitmapTriples2.saveIndex(indexDefault, new ControlInformation(), null); - assertBitmapTriplesEquals(bitmapTriples2, bitmapTriples); - } + try (HDT hdt2 = loadOrMapIndexed(hdt2Path, optDefault, map)) { + Triples triples2 = hdt2.getTriples(); - bitmapTriples.saveIndex(indexDisk, new ControlInformation(), null); - } + assertTrue(triples2 instanceof BitmapTriples); + + BitmapTriples bitmapTriples2 = (BitmapTriples) triples2; + + if (disk) { + assertNull(bitmapTriples2.diskSequenceLocation); + assertFalse(bitmapTriples2.diskSequence); + } + bitmapTriples2.saveIndex(indexDefault, new ControlInformation(), null); + + assertBitmapTriplesEquals(bitmapTriples2, bitmapTriples); + } + + bitmapTriples.saveIndex(indexDisk, new ControlInformation(), null); + } - // read and compare indexes - byte[] indexDiskArr = indexDisk.toByteArray(); - byte[] indexDefaultArr = indexDefault.toByteArray(); + // read and compare indexes + byte[] indexDiskArr = indexDisk.toByteArray(); + byte[] indexDefaultArr = indexDefault.toByteArray(); - assertArrayEquals("index not equals", indexDefaultArr, indexDiskArr); + assertArrayEquals("index not equals", indexDefaultArr, indexDiskArr); + } + } finally { + PathUtils.deleteDirectory(root); } - } finally { - PathUtils.deleteDirectory(root); } - } - @Test - public void diskBitmapMapIndexedTest() throws IOException, ParserException { - diskBitmapIndexTest(false, true); - } + @Test + public void diskBitmapMapIndexedTest() throws IOException, ParserException { + diskBitmapIndexTest(false, true); + } - @Test - public void diskBitmapLoadIndexedTest() throws IOException, ParserException { - diskBitmapIndexTest(false, true); - } + @Test + public void diskBitmapLoadIndexedTest() throws IOException, ParserException { + diskBitmapIndexTest(false, true); + } - @Test - public void memBitmapMapIndexedTest() throws IOException, ParserException { - diskBitmapIndexTest(false, false); + @Test + public void memBitmapMapIndexedTest() throws IOException, ParserException { + diskBitmapIndexTest(false, false); + } + + @Test + public void memBitmapLoadIndexedTest() throws IOException, ParserException { + diskBitmapIndexTest(false, false); + } } - @Test - public void memBitmapLoadIndexedTest() throws IOException, ParserException { - diskBitmapIndexTest(false, false); + @Ignore("Hand tests") + public static class HandTest extends AbstractTest { + @Test + public void largeTest() throws IOException { + /* + * Hand test to test the indexing of a large HDT with the disk indexer, hdtFile is the file to index, + * workDir is the location to handle the indexing. + */ + Path hdtFile = Path.of("N:\\qEndpoint\\qendpoint\\hdt-store\\index_dev.hdt"); //ASC42 + Path workdir = Path.of("N:\\WIP\\bitmaptriples"); + // Path hdtFile = Path.of("C:\\Users\\wilat\\workspace\\qEndpoint\\qendpoint\\hdt-store\\index_dev.hdt"); //NAMAW + // Path workDir = tempDir.getRoot().toPath(); + + Files.createDirectories(workdir); + + HDTOptions opt = HDTOptions.of( + HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_KEY, HDTOptionsKeys.BITMAPTRIPLES_INDEX_METHOD_VALUE_DISK, + HDTOptionsKeys.BITMAPTRIPLES_SEQUENCE_DISK, true, + HDTOptionsKeys.BITMAPTRIPLES_SEQUENCE_DISK_SUBINDEX, true, + HDTOptionsKeys.BITMAPTRIPLES_SEQUENCE_DISK_LOCATION, workdir + ); + + try { + HDTManager.mapIndexedHDT(hdtFile, opt, ProgressListener.sout()).close(); + } finally { + if (Files.exists(workdir)) { + PathUtils.deleteDirectory(workdir); + } + } + } } } diff --git a/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/ProfilerTest.java b/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/ProfilerTest.java index 4b22748b..d41a0c68 100644 --- a/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/ProfilerTest.java +++ b/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/ProfilerTest.java @@ -3,7 +3,7 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.rdfhdt.hdt.options.HDTOptionsBase; +import org.rdfhdt.hdt.options.HDTOptions; import java.io.IOException; import java.nio.file.Path; @@ -151,7 +151,7 @@ public void loadBack() { @Test public void loadBackOpt() { - HDTOptionsBase opt = new HDTOptionsBase(); + HDTOptions opt = HDTOptions.of(); long id; try (Profiler prof = Profiler.createOrLoadSubSection("test", opt, true)) { id = prof.getId(); diff --git a/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/disk/LongArrayDiskTest.java b/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/disk/LongArrayDiskTest.java index f3ebc58b..12536b83 100644 --- a/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/disk/LongArrayDiskTest.java +++ b/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/disk/LongArrayDiskTest.java @@ -14,6 +14,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; +import java.util.Random; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -104,6 +105,32 @@ public void resizeTest() throws IOException { assertEquals(array.get(3), 4); } } + + @Test + public void clearTest() throws IOException { + Path root = tempDir.getRoot().toPath(); + + long size = 12543; + try (LongArrayDisk array = new LongArrayDisk(root.resolve("file"), size)) { + + assertEquals(array.length(), size); + + Random r1 = new Random(25); + for (long i = 0; i < array.length(); i++) { + array.set(i, r1.nextLong()); + } + Random r2 = new Random(25); + for (long i = 0; i < array.length(); i++) { + assertEquals("#" + i, r2.nextLong(), array.get(i)); + } + + array.clear(); + + for (long i = 0; i < array.length(); i++) { + assertEquals("#" + i, 0, array.get(i)); + } + } + } @Test public void largeResizeTest() throws IOException { Path root = tempDir.getRoot().toPath(); diff --git a/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/disk/SimpleSplitLongArrayTest.java b/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/disk/SimpleSplitLongArrayTest.java index 874b293d..6c70a97d 100644 --- a/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/disk/SimpleSplitLongArrayTest.java +++ b/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/disk/SimpleSplitLongArrayTest.java @@ -3,6 +3,7 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.rdfhdt.hdt.util.io.IOUtil; import org.visnow.jlargearrays.LongLargeArray; import java.io.IOException; @@ -24,7 +25,7 @@ public static Collection params() { @Test public void setTest() throws IOException { - LargeLongArray arrayExcepted = new LargeLongArray(new LongLargeArray(128L)); + LargeLongArray arrayExcepted = new LargeLongArray(IOUtil.createLargeArray(128L)); try (SimpleSplitLongArray array = SimpleSplitLongArray.intXArray(128, bits)) { long max = (~0L) >>> (64 - bits); @@ -47,7 +48,7 @@ public void setTest() throws IOException { @Test public void resizeTest() throws IOException { - LargeLongArray arrayExcepted = new LargeLongArray(new LongLargeArray(128L)); + LargeLongArray arrayExcepted = new LargeLongArray(IOUtil.createLargeArray(128L)); try (SimpleSplitLongArray array = SimpleSplitLongArray.intXArray(128, bits)) { assertEquals("non matching size", arrayExcepted.length(), array.length()); diff --git a/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/io/compress/CompressTest.java b/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/io/compress/CompressTest.java index 166fe11a..3813ae29 100644 --- a/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/io/compress/CompressTest.java +++ b/hdt-java-core/src/test/java/org/rdfhdt/hdt/util/io/compress/CompressTest.java @@ -2,12 +2,15 @@ import org.junit.Assert; import org.junit.Test; +import org.rdfhdt.hdt.compact.integer.VByte; import org.rdfhdt.hdt.iterator.utils.ExceptionIterator; import org.rdfhdt.hdt.triples.IndexedNode; import org.rdfhdt.hdt.util.string.ByteString; import org.rdfhdt.hdt.util.string.CharSequenceComparator; -import org.rdfhdt.hdt.util.string.DelayedString; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.util.Arrays; import java.util.HashSet; import java.util.Iterator; @@ -78,4 +81,35 @@ public void bitMappingTest() { Assert.assertEquals(index1, CompressUtil.computeSharedNode(sharedIndex1, sharedCount)); Assert.assertEquals(sharedCount + index1, CompressUtil.computeSharedNode(CompressUtil.getHeaderId(index1), sharedCount)); } + + @Test + public void decodeSignedTest() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + + VByte.encodeSigned(out, 0xFFL); + VByte.encodeSigned(out, -0xFFL); + + VByte.encodeSigned(out, 0x18L); + VByte.encodeSigned(out, 0x91L); + VByte.encodeSigned(out, 0x75L); + + VByte.encodeSigned(out, 0x186878L); + VByte.encodeSigned(out, 0x9167L); + VByte.encodeSigned(out, 0x75L); + VByte.encodeSigned(out, -0x186878L); + + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + + Assert.assertEquals(0xFFL, VByte.decodeSigned(in)); + Assert.assertEquals(-0xFFL, VByte.decodeSigned(in)); + + Assert.assertEquals(0x18L, VByte.decodeSigned(in)); + Assert.assertEquals(0x91L, VByte.decodeSigned(in)); + Assert.assertEquals(0x75L, VByte.decodeSigned(in)); + + Assert.assertEquals(0x186878L, VByte.decodeSigned(in)); + Assert.assertEquals(0x9167L, VByte.decodeSigned(in)); + Assert.assertEquals(0x75L, VByte.decodeSigned(in)); + Assert.assertEquals(-0x186878L, VByte.decodeSigned(in)); + } } diff --git a/hdt-java-package/bin/hdtSearch.ps1 b/hdt-java-package/bin/hdtSearch.ps1 index d741388d..d808266e 100644 --- a/hdt-java-package/bin/hdtSearch.ps1 +++ b/hdt-java-package/bin/hdtSearch.ps1 @@ -1,4 +1,10 @@ param( + [String] + $options, + [String] + $config, + [Switch] + $color, [Parameter()] [Switch] $version, diff --git a/hdt-jena/src/main/java/org/rdfhdt/hdtjena/HDTGraphAssembler.java b/hdt-jena/src/main/java/org/rdfhdt/hdtjena/HDTGraphAssembler.java index e8e19f37..2de4b1b8 100644 --- a/hdt-jena/src/main/java/org/rdfhdt/hdtjena/HDTGraphAssembler.java +++ b/hdt-jena/src/main/java/org/rdfhdt/hdtjena/HDTGraphAssembler.java @@ -66,9 +66,9 @@ public Model open(Assembler a, Resource root, Mode mode) // FIXME: Read more properties. Cache config? HDT hdt; if(loadInMemory) { - hdt = HDTManager.loadIndexedHDT(file, null); + hdt = HDTManager.loadIndexedHDT(file); } else { - hdt = HDTManager.mapIndexedHDT(file, null); + hdt = HDTManager.mapIndexedHDT(file); } HDTGraph graph = new HDTGraph(hdt); return ModelFactory.createModelForGraph(graph); diff --git a/hdt-jena/src/main/java/org/rdfhdt/hdtjena/cmd/HDTSparql.java b/hdt-jena/src/main/java/org/rdfhdt/hdtjena/cmd/HDTSparql.java index e929664c..09f8b816 100644 --- a/hdt-jena/src/main/java/org/rdfhdt/hdtjena/cmd/HDTSparql.java +++ b/hdt-jena/src/main/java/org/rdfhdt/hdtjena/cmd/HDTSparql.java @@ -48,7 +48,7 @@ public class HDTSparql { public void execute() throws IOException { // Create HDT - HDT hdt = HDTManager.mapIndexedHDT(fileHDT, null); + HDT hdt = HDTManager.mapIndexedHDT(fileHDT); try { // Create Jena wrapper on top of HDT.