From 7703d551ddc053984995634ecc01527f6ded7196 Mon Sep 17 00:00:00 2001 From: Pascal Essiembre Date: Mon, 11 Apr 2016 14:50:49 -0400 Subject: [PATCH 1/4] - MVStore is now the default URL crawl store. - Maven dependency updates: Norconex Collector Core 1.5.0. - JDBCCrawlDataStoreFactory now deprecated in favor of BasicJDBCCrawlDataStoreFactory from Collector Core. --- norconex-collector-filesystem/pom.xml | 4 ++-- .../src/changes/changes.xml | 13 +++++++++++++ .../fs/crawler/FilesystemCrawlerConfig.java | 4 ++-- .../store/impl/jdbc/JDBCCrawlDataStoreFactory.java | 5 ++++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/norconex-collector-filesystem/pom.xml b/norconex-collector-filesystem/pom.xml index 10ae245..69bfa5b 100644 --- a/norconex-collector-filesystem/pom.xml +++ b/norconex-collector-filesystem/pom.xml @@ -19,7 +19,7 @@ 4.0.0 com.norconex.collectors norconex-collector-filesystem - 2.4.0 + 2.5.0-SNAPSHOT Norconex Filesystem Collector @@ -60,7 +60,7 @@ com.norconex.collectors norconex-collector-core - 1.4.0 + 1.5.0-SNAPSHOT joda-time diff --git a/norconex-collector-filesystem/src/changes/changes.xml b/norconex-collector-filesystem/src/changes/changes.xml index 467d89f..ec9f56c 100644 --- a/norconex-collector-filesystem/src/changes/changes.xml +++ b/norconex-collector-filesystem/src/changes/changes.xml @@ -7,6 +7,19 @@ + + + MVStore is now the default URL crawl store. + + + Maven dependency updates: Norconex Collector Core 1.5.0. + + + JDBCCrawlDataStoreFactory now deprecated in favor of + BasicJDBCCrawlDataStoreFactory from Collector Core. 
+ + + Now supports specifying relative paths in startPaths (for local diff --git a/norconex-collector-filesystem/src/main/java/com/norconex/collector/fs/crawler/FilesystemCrawlerConfig.java b/norconex-collector-filesystem/src/main/java/com/norconex/collector/fs/crawler/FilesystemCrawlerConfig.java index bb4eb5b..cc9c181 100644 --- a/norconex-collector-filesystem/src/main/java/com/norconex/collector/fs/crawler/FilesystemCrawlerConfig.java +++ b/norconex-collector-filesystem/src/main/java/com/norconex/collector/fs/crawler/FilesystemCrawlerConfig.java @@ -29,7 +29,7 @@ import com.norconex.collector.core.checksum.IMetadataChecksummer; import com.norconex.collector.core.crawler.AbstractCrawlerConfig; -import com.norconex.collector.core.data.store.impl.mapdb.MapDBCrawlDataStoreFactory; +import com.norconex.collector.core.data.store.impl.mvstore.MVStoreCrawlDataStoreFactory; import com.norconex.collector.fs.checksum.impl.FileMetadataChecksummer; import com.norconex.collector.fs.doc.IFileDocumentProcessor; import com.norconex.commons.lang.config.ConfigurationUtil; @@ -59,7 +59,7 @@ public class FilesystemCrawlerConfig extends AbstractCrawlerConfig { public FilesystemCrawlerConfig() { super(); - setCrawlDataStoreFactory(new MapDBCrawlDataStoreFactory()); + setCrawlDataStoreFactory(new MVStoreCrawlDataStoreFactory()); } public String[] getStartPaths() { diff --git a/norconex-collector-filesystem/src/main/java/com/norconex/collector/fs/data/store/impl/jdbc/JDBCCrawlDataStoreFactory.java b/norconex-collector-filesystem/src/main/java/com/norconex/collector/fs/data/store/impl/jdbc/JDBCCrawlDataStoreFactory.java index 7c7b95c..a225e7f 100644 --- a/norconex-collector-filesystem/src/main/java/com/norconex/collector/fs/data/store/impl/jdbc/JDBCCrawlDataStoreFactory.java +++ b/norconex-collector-filesystem/src/main/java/com/norconex/collector/fs/data/store/impl/jdbc/JDBCCrawlDataStoreFactory.java @@ -1,4 +1,4 @@ -/* Copyright 2014 Norconex Inc. +/* Copyright 2014-2016 Norconex Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ import com.norconex.collector.core.data.store.ICrawlDataStore; import com.norconex.collector.core.data.store.impl.jdbc.AbstractJDBCDataStoreFactory; +import com.norconex.collector.core.data.store.impl.jdbc.BasicJDBCCrawlDataStoreFactory; import com.norconex.collector.core.data.store.impl.jdbc.BasicJDBCSerializer; import com.norconex.collector.core.data.store.impl.jdbc.IJDBCSerializer; import com.norconex.collector.core.data.store.impl.jdbc.JDBCCrawlDataStore.Database; @@ -34,7 +35,9 @@ * * * @author Pascal Essiembre + * @deprecated Since 2.5.0, use {@link BasicJDBCCrawlDataStoreFactory} */ +@Deprecated public class JDBCCrawlDataStoreFactory extends AbstractJDBCDataStoreFactory { public JDBCCrawlDataStoreFactory() { From e96652cda7c6482d69ed742bfbc6c467bb6642f4 Mon Sep 17 00:00:00 2001 From: Pascal Essiembre Date: Tue, 3 May 2016 11:55:21 -0400 Subject: [PATCH 2/4] Typo fix in self-reference xml. --- .../examples/collector-filesystem-config-reference.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/norconex-collector-filesystem/src/site/resources/examples/collector-filesystem-config-reference.xml b/norconex-collector-filesystem/src/site/resources/examples/collector-filesystem-config-reference.xml index 87bf5a0..33f6443 100644 --- a/norconex-collector-filesystem/src/site/resources/examples/collector-filesystem-config-reference.xml +++ b/norconex-collector-filesystem/src/site/resources/examples/collector-filesystem-config-reference.xml @@ -207,7 +207,7 @@ options. Please refer to committer for complete documentation. Below is an example using the FileSystemCommitter. --> - + $workdir\crawledFiles From 3b6720285bf010a2616ecf968b1809dfb3e8eafa Mon Sep 17 00:00:00 2001 From: Pascal Essiembre Date: Fri, 3 Jun 2016 09:29:03 -0400 Subject: [PATCH 3/4] Prepare for release. 
--- norconex-collector-filesystem/pom.xml | 2 +- norconex-collector-filesystem/src/changes/changes.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/norconex-collector-filesystem/pom.xml b/norconex-collector-filesystem/pom.xml index 69bfa5b..19ddf46 100644 --- a/norconex-collector-filesystem/pom.xml +++ b/norconex-collector-filesystem/pom.xml @@ -60,7 +60,7 @@ com.norconex.collectors norconex-collector-core - 1.5.0-SNAPSHOT + 1.5.0 joda-time diff --git a/norconex-collector-filesystem/src/changes/changes.xml b/norconex-collector-filesystem/src/changes/changes.xml index ec9f56c..d9d4cee 100644 --- a/norconex-collector-filesystem/src/changes/changes.xml +++ b/norconex-collector-filesystem/src/changes/changes.xml @@ -7,7 +7,7 @@ - + MVStore is now the default URL crawl store. From f1f8f6be0b8eac761dbf5e7b50ff3ec8f2a1ceca Mon Sep 17 00:00:00 2001 From: Pascal Essiembre Date: Fri, 3 Jun 2016 09:31:51 -0400 Subject: [PATCH 4/4] Prepare for release. --- norconex-collector-filesystem/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/norconex-collector-filesystem/pom.xml b/norconex-collector-filesystem/pom.xml index 19ddf46..371def0 100644 --- a/norconex-collector-filesystem/pom.xml +++ b/norconex-collector-filesystem/pom.xml @@ -19,7 +19,7 @@ 4.0.0 com.norconex.collectors norconex-collector-filesystem - 2.5.0-SNAPSHOT + 2.5.0 Norconex Filesystem Collector