diff --git a/src/test/scala/io/archivesunleashed/ArcTest.scala b/src/test/scala/io/archivesunleashed/ArcTest.scala
index 0daac6b3..ee19ae11 100644
--- a/src/test/scala/io/archivesunleashed/ArcTest.scala
+++ b/src/test/scala/io/archivesunleashed/ArcTest.scala
@@ -41,11 +41,11 @@ class ArcTest extends FunSuite with BeforeAndAfter {
val dayMonthTestA = "200805"
- test("count records") {
+ test("Count records") {
assert(RecordLoader.loadArchives(arcPath, sc).count == 300L)
}
- test("filter date") {
+ test("Filter date RDD") {
val startSS = 0
val monthSS = 6
val four = RecordLoader.loadArchives(arcPath, sc)
@@ -62,7 +62,7 @@ class ArcTest extends FunSuite with BeforeAndAfter {
five.foreach(date => assert(date.substring(startSS, monthSS) == dayMonthTestA))
}
- test("filter url pattern") {
+ test("Filter URL pattern RDD") {
val keepMatches = RecordLoader.loadArchives(arcPath, sc)
.keepUrlPatterns(Set("http://www.archive.org/about/.*".r))
val discardMatches = RecordLoader.loadArchives(arcPath, sc)
@@ -71,14 +71,14 @@ class ArcTest extends FunSuite with BeforeAndAfter {
assert(discardMatches.count == 284L)
}
- test("count links") {
+ test("Count links RDD") {
val links = RecordLoader.loadArchives(arcPath, sc)
.map(r => ExtractLinksRDD(r.getUrl, r.getContentString))
.reduce((a, b) => a ++ b)
assert(links.size == 664)
}
- test("detect language") {
+ test("Detect language RDD") {
val languageCounts = RecordLoader.loadArchives(arcPath, sc)
.keepMimeTypes(Set("text/html"))
.map(r => RemoveHTMLRDD(r.getContentString))
@@ -99,7 +99,7 @@ class ArcTest extends FunSuite with BeforeAndAfter {
}
}
- test("detect mime type tika") {
+ test("Detect MIMEtype Tika RDD") {
val mimeTypeCounts = RecordLoader.loadArchives(arcPath, sc)
.map(r => RemoveHTTPHeaderRDD(r.getContentString))
.groupBy(content => DetectMimeTypeTika(content.getBytes))
diff --git a/src/test/scala/io/archivesunleashed/ArchiveRecordTest.scala b/src/test/scala/io/archivesunleashed/ArchiveRecordTest.scala
index 14521416..0cdc196b 100644
--- a/src/test/scala/io/archivesunleashed/ArchiveRecordTest.scala
+++ b/src/test/scala/io/archivesunleashed/ArchiveRecordTest.scala
@@ -46,12 +46,12 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("count records") {
+ test("Count records") {
assert(RecordLoader.loadArchives(arcPath, sc).count == 300L)
assert(RecordLoader.loadArchives(warcPath, sc).count == 299L)
}
- test("Resource name produces expected result.") {
+ test("Resource name produces expected result") {
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => FilenameUtils.getName(x.getArchiveFilename))
.take(3)
@@ -81,7 +81,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
assert(textSampleWarc.deep == Array("", exampleUrl, exampleUrl).deep)
}
- test("Urls") {
+ test("URLs") {
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => x.getUrl).take(3)
val textSampleWarc = RecordLoader.loadArchives(warcPath, sc)
@@ -92,7 +92,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
"http://www.archive.org/robots.txt", "http://www.archive.org/").deep)
}
- test("Mime-Type") {
+ test("MIMEtype") {
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => x.getMimeType).take(3)
val textSampleWarc = RecordLoader.loadArchives(warcPath, sc)
@@ -103,7 +103,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
"text/html").deep)
}
- test("Get Http Status") {
+ test("Get HTTP status") {
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => x.getHttpStatus).take(3)
val textSampleWarc = RecordLoader.loadArchives(warcPath, sc)
diff --git a/src/test/scala/io/archivesunleashed/CountableRDDTest.scala b/src/test/scala/io/archivesunleashed/CountableRDDTest.scala
index 57aff922..2f56aeb2 100644
--- a/src/test/scala/io/archivesunleashed/CountableRDDTest.scala
+++ b/src/test/scala/io/archivesunleashed/CountableRDDTest.scala
@@ -38,7 +38,7 @@ class CountableRDDTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("count records") {
+ test("Count records; Extract Domain RDD") {
val base = RecordLoader.loadArchives(arcPath, sc)
.keepValidPages()
.map(r => ExtractDomainRDD(r.getUrl))
diff --git a/src/test/scala/io/archivesunleashed/RecordDFTest.scala b/src/test/scala/io/archivesunleashed/RecordDFTest.scala
index 0d9dff20..774587f4 100644
--- a/src/test/scala/io/archivesunleashed/RecordDFTest.scala
+++ b/src/test/scala/io/archivesunleashed/RecordDFTest.scala
@@ -39,7 +39,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("keep Valid Pages") {
+ test("Keep valid pages DF") {
val expected = "http://www.archive.org/"
val base = RecordLoader.loadArchives(arcPath, sc)
.all()
@@ -48,7 +48,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Discard MimeTypes") {
+ test("Discard MIMEtypes DF") {
val expected = "filedesc://IAH-20080430204825-00000-blackbook.arc"
val mimeTypes = Set("text/html")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -59,7 +59,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Discard Date") {
+ test("Discard date DF") {
val expected = "20080430"
val date = "20080429"
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -70,7 +70,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Discard Urls") {
+ test("Discard URLs DF") {
val expected = "http://www.archive.org/index.php"
val url = Set("http://www.archive.org/")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -81,7 +81,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Discard Domains") {
+ test("Discard domains DF") {
val expected = "http://www.hideout.com.br/"
val domain = Set("www.archive.org")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -92,7 +92,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Discard HttpStatus") {
+ test("Discard HTTP status DF") {
val expected = "200"
val statusCode = Set("000")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -103,7 +103,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Discard Content") {
+ test("Discard content DF") {
val expected = "dns:www.archive.org"
val contentRegex = Set("Content-Length: [0-9]{4}".r)
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -115,7 +115,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Discard UrlPatterns") {
+ test("Discard URL patterns DF") {
val expected = "dns:www.archive.org"
val urlRegex = Set(".*images.*".r)
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -127,7 +127,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Discard Languages") {
+ test("Discard languages DF") {
val expected = "dns:www.archive.org"
val languages = Set("th","de","ht")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -139,7 +139,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Keep HttpStatus") {
+ test("Keep HTTP status DF") {
val expected = "http://www.archive.org/robots.txt"
val statusCode = Set("200")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -150,7 +150,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Keep Date") {
+ test("Keep date DF") {
val expected = "http://www.archive.org/"
val month = List("04")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -161,7 +161,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Keep Urls") {
+ test("Keep URLs DF") {
val expected = "http://www.archive.org/"
val url = Set("http://www.archive.org/")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -172,7 +172,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Keep Domains") {
+ test("Keep domains DF") {
val expected = "http://www.archive.org/robots.txt"
val domain = Set("www.archive.org")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -183,7 +183,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Keep MimeTypesTika") {
+ test("Keep MIMEtypes Tika DF") {
val expected = "image/jpeg"
val mimeType = Set("image/jpeg")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -194,7 +194,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Keep MimeTypes") {
+ test("Keep MIMEtypes DF") {
val expected = "text/html"
val mimeType = Set("text/html")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -205,7 +205,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Keep content") {
+ test("Keep content DF") {
val expected = "http://www.archive.org/images/logoc.jpg"
val contentRegex = Set("Content-Length: [0-9]{4}".r)
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -217,7 +217,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Keep UrlPatterns") {
+ test("Keep URL patterns DF") {
val expected = "http://www.archive.org/images/go-button-gateway.gif"
val urlRegex = Set(".*images.*".r)
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -229,7 +229,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Keep Languages") {
+ test("Keep languages DF") {
val expected = "http://www.archive.org/images/logoc.jpg"
val languages = Set("th","de","ht")
val base = RecordLoader.loadArchives(arcPath, sc)
@@ -241,7 +241,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}
- test("Keep keepMimeTypes") {
+ test("Keep images DF") {
val expected = "image/jpeg"
val base = RecordLoader.loadArchives(arcPath, sc)
.all()
diff --git a/src/test/scala/io/archivesunleashed/RecordLoaderTest.scala b/src/test/scala/io/archivesunleashed/RecordLoaderTest.scala
index a784a6fe..9260e666 100644
--- a/src/test/scala/io/archivesunleashed/RecordLoaderTest.scala
+++ b/src/test/scala/io/archivesunleashed/RecordLoaderTest.scala
@@ -37,7 +37,7 @@ class RecordLoaderTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("loads Warc") {
+ test("Load WARC") {
val base = RecordLoader.loadArchives(warcPath, sc)
.keepValidPages()
.map(x => x.getUrl)
diff --git a/src/test/scala/io/archivesunleashed/RecordRDDTest.scala b/src/test/scala/io/archivesunleashed/RecordRDDTest.scala
index f5b89ea0..4121513c 100644
--- a/src/test/scala/io/archivesunleashed/RecordRDDTest.scala
+++ b/src/test/scala/io/archivesunleashed/RecordRDDTest.scala
@@ -43,21 +43,21 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("no valid pages") {
+ test("Expect no valid pages RDD") {
val expectedLength = 0
val base = RecordLoader.loadArchives(badPath, sc)
.keepValidPages().take(2)
assert (base.length == expectedLength)
}
- test ("no images") {
+ test ("Expect no images RDD") {
val expectedLength = 0
val base = RecordLoader.loadArchives(badPath, sc)
.keepValidPages().take(2)
assert (base.length == expectedLength)
}
- test("keep date") {
+ test("Keep date RDD") {
val testDate = "2008"
val base = RecordLoader.loadArchives(arcPath, sc)
val component = DateComponent.YYYY
@@ -68,7 +68,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
.map ( mp => mp.getUrl).take(3)
assert (r2.sameElements(r)) }
- test ("keep http status codes") {
+ test ("Keep HTTP status codes RDD") {
val expected = 94
val base = RecordLoader.loadArchives(arcPath, sc)
.keepValidPages()
@@ -77,7 +77,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
assert (r2 == expected)
}
- test ("keep urls") {
+ test ("Keep URLs RDD") {
val expected = 1
val base = RecordLoader.loadArchives(arcPath, sc)
.keepValidPages()
@@ -86,7 +86,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
assert (r2 == expected)
}
- test ("keep url patterns") {
+ test ("Keep URL patterns RDD") {
val expected = 1
val base = RecordLoader.loadArchives(arcPath, sc)
.keepValidPages()
@@ -95,7 +95,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
assert (r2 == expected)
}
- test ("check for domains") {
+ test ("Keep domains RDD") {
val expected = 91
val base2 = RecordLoader.loadArchives(arcPath, sc)
.keepValidPages()
@@ -104,7 +104,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
assert (x2 == expected )
}
- test ("keep languages") {
+ test ("Keep languages RDD") {
val base2 = RecordLoader.loadArchives(arcPath, sc)
.keepValidPages()
val langs: Set[String] = Set("en", "fr")
@@ -115,7 +115,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
assert (r2.sameElements(r))
}
- test ("discard languages") {
+ test ("Discard languages RDD") {
val base2 = RecordLoader.loadArchives(arcPath, sc)
.keepValidPages()
val langs: Set[String] = Set("fr")
@@ -125,7 +125,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
assert (r2.sameElements(r))
}
- test ("keep mime tika") {
+ test ("Keep MIMEtype Tika RDD") {
val base = RecordLoader.loadArchives(arcPath, sc)
val mime = Set ("text/plain", "image/jpeg")
val r2 = base.keepMimeTypesTika(mime)
@@ -135,7 +135,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
"http://www.archive.org/images/logoc.jpg").deep)
}
- test ("keep mime web server") {
+ test ("Keep MIMEtype RDD") {
val base = RecordLoader.loadArchives(arcPath, sc)
val mime = Set ("text/plain", "image/jpeg")
val r2 = base.keepMimeTypes(mime)
@@ -145,7 +145,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
"http://www.archive.org/images/logoc.jpg").deep)
}
- test ("check for keep content"){
+ test ("Keep content RDD"){
val expected = 1
val base = RecordLoader.loadArchives(arcPath, sc)
.keepValidPages()
@@ -156,7 +156,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
assert (y1 == expected)
}
- test ("discard mime web server") {
+ test ("Discard MIMEtype RDD") {
val base = RecordLoader.loadArchives(arcPath, sc)
val mime = Set ("text/plain", "image/jpeg")
val r2 = base.discardMimeTypes(mime)
@@ -165,7 +165,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
"http://www.archive.org/index.php").deep)
}
- test ("discard mime tika") {
+ test ("Discard MIMEtype Tika RDD") {
val base = RecordLoader.loadArchives(arcPath, sc)
val mime = Set ("text/plain", "image/jpeg")
val r2 = base.discardMimeTypesTika(mime)
@@ -174,7 +174,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
"http://www.archive.org/", "http://www.archive.org/index.php").deep)
}
- test ("discard date") {
+ test ("Discard date RDD") {
val base = RecordLoader.loadArchives(arcPath, sc)
val date = "20080430"
val r = base.filter( x=> x.getCrawlDate != date).collect()
@@ -182,7 +182,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
assert (r.deep == Array().deep)
}
- test ("discard urls") {
+ test ("Discard URLs RDD") {
val expected = 94
val base = RecordLoader.loadArchives(arcPath, sc)
.keepValidPages()
@@ -191,7 +191,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
assert (r2 == expected)
}
- test ("discard url patterns") {
+ test ("Discard URL patterns RDD") {
val expected = 93
val base = RecordLoader.loadArchives(arcPath, sc)
.keepValidPages()
@@ -200,7 +200,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
assert (r2 == expected)
}
- test ("discard http status codes") {
+ test ("Discard HTTP status codes RDD") {
val expected = 46
val base = RecordLoader.loadArchives(arcPath, sc)
val statusCodes: Set[String] = Set ("200", "404")
@@ -208,7 +208,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
assert (r2 == expected)
}
- test ("discard domains") {
+ test ("Discard domains RDD") {
val expected = 94
val base = RecordLoader.loadArchives(arcPath, sc)
.keepValidPages()
@@ -217,7 +217,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter {
assert (r2 == expected)
}
- test ("discard content") {
+ test ("Discard content RDD") {
val expected = 93
val base = RecordLoader.loadArchives(arcPath, sc)
.keepValidPages()
diff --git a/src/test/scala/io/archivesunleashed/WarcTest.scala b/src/test/scala/io/archivesunleashed/WarcTest.scala
index f79b936f..5eb2b112 100644
--- a/src/test/scala/io/archivesunleashed/WarcTest.scala
+++ b/src/test/scala/io/archivesunleashed/WarcTest.scala
@@ -41,11 +41,11 @@ class WarcTest extends FunSuite with BeforeAndAfter {
records = RecordLoader.loadArchives(warcPath, sc)
}
- test("count records") {
+ test("Count records") {
assert(299L == records.count)
}
- test("warc extract domain") {
+ test("WARC extract domain RDD") {
val take = 10
val expectedLength = 3
val r = records
@@ -57,7 +57,7 @@ class WarcTest extends FunSuite with BeforeAndAfter {
assert(r.length == expectedLength)
}
- test("warc get content") {
+ test("WARC get content RDD") {
val a = RecordLoader.loadArchives(warcPath, sc)
.map(r => r.getContentString)
.take(1)
diff --git a/src/test/scala/io/archivesunleashed/app/CommandLineAppTest.scala b/src/test/scala/io/archivesunleashed/app/CommandLineAppTest.scala
index 7c898f59..efaa5565 100644
--- a/src/test/scala/io/archivesunleashed/app/CommandLineAppTest.scala
+++ b/src/test/scala/io/archivesunleashed/app/CommandLineAppTest.scala
@@ -70,7 +70,7 @@ class CommandLineAppTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("command line app tests") {
+ test("Command line app functionality tests") {
for {a <- testSuccessCmds} {
app.CommandLineAppRunner.test(a, sc)
assert(Files.exists(Paths.get(outputDir)))
diff --git a/src/test/scala/io/archivesunleashed/app/DomainFrequencyExtractorTest.scala b/src/test/scala/io/archivesunleashed/app/DomainFrequencyExtractorTest.scala
index 0ec4d047..716cdabd 100644
--- a/src/test/scala/io/archivesunleashed/app/DomainFrequencyExtractorTest.scala
+++ b/src/test/scala/io/archivesunleashed/app/DomainFrequencyExtractorTest.scala
@@ -38,7 +38,7 @@ class DomainFrequencyExtractorTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("DomainFrequencyExtractor") {
+ test("Domain frequency extractor RDD & DF") {
val rdd = RecordLoader.loadArchives(arcPath, sc).keepValidPages()
val df = RecordLoader.loadArchives(arcPath, sc).webpages()
diff --git a/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorDfTest.scala b/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorDfTest.scala
index 6d17d690..42685824 100644
--- a/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorDfTest.scala
+++ b/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorDfTest.scala
@@ -38,7 +38,7 @@ class DomainGraphExtractorDfTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("Web Graph Extractor") {
+ test("Domain graph extractor DF") {
val TESTLENGTH = 166
val TESTRESULT = 280
val df = RecordLoader.loadArchives(arcPath, sc).webgraph()
diff --git a/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorTest.scala b/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorTest.scala
index 76402b81..3fa14ad7 100644
--- a/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorTest.scala
+++ b/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorTest.scala
@@ -38,7 +38,7 @@ class DomainGraphExtractorTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("DomainGraphExtractor") {
+ test("Domain graph extractor RDD") {
val rdd = RecordLoader.loadArchives(arcPath, sc)
val rddResult = DomainGraphExtractor(rdd).collect()
diff --git a/src/test/scala/io/archivesunleashed/app/ExtractEntitiesTest.scala b/src/test/scala/io/archivesunleashed/app/ExtractEntitiesTest.scala
index ed4da00b..9a57d882 100644
--- a/src/test/scala/io/archivesunleashed/app/ExtractEntitiesTest.scala
+++ b/src/test/scala/io/archivesunleashed/app/ExtractEntitiesTest.scala
@@ -51,7 +51,7 @@ class ExtractEntitiesTest extends FunSuite with BeforeAndAfter {
LOG.info("Output can be found in " + tempDir.getPath)
}
- test("Extract from Record") {
+ test("Extract entities from record") {
val e = ExtractEntities.extractFromRecords(iNerClassifierFile, archivePath, tempDir + "/scrapeArcEntities", sc).take(3).last
assert(e._1 == "hello")
}
diff --git a/src/test/scala/io/archivesunleashed/app/ExtractGraphXTest.scala b/src/test/scala/io/archivesunleashed/app/ExtractGraphXTest.scala
index 58736e30..a77d4e8f 100644
--- a/src/test/scala/io/archivesunleashed/app/ExtractGraphXTest.scala
+++ b/src/test/scala/io/archivesunleashed/app/ExtractGraphXTest.scala
@@ -48,7 +48,7 @@ import scala.util.Try
sc = new SparkContext(conf)
}
- test ("Case classes are empty") {
+ test ("Check for empty classes") {
val pageRank = 0.56
val weak = 4
val strong = 5
@@ -65,7 +65,7 @@ import scala.util.Try
assert(testPR.strong == strong)
}
- test("creates a network with pagerank scores") {
+ test("Create a network with pagerank scores") {
val pageRank = 0.9943090942904987
val connected = -649648005
val minEdges = 5
@@ -83,7 +83,7 @@ import scala.util.Try
assert(pRank(0)._2.strong == connected)
}
- test("creates a network using dynamic PR") {
+ test("Create a network using dynamic pagerank") {
val dynPageRank = 0.9999999999999986
val connected = -1054421350
val minEdges = 5
diff --git a/src/test/scala/io/archivesunleashed/app/ExtarctImageDetailsDFTest.scala b/src/test/scala/io/archivesunleashed/app/ExtractImageDetailsDFTest.scala
similarity index 95%
rename from src/test/scala/io/archivesunleashed/app/ExtarctImageDetailsDFTest.scala
rename to src/test/scala/io/archivesunleashed/app/ExtractImageDetailsDFTest.scala
index bfb69539..66df8a2d 100644
--- a/src/test/scala/io/archivesunleashed/app/ExtarctImageDetailsDFTest.scala
+++ b/src/test/scala/io/archivesunleashed/app/ExtractImageDetailsDFTest.scala
@@ -37,7 +37,7 @@ class ExtractImageDetailsDFTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("extracts Image Details") {
+ test("Extract image details DF") {
val exampledf = RecordLoader.loadArchives(arcPath, sc).keepImages().all()
val imageDetails = ExtractImageDetailsDF(exampledf)
val response1 = "http://www.archive.org/images/logoc.jpg"
diff --git a/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesDFTest.scala b/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesDFTest.scala
index 669fa192..c12c3f92 100644
--- a/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesDFTest.scala
+++ b/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesDFTest.scala
@@ -39,7 +39,7 @@ class ExtractPopularImagesDFTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("extracts popular images") {
+ test("Extract popular images DF") {
val highTest = 507
val exampledf = RecordLoader.loadArchives(arcPath, sc).images()
val imagesLowLimit = ExtractPopularImagesDF(exampledf, 3)
diff --git a/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesRDDTest.scala b/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesRDDTest.scala
index 38ffd509..ab3040e4 100644
--- a/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesRDDTest.scala
+++ b/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesRDDTest.scala
@@ -39,7 +39,7 @@ class ExtractPopularImagesRDDTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("extracts popular images") {
+ test("Extract popular images RDD") {
val highTest = 507
val examplerdd = RecordLoader.loadArchives(arcPath, sc)
val imagesLowLimit = ExtractPopularImagesRDD(examplerdd, 3, sc)
diff --git a/src/test/scala/io/archivesunleashed/app/PlainTextExtractorTest.scala b/src/test/scala/io/archivesunleashed/app/PlainTextExtractorTest.scala
index 1cd66fbd..21b5ac81 100644
--- a/src/test/scala/io/archivesunleashed/app/PlainTextExtractorTest.scala
+++ b/src/test/scala/io/archivesunleashed/app/PlainTextExtractorTest.scala
@@ -38,7 +38,7 @@ class PlainTextExtractorTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("PlainTextExtractorTest") {
+ test("Plain text extractor RDD & DF") {
val rdd = RecordLoader.loadArchives(arcPath, sc).keepValidPages()
val df = RecordLoader.loadArchives(arcPath, sc).webpages()
val rddResults = PlainTextExtractor(rdd).collect()
diff --git a/src/test/scala/io/archivesunleashed/app/WriteGEXFTest.scala b/src/test/scala/io/archivesunleashed/app/WriteGEXFTest.scala
index 45eeae8b..73220e9f 100644
--- a/src/test/scala/io/archivesunleashed/app/WriteGEXFTest.scala
+++ b/src/test/scala/io/archivesunleashed/app/WriteGEXFTest.scala
@@ -45,7 +45,7 @@ class WriteGEXFTest extends FunSuite with BeforeAndAfter{
sc = new SparkContext(conf)
}
- test("creates the file") {
+ test("Create the GEXF file") {
val testLines = (0, 12, 22, 34)
val networkrdd = sc.parallelize(network)
WriteGEXF(networkrdd, testFile)
@@ -57,7 +57,7 @@ class WriteGEXFTest extends FunSuite with BeforeAndAfter{
assert(lines(testLines._4) == """""")
}
- test("creates the file from Array[Row]") {
+ test("Create the GEXF file from Array[Row]") {
val testLines = (0, 12, 22, 34)
if (Files.exists(Paths.get(testFile))) {
new File(testFile).delete()
@@ -74,7 +74,7 @@ class WriteGEXFTest extends FunSuite with BeforeAndAfter{
assert(!WriteGEXF(networkarray ,""))
}
- test ("returns a Bool depending on pass or failure") {
+ test("Test if GEXF path is empty") {
val networkrdd = sc.parallelize(network)
val gexf = WriteGEXF(networkrdd, testFile)
assert(gexf)
diff --git a/src/test/scala/io/archivesunleashed/app/WriteGraphMLTest.scala b/src/test/scala/io/archivesunleashed/app/WriteGraphMLTest.scala
index 5d788980..576fdfcf 100644
--- a/src/test/scala/io/archivesunleashed/app/WriteGraphMLTest.scala
+++ b/src/test/scala/io/archivesunleashed/app/WriteGraphMLTest.scala
@@ -46,7 +46,7 @@ class WriteGraphMLTest extends FunSuite with BeforeAndAfter{
sc = new SparkContext(conf)
}
- test("creates the file") {
+ test("Create the GraphML file") {
val networkrdd = sc.parallelize(network)
val lineCheck = (0, 15, 22, 30)
WriteGraphML(networkrdd, testFile)
@@ -58,7 +58,7 @@ class WriteGraphMLTest extends FunSuite with BeforeAndAfter{
assert(lines(lineCheck._4) == """3""")
}
- test ("returns a Bool depending on pass or failure") {
+ test ("Test if GraphML path is empty") {
val networkrdd = sc.parallelize(network)
val graphml = WriteGraphML(networkrdd, testFile)
assert(graphml)
diff --git a/src/test/scala/io/archivesunleashed/app/WriteGraphTest.scala b/src/test/scala/io/archivesunleashed/app/WriteGraphTest.scala
index 46029be9..2be89d51 100644
--- a/src/test/scala/io/archivesunleashed/app/WriteGraphTest.scala
+++ b/src/test/scala/io/archivesunleashed/app/WriteGraphTest.scala
@@ -64,7 +64,7 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{
sc = new SparkContext(conf)
}
- test("creates the file") {
+ test("Create the graph file") {
val testLines = (0, 12, 22, 34)
val networkrdd = sc.parallelize(network)
WriteGraph.asGexf(networkrdd, testFile)
@@ -76,7 +76,7 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{
assert(lines(testLines._4) == """""")
}
- test("creates the file from Array[Row]") {
+ test("Create the graph file from Array[Row]") {
val testLines = (0, 12, 22, 34)
if (Files.exists(Paths.get(testFile))) {
new File(testFile).delete()
@@ -94,14 +94,14 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{
assert(!WriteGraph(networkarray, ""))
}
- test ("returns a Bool depending on pass or failure") {
+ test ("Test if GEXF path is empty") {
val networkrdd = sc.parallelize(network)
val gexf = WriteGraph.asGexf(networkrdd, testFile)
assert(gexf)
assert(!WriteGraph.asGexf(networkrdd, ""))
}
- test ("Nodes zip with ids") {
+ test ("Nodes zip with IDs") {
val networkrdd = sc.parallelize(networkWithDuplication)
val nodeIds = WriteGraph.nodesWithIds(networkrdd).collect
val expected = ("Source3", 0)
@@ -120,7 +120,7 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{
assert (WriteGraph.nodeIdFromLabel(Option(null)) == -1)
}
- test ("Gets the id from a lookup") {
+ test ("Gets the ID from a lookup") {
val nodes = WriteGraph.nodesWithIds(sc.parallelize(network))
val empty = -1
val expected = 6
@@ -130,7 +130,7 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{
assert (WriteGraph.nodeIdFromLabel(badlookup) == empty)
}
- test ("Edge ids are captured from lookup") {
+ test ("Edge IDs are captured from lookup") {
val edges = WriteGraph.edgeNodes(sc.parallelize(network))
val expected = Array((date1, 6, 3, 3),
(date2, 7, 4, 4),
@@ -138,7 +138,7 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{
assert(edges.collect.deep == expected)
}
- test ("Graphml produces correct output") {
+ test ("GraphML produces correct output") {
val testLines = (0, 12, 30, 37)
val networkrdd = sc.parallelize(network)
WriteGraph.asGraphml(networkrdd, testFile)
@@ -150,7 +150,7 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{
assert(lines(testLines._4) == """""")
}
- test ("Graphml works with unescaped xml data") {
+ test ("GraphML works with unescaped XML data") {
val testLines = (0, 12, 30, 37)
val networkrdd = sc.parallelize(unescapedNetwork)
WriteGraph.asGraphml(networkrdd, testFile)
@@ -162,7 +162,7 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{
assert(lines(testLines._4) == """""")
}
- test( "Gexf works with unescaped xml data") {
+ test( "GEXF works with unescaped XML data") {
val testLines = (0, 12, 29, 31)
val networkrdd = sc.parallelize(unescapedNetwork)
WriteGraph(networkrdd, testFile2)
diff --git a/src/test/scala/io/archivesunleashed/app/WriteGraphXMLTest.scala b/src/test/scala/io/archivesunleashed/app/WriteGraphXMLTest.scala
index b7083127..27337895 100644
--- a/src/test/scala/io/archivesunleashed/app/WriteGraphXMLTest.scala
+++ b/src/test/scala/io/archivesunleashed/app/WriteGraphXMLTest.scala
@@ -42,7 +42,7 @@ class WriteGraphXMLTest extends FunSuite with BeforeAndAfter{
sc = new SparkContext(conf)
}
- test("creates the file") {
+ test("Create the graph XML file") {
val headerLocation = 0
val expectedLine = 13
val networkrdd = ExtractGraphX.extractGraphX(sc.parallelize(network))
@@ -54,7 +54,7 @@ class WriteGraphXMLTest extends FunSuite with BeforeAndAfter{
assert(lines(expectedLine) == """""")
}
- test ("returns a Bool depending on pass or failure") {
+ test ("Test if GraphML path is empty") {
val networkrdd = ExtractGraphX.extractGraphX(sc.parallelize(network))
val pRank = ExtractGraphX.runPageRankAlgorithm(networkrdd)
val graphml = WriteGraphXML(pRank, testFile)
diff --git a/src/test/scala/io/archivesunleashed/df/DataFrameLoaderTest.scala b/src/test/scala/io/archivesunleashed/df/DataFrameLoaderTest.scala
index 3ca90546..6b6e14cc 100644
--- a/src/test/scala/io/archivesunleashed/df/DataFrameLoaderTest.scala
+++ b/src/test/scala/io/archivesunleashed/df/DataFrameLoaderTest.scala
@@ -44,7 +44,7 @@ class DataFrameLoaderTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("Test DataFrameLoader") {
+ test("Test DataFrameLoader (connection to PySpark)") {
val df = new DataFrameLoader(sc)
val validPages = df.webpages(arcPath)
val hyperlinks = df.webgraph(arcPath)
diff --git a/src/test/scala/io/archivesunleashed/df/ExtractAudioDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractAudioDetailsTest.scala
index 9ea899da..beca222b 100644
--- a/src/test/scala/io/archivesunleashed/df/ExtractAudioDetailsTest.scala
+++ b/src/test/scala/io/archivesunleashed/df/ExtractAudioDetailsTest.scala
@@ -38,7 +38,7 @@ class AudioTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("Audio DF extraction") {
+ test("Audio files extraction DF") {
val df = RecordLoader.loadArchives(warcPath, sc)
.audio()
diff --git a/src/test/scala/io/archivesunleashed/df/ExtractDateDFTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractDateDFTest.scala
index bb660f7e..3f960d4a 100644
--- a/src/test/scala/io/archivesunleashed/df/ExtractDateDFTest.scala
+++ b/src/test/scala/io/archivesunleashed/df/ExtractDateDFTest.scala
@@ -39,7 +39,7 @@ class ExtractDateDFTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("Extract Dates DataFrame YYYY") {
+ test("Extract dates YYYY DF") {
val df = RecordLoader.loadArchives(arcPath, sc)
.webpages()
@@ -77,7 +77,7 @@ class ExtractDateDFTest extends FunSuite with BeforeAndAfter {
assert(results(2).get(3) == "http://www.sloan.org")
}
- test("Extract Dates DataFrame YYYYMM") {
+ test("Extract dates YYYYMM DF") {
val df = RecordLoader.loadArchives(arcPath, sc)
.webpages()
@@ -115,7 +115,7 @@ class ExtractDateDFTest extends FunSuite with BeforeAndAfter {
assert(results(2).get(3) == "http://www.sloan.org")
}
- test("Extract Dates DataFrame MM") {
+ test("Extract dates MM DF") {
val df = RecordLoader.loadArchives(arcPath, sc)
.webpages()
@@ -153,7 +153,7 @@ class ExtractDateDFTest extends FunSuite with BeforeAndAfter {
assert(results(2).get(3) == "http://www.sloan.org")
}
- test("Extract Dates DataFrame DD") {
+ test("Extract dates DD DF") {
val df = RecordLoader.loadArchives(arcPath, sc)
.webpages()
@@ -191,7 +191,7 @@ class ExtractDateDFTest extends FunSuite with BeforeAndAfter {
assert(results(2).get(3) == "http://www.sloan.org")
}
- test("Extract Dates DataFrame YYYYMMDD") {
+ test("Extract dates YYYYMMDD DF") {
val df = RecordLoader.loadArchives(arcPath, sc)
.webpages()
diff --git a/src/test/scala/io/archivesunleashed/df/ExtractHyperlinksTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractHyperlinksTest.scala
index 4abd7021..51fa100b 100644
--- a/src/test/scala/io/archivesunleashed/df/ExtractHyperlinksTest.scala
+++ b/src/test/scala/io/archivesunleashed/df/ExtractHyperlinksTest.scala
@@ -39,7 +39,7 @@ class ExtractHyperlinksTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("count records") {
+ test("Extract links DF") {
val df = RecordLoader.loadArchives(arcPath, sc)
.webpages()
diff --git a/src/test/scala/io/archivesunleashed/df/ExtractImageDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractImageDetailsTest.scala
index ebe754e7..843635ae 100644
--- a/src/test/scala/io/archivesunleashed/df/ExtractImageDetailsTest.scala
+++ b/src/test/scala/io/archivesunleashed/df/ExtractImageDetailsTest.scala
@@ -38,7 +38,7 @@ class ExtractImageDetailsTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("Image DF extraction") {
+ test("Image files extraction DF") {
val df = RecordLoader.loadArchives(arcPath, sc)
.images()
diff --git a/src/test/scala/io/archivesunleashed/df/ExtractImageLinksTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractImageLinksTest.scala
index 07ad0efe..f7e9453a 100644
--- a/src/test/scala/io/archivesunleashed/df/ExtractImageLinksTest.scala
+++ b/src/test/scala/io/archivesunleashed/df/ExtractImageLinksTest.scala
@@ -38,7 +38,7 @@ class ImageLinksTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("Fetch image links") {
+ test("Image links extraction DF") {
val df = RecordLoader.loadArchives(arcPath, sc)
.imageLinks()
diff --git a/src/test/scala/io/archivesunleashed/df/ExtractPDFDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractPDFDetailsTest.scala
index 15d31485..b17fb32a 100644
--- a/src/test/scala/io/archivesunleashed/df/ExtractPDFDetailsTest.scala
+++ b/src/test/scala/io/archivesunleashed/df/ExtractPDFDetailsTest.scala
@@ -38,7 +38,7 @@ class ExtractPDFDetailsTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("PDF DF extraction") {
+ test("PDF files extraction DF") {
val df = RecordLoader.loadArchives(warcPath, sc)
.pdfs()
diff --git a/src/test/scala/io/archivesunleashed/df/ExtractPresentationProgramDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractPresentationProgramDetailsTest.scala
index 26821a22..4168e5a6 100644
--- a/src/test/scala/io/archivesunleashed/df/ExtractPresentationProgramDetailsTest.scala
+++ b/src/test/scala/io/archivesunleashed/df/ExtractPresentationProgramDetailsTest.scala
@@ -38,7 +38,7 @@ class PresentationProgramFilesTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("Presentation Program DF extraction") {
+ test("Presentation program files extraction DF") {
val df = RecordLoader.loadArchives(warcPath, sc)
.presentationProgramFiles()
diff --git a/src/test/scala/io/archivesunleashed/df/ExtractSpreadsheetDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractSpreadsheetDetailsTest.scala
index 569362e2..c326b021 100644
--- a/src/test/scala/io/archivesunleashed/df/ExtractSpreadsheetDetailsTest.scala
+++ b/src/test/scala/io/archivesunleashed/df/ExtractSpreadsheetDetailsTest.scala
@@ -38,7 +38,7 @@ class ExtractSpreadsheetDetailsTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("Spreadsheet DF extraction") {
+ test("Spreadsheet files extraction DF") {
val df = RecordLoader.loadArchives(warcPath, sc)
.spreadsheets()
diff --git a/src/test/scala/io/archivesunleashed/df/ExtractTextFilesDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractTextFilesDetailsTest.scala
index c54b923e..97523fdc 100644
--- a/src/test/scala/io/archivesunleashed/df/ExtractTextFilesDetailsTest.scala
+++ b/src/test/scala/io/archivesunleashed/df/ExtractTextFilesDetailsTest.scala
@@ -40,7 +40,7 @@ class TextFilesTest extends FunSuite with BeforeAndAfter with Matchers {
sc = new SparkContext(conf)
}
- test("Text Files DF extraction") {
+ test("Text files extraction DF") {
val df = RecordLoader.loadArchives(warcPath, sc)
.textFiles()
@@ -56,7 +56,7 @@ class TextFilesTest extends FunSuite with BeforeAndAfter with Matchers {
assert("32abd404fb560ecf14b75611f3cc5c2c" == extracted(0)(5))
}
- test("Text Files DF robots.txt") {
+ test("Text files robots.txt DF") {
val df = RecordLoader.loadArchives(testPath, sc)
.textFiles()
@@ -75,7 +75,7 @@ class TextFilesTest extends FunSuite with BeforeAndAfter with Matchers {
robots(1)(0).toString should not include (".html")
}
- test("Text Files DF dns or filedesc") {
+ test("Text files dns or filedesc DF") {
val df = RecordLoader.loadArchives(filedescPath, sc)
.textFiles()
diff --git a/src/test/scala/io/archivesunleashed/df/ExtractVideoDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractVideoDetailsTest.scala
index b2521df4..be1048b0 100644
--- a/src/test/scala/io/archivesunleashed/df/ExtractVideoDetailsTest.scala
+++ b/src/test/scala/io/archivesunleashed/df/ExtractVideoDetailsTest.scala
@@ -38,7 +38,7 @@ class VideoTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("Video DF extraction") {
+ test("Video files extraction DF") {
val df = RecordLoader.loadArchives(warcPath, sc)
.videos()
diff --git a/src/test/scala/io/archivesunleashed/df/ExtractWordProcessorDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractWordProcessorDetailsTest.scala
index e623416f..a9d474de 100644
--- a/src/test/scala/io/archivesunleashed/df/ExtractWordProcessorDetailsTest.scala
+++ b/src/test/scala/io/archivesunleashed/df/ExtractWordProcessorDetailsTest.scala
@@ -38,7 +38,7 @@ class WordProcessorFilesTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("Word Processor DF extraction") {
+ test("Word processor files extraction DF") {
val df = RecordLoader.loadArchives(warcPath, sc)
.wordProcessorFiles()
diff --git a/src/test/scala/io/archivesunleashed/df/SaveMediaBytesTest.scala b/src/test/scala/io/archivesunleashed/df/SaveMediaBytesTest.scala
index 8be9ba51..a0f9abc0 100644
--- a/src/test/scala/io/archivesunleashed/df/SaveMediaBytesTest.scala
+++ b/src/test/scala/io/archivesunleashed/df/SaveMediaBytesTest.scala
@@ -49,7 +49,7 @@ class SaveMediaBytesTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("Save audio bytes to disk") {
+ test("Save audio bytes to disk DF") {
val df = RecordLoader.loadArchives(warcPath, sc)
.audio()
@@ -67,7 +67,7 @@ class SaveMediaBytesTest extends FunSuite with BeforeAndAfter {
Files.delete(Paths.get(fileName))
}
- test("Attempt to save invalid audio") {
+ test("Attempt to save invalid audio DF") {
val dummyEncBytes = Base64.getEncoder.encodeToString(Array.range(0, 127)
.map(_.toByte))
val dummyMD5 = ComputeMD5RDD(dummyEncBytes.getBytes)
diff --git a/src/test/scala/io/archivesunleashed/df/SimpleDfTest.scala b/src/test/scala/io/archivesunleashed/df/SimpleDfTest.scala
index db0b07e1..a2f9f397 100644
--- a/src/test/scala/io/archivesunleashed/df/SimpleDfTest.scala
+++ b/src/test/scala/io/archivesunleashed/df/SimpleDfTest.scala
@@ -39,7 +39,7 @@ class SimpleDfTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}
- test("count records") {
+ test("Count records DF") {
val df = RecordLoader.loadArchives(arcPath, sc)
.webpages()
diff --git a/src/test/scala/io/archivesunleashed/matchbox/ComputeImageSizeTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ComputeImageSizeTest.scala
index b2c58100..3b7981ca 100644
--- a/src/test/scala/io/archivesunleashed/matchbox/ComputeImageSizeTest.scala
+++ b/src/test/scala/io/archivesunleashed/matchbox/ComputeImageSizeTest.scala
@@ -36,7 +36,7 @@ class ComputeImageSizeTest extends FunSuite {
var image: Array[Byte] = ios.toByteArray();
ios.close()
- test ("check images") {
+ test ("Check images and provide size RDD") {
val imageSize = (10, 10)
val emptyImageSize = (0, 0)
assert(ComputeImageSize(image) == imageSize)
diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractBoilerPipeTextTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractBoilerPipeTextTest.scala
index 49be1e2f..db1ea680 100644
--- a/src/test/scala/io/archivesunleashed/matchbox/ExtractBoilerPipeTextTest.scala
+++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractBoilerPipeTextTest.scala
@@ -31,13 +31,13 @@ class ExtractBoilerPipeTextTest extends FunSuite {
"""
var boiler = """Copyright 2017"""
- test("Collects boilerpipe") {
+ test("Collects boilerpipe RDD") {
assert(ExtractBoilerpipeTextRDD(text) == boiler)
assert(ExtractBoilerpipeTextRDD("") == "")
assert(ExtractBoilerpipeTextRDD("All Rights Reserved.") == "")
}
- test("Removes Header information") {
+ test("Removes HTTP header information RDD") {
assert(ExtractBoilerpipeTextRDD(header + text) == boiler)
}
}
diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractDateRDDTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractDateRDDTest.scala
index 8f15aace..7cdaa278 100644
--- a/src/test/scala/io/archivesunleashed/matchbox/ExtractDateRDDTest.scala
+++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractDateRDDTest.scala
@@ -24,7 +24,7 @@ import org.scalatest.junit.JUnitRunner
@RunWith(classOf[JUnitRunner])
class ExtractDateRDDTest extends FunSuite {
- test("RDD date extraction") {
+ test("Date extraction RDD") {
val date = "20151204"
val startSS = 0
val yearSS = 4
diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractDomainTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractDomainTest.scala
index af7fe246..23249027 100644
--- a/src/test/scala/io/archivesunleashed/matchbox/ExtractDomainTest.scala
+++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractDomainTest.scala
@@ -42,26 +42,26 @@ class ExtractDomainRDDTest extends FunSuite {
private val data3 = Seq.newBuilder.+=(
("http://www.seetorontonow.canada-booknow.com\\booking_results.php", "www.seetorontonow.canada-booknow.com")).result()
- test("simple") {
+ test("Extract simple domain extraction RDD") {
data1.foreach {
case (link, domain) => assert(ExtractDomainRDD(link) == domain)
}
}
- test("withBase") {
+ test("Extract domains with base RDD") {
data2.foreach {
case (link, base, domain) => assert(ExtractDomainRDD(link, base) == domain)
}
}
- test("error") {
+ test("Test for domain errors RDD") {
// scalastyle:off null
assert(ExtractDomainRDD(null) == "")
assert(ExtractDomainRDD(index, null) == "")
// scalastyle:on null
}
- test("backslash") {
+ test("Test for domain backslash RDD") {
data3.foreach {
case (link, domain) => assert(ExtractDomainRDD(link) == domain)
}
diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractImageLinksTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractImageLinksTest.scala
index 37343524..d13eeb36 100644
--- a/src/test/scala/io/archivesunleashed/matchbox/ExtractImageLinksTest.scala
+++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractImageLinksTest.scala
@@ -24,7 +24,7 @@ import org.scalatest.junit.JUnitRunner
@RunWith(classOf[JUnitRunner])
class ExtractImageLinksRDDTest extends FunSuite {
- test("simple") {
+ test("Extract simple image links RDD") {
val fragment =
"""Image here: and another """
val extracted = ExtractImageLinksRDD("", fragment).toList
@@ -33,7 +33,7 @@ class ExtractImageLinksRDDTest extends FunSuite {
assert("http://baz.org/a/b/banner.jpg" == extracted(1))
}
- test("relative") {
+ test("Extract relative image links RDD") {
val fragment =
"""Image here: and another and """
val extracted = ExtractImageLinksRDD("http://foo.bar.com/a/page.html", fragment)
@@ -43,7 +43,7 @@ class ExtractImageLinksRDDTest extends FunSuite {
assert("http://foo.bar.com/logo.gif" == extracted(2))
}
- test("errors") {
+ test("Test image link errors RDD") {
val fragment =
"""Image here: and another and """
assert(ExtractImageLinksRDD("", "") == Nil)
diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractLinksTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractLinksTest.scala
index 4b18727d..9bfeef84 100644
--- a/src/test/scala/io/archivesunleashed/matchbox/ExtractLinksTest.scala
+++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractLinksTest.scala
@@ -35,7 +35,7 @@ class ExtractLinksRDDTest extends FunSuite {
val twitter = "http://www.twitter.com/"
val head = "a search engine"
- test("simple") {
+ test("Extract simple links RDD") {
val extracted: Seq[(String, String, String)] = ExtractLinksRDD("", fragment)
assert(extracted.size == 2)
assert(url == extracted.head._2)
@@ -44,7 +44,7 @@ class ExtractLinksRDDTest extends FunSuite {
assert("Twitter" == extracted.last._3)
}
- test("relative") {
+ test("Extract relative links RDD") {
val fragmentLocal: String = "Here is " +
"a search engine.\nHere is a a relative URL.\n"
val fooFragmentLocal = "http://www.foobar.org/page.html"
@@ -56,7 +56,7 @@ class ExtractLinksRDDTest extends FunSuite {
assert("a relative URL" == extracted.last._3)
}
- test("errors") {
+ test("Test link errors RDD") {
val bytes: Array[Byte] = "wronglyTyped".getBytes()
val invalid: String = "Here is a fake url bogus search engine."
// scalastyle:off null
diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractTextFromPDFsTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractTextFromPDFsTest.scala
index 38c9b984..3e38bc24 100644
--- a/src/test/scala/io/archivesunleashed/matchbox/ExtractTextFromPDFsTest.scala
+++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractTextFromPDFsTest.scala
@@ -24,7 +24,7 @@ import org.scalatest.Matchers
@RunWith(classOf[JUnitRunner])
class ExtractTextFromPDFsTest extends FunSuite with Matchers {
- test("get parser") {
+ test("Get PDF parser") {
ExtractTextFromPDFs.pdfParser shouldBe a[PDFParser]
}
}
diff --git a/src/test/scala/io/archivesunleashed/matchbox/RemoveHTMLTest.scala b/src/test/scala/io/archivesunleashed/matchbox/RemoveHTMLTest.scala
index a1a8a5ef..bb96b02b 100644
--- a/src/test/scala/io/archivesunleashed/matchbox/RemoveHTMLTest.scala
+++ b/src/test/scala/io/archivesunleashed/matchbox/RemoveHTMLTest.scala
@@ -24,7 +24,7 @@ import org.scalatest.junit.JUnitRunner
@RunWith(classOf[JUnitRunner])
class RemoveHTMLRDDTest extends FunSuite {
- test("simple") {
+ test("Remove HTML RDD") {
val html =
"""
diff --git a/src/test/scala/io/archivesunleashed/matchbox/RemoveHTTPHeaderTest.scala b/src/test/scala/io/archivesunleashed/matchbox/RemoveHTTPHeaderTest.scala
index 81d68d07..c7c6ae25 100644
--- a/src/test/scala/io/archivesunleashed/matchbox/RemoveHTTPHeaderTest.scala
+++ b/src/test/scala/io/archivesunleashed/matchbox/RemoveHTTPHeaderTest.scala
@@ -22,7 +22,7 @@ import org.scalatest.junit.JUnitRunner
@RunWith(classOf[JUnitRunner])
class RemoveHTTPHeaderRDDTest extends FunSuite {
- test("simple") {
+ test("Remove HTTP header RDD") {
val header = "HTTP/1.1 200 OK\r\n\r\nHello content"
val nohttp = "This has no Http"
val removed = RemoveHTTPHeaderRDD(header)
diff --git a/src/test/scala/io/archivesunleashed/matchbox/StringUtilsTest.scala b/src/test/scala/io/archivesunleashed/matchbox/StringUtilsTest.scala
index 70dbb4ed..3a3c7cb9 100644
--- a/src/test/scala/io/archivesunleashed/matchbox/StringUtilsTest.scala
+++ b/src/test/scala/io/archivesunleashed/matchbox/StringUtilsTest.scala
@@ -24,7 +24,7 @@ import org.scalatest.junit.JUnitRunner
@RunWith(classOf[JUnitRunner])
class StringUtilsTest extends FunSuite {
- test("remove prefix") {
+ test("Remove prefix") {
val s: String = "www.example.com"
// scalastyle:off null
val n: String = null
@@ -33,7 +33,7 @@ class StringUtilsTest extends FunSuite {
assert(n.removePrefixWWW() == "")
}
- test("create hash") {
+ test("Create hash") {
val invalid: String = "AC&D\"";
// scalastyle:off null
val except: String = null;
@@ -43,11 +43,11 @@ class StringUtilsTest extends FunSuite {
assert (caught.getMessage == "Caught exception processing input row ");
}
- test ("md5 hash") {
+ test ("MD5 hash") {
val s: String = "unesco.org";
assert(ComputeMD5RDD(s.getBytes) == "8e8decc8e8107bcf9d3896f3222b77d8");
}
- test ("sh1 hash") {
+ test ("SHA1 hash") {
val s: String = "unesco.org";
assert(ComputeSHA1RDD(s.getBytes) == "2d0e5377157172045d87befe46e157cda42c4f6e");
}
diff --git a/src/test/scala/io/archivesunleashed/matchbox/TupleFormatterTest.scala b/src/test/scala/io/archivesunleashed/matchbox/TupleFormatterTest.scala
index a2f9337b..f9c23ddd 100644
--- a/src/test/scala/io/archivesunleashed/matchbox/TupleFormatterTest.scala
+++ b/src/test/scala/io/archivesunleashed/matchbox/TupleFormatterTest.scala
@@ -29,13 +29,13 @@ import ops.tuple.ToList
@RunWith(classOf[JUnitRunner])
class TupleFormatterTest extends FunSuite with Matchers {
- test("tab delimit") {
+ test("Tab delimit") {
val tuple = (("ab", "bl", ("c", 9)), "d", 5, ("hi", 1))
assert(TupleFormatter.tabDelimit(tuple) == "ab\tbl\tc\t9\td\t5\thi\t1")
assert(TupleFormatter.tabDelimit.isInstanceOf[Poly1])
}
- test("just flatten") {
+ test("Just flatten") {
val tuple = ("an", 1, "cr", ("x", 3, ("NO", "YES")), "perhaps", "maybe", 3, (0,1))
val flatTuple = ("an", 1, "cr", "x", 3, "NO", "YES", "perhaps", "maybe", 3, 0, 1)
assert(TupleFormatter.flatten(tuple) == flatTuple)
diff --git a/src/test/scala/io/archivesunleashed/util/JsonUtilsTest.scala b/src/test/scala/io/archivesunleashed/util/JsonUtilsTest.scala
index 12fc3548..a4fe0473 100644
--- a/src/test/scala/io/archivesunleashed/util/JsonUtilsTest.scala
+++ b/src/test/scala/io/archivesunleashed/util/JsonUtilsTest.scala
@@ -23,17 +23,17 @@ import org.scalatest.junit.JUnitRunner
@RunWith(classOf[JUnitRunner])
class JsonUtilsTest extends FunSuite {
- test("proper Map") {
+ test("Proper Map") {
val map: Map[Symbol, Any] = Map('a -> 1, 'b -> 2, 'c -> 3)
assert(JsonUtils.toJson(map) == """{"a":1,"b":2,"c":3}""")
}
- test("any value") {
+ test("Any value") {
val value = 12345
assert(JsonUtils.toJson(value) == "12345")
}
- test("json string") {
+ test("JSON string") {
val jsonString = """{"a":1,"b":2,"c":3}"""
assert(JsonUtils.fromJson(jsonString) == Map("a" -> 1, "b" -> 2, "c" -> 3))
}