From ffef735721ab2448f0e01d4b0c4cc7530a488335 Mon Sep 17 00:00:00 2001 From: Nick Ruest Date: Tue, 21 Jan 2020 17:14:05 -0500 Subject: [PATCH] Clean up test descriptions, addresses #372. (#416) - Clean up test descriptions - Rename typo filename --- .../scala/io/archivesunleashed/ArcTest.scala | 12 +++--- .../archivesunleashed/ArchiveRecordTest.scala | 10 ++--- .../archivesunleashed/CountableRDDTest.scala | 2 +- .../io/archivesunleashed/RecordDFTest.scala | 38 +++++++++--------- .../archivesunleashed/RecordLoaderTest.scala | 2 +- .../io/archivesunleashed/RecordRDDTest.scala | 40 +++++++++---------- .../scala/io/archivesunleashed/WarcTest.scala | 6 +-- .../app/CommandLineAppTest.scala | 2 +- .../app/DomainFrequencyExtractorTest.scala | 2 +- .../app/DomainGraphExtractorDfTest.scala | 2 +- .../app/DomainGraphExtractorTest.scala | 2 +- .../app/ExtractEntitiesTest.scala | 2 +- .../app/ExtractGraphXTest.scala | 6 +-- ....scala => ExtractImageDetailsDFTest.scala} | 2 +- .../app/ExtractPopularImagesDFTest.scala | 2 +- .../app/ExtractPopularImagesRDDTest.scala | 2 +- .../app/PlainTextExtractorTest.scala | 2 +- .../archivesunleashed/app/WriteGEXFTest.scala | 6 +-- .../app/WriteGraphMLTest.scala | 4 +- .../app/WriteGraphTest.scala | 18 ++++----- .../app/WriteGraphXMLTest.scala | 4 +- .../df/DataFrameLoaderTest.scala | 2 +- .../df/ExtractAudioDetailsTest.scala | 2 +- .../df/ExtractDateDFTest.scala | 10 ++--- .../df/ExtractHyperlinksTest.scala | 2 +- .../df/ExtractImageDetailsTest.scala | 2 +- .../df/ExtractImageLinksTest.scala | 2 +- .../df/ExtractPDFDetailsTest.scala | 2 +- ...xtractPresentationProgramDetailsTest.scala | 2 +- .../df/ExtractSpreadsheetDetailsTest.scala | 2 +- .../df/ExtractTextFilesDetailsTest.scala | 6 +-- .../df/ExtractVideoDetailsTest.scala | 2 +- .../df/ExtractWordProcessorDetailsTest.scala | 2 +- .../df/SaveMediaBytesTest.scala | 4 +- .../archivesunleashed/df/SimpleDfTest.scala | 2 +- .../matchbox/ComputeImageSizeTest.scala | 2 +- .../matchbox/ExtractBoilerPipeTextTest.scala | 4 +- .../matchbox/ExtractDateRDDTest.scala | 2 +- .../matchbox/ExtractDomainTest.scala | 8 ++-- .../matchbox/ExtractImageLinksTest.scala | 6 +-- .../matchbox/ExtractLinksTest.scala | 6 +-- .../matchbox/ExtractTextFromPDFsTest.scala | 2 +- .../matchbox/RemoveHTMLTest.scala | 2 +- .../matchbox/RemoveHTTPHeaderTest.scala | 2 +- .../matchbox/StringUtilsTest.scala | 8 ++-- .../matchbox/TupleFormatterTest.scala | 4 +- .../util/JsonUtilsTest.scala | 6 +-- 47 files changed, 130 insertions(+), 130 deletions(-) rename src/test/scala/io/archivesunleashed/app/{ExtarctImageDetailsDFTest.scala => ExtractImageDetailsDFTest.scala} (95%) diff --git a/src/test/scala/io/archivesunleashed/ArcTest.scala b/src/test/scala/io/archivesunleashed/ArcTest.scala index 0daac6b3..ee19ae11 100644 --- a/src/test/scala/io/archivesunleashed/ArcTest.scala +++ b/src/test/scala/io/archivesunleashed/ArcTest.scala @@ -41,11 +41,11 @@ class ArcTest extends FunSuite with BeforeAndAfter { val dayMonthTestA = "200805" - test("count records") { + test("Count records") { assert(RecordLoader.loadArchives(arcPath, sc).count == 300L) } - test("filter date") { + test("Filter date RDD") { val startSS = 0 val monthSS = 6 val four = RecordLoader.loadArchives(arcPath, sc) @@ -62,7 +62,7 @@ class ArcTest extends FunSuite with BeforeAndAfter { five.foreach(date => assert(date.substring(startSS, monthSS) == dayMonthTestA)) } - test("filter url pattern") { + test("Filter URL pattern RDD") { val keepMatches = RecordLoader.loadArchives(arcPath, sc) .keepUrlPatterns(Set("http://www.archive.org/about/.*".r)) val discardMatches = RecordLoader.loadArchives(arcPath, sc) @@ -71,14 +71,14 @@ class ArcTest extends FunSuite with BeforeAndAfter { assert(discardMatches.count == 284L) } - test("count links") { + test("Count links RDD") { val links = RecordLoader.loadArchives(arcPath, sc) .map(r => ExtractLinksRDD(r.getUrl, r.getContentString)) .reduce((a, b) => a ++ b) assert(links.size == 664) } - test("detect language") { + test("Detect language RDD") { val languageCounts = RecordLoader.loadArchives(arcPath, sc) .keepMimeTypes(Set("text/html")) .map(r => RemoveHTMLRDD(r.getContentString)) @@ -99,7 +99,7 @@ class ArcTest extends FunSuite with BeforeAndAfter { } } - test("detect mime type tika") { + test("Detect MIMEtype Tika RDD") { val mimeTypeCounts = RecordLoader.loadArchives(arcPath, sc) .map(r => RemoveHTTPHeaderRDD(r.getContentString)) .groupBy(content => DetectMimeTypeTika(content.getBytes)) diff --git a/src/test/scala/io/archivesunleashed/ArchiveRecordTest.scala b/src/test/scala/io/archivesunleashed/ArchiveRecordTest.scala index 14521416..0cdc196b 100644 --- a/src/test/scala/io/archivesunleashed/ArchiveRecordTest.scala +++ b/src/test/scala/io/archivesunleashed/ArchiveRecordTest.scala @@ -46,12 +46,12 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("count records") { + test("Count records") { assert(RecordLoader.loadArchives(arcPath, sc).count == 300L) assert(RecordLoader.loadArchives(warcPath, sc).count == 299L) } - test("Resource name produces expected result.") { + test("Resource name produces expected result") { val textSampleArc = RecordLoader.loadArchives(arcPath, sc) .map(x => FilenameUtils.getName(x.getArchiveFilename)) .take(3) @@ -81,7 +81,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter { assert(textSampleWarc.deep == Array("", exampleUrl, exampleUrl).deep) } - test("Urls") { + test("URLs") { val textSampleArc = RecordLoader.loadArchives(arcPath, sc) .map(x => x.getUrl).take(3) val textSampleWarc = RecordLoader.loadArchives(warcPath, sc) @@ -92,7 +92,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter { "http://www.archive.org/robots.txt", "http://www.archive.org/").deep) } - test("Mime-Type") { + test("MIMEtype") { val textSampleArc = RecordLoader.loadArchives(arcPath, sc) .map(x => x.getMimeType).take(3) val textSampleWarc = RecordLoader.loadArchives(warcPath, sc) @@ -103,7 +103,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter { "text/html").deep) } - test("Get Http Status") { + test("Get HTTP status") { val textSampleArc = RecordLoader.loadArchives(arcPath, sc) .map(x => x.getHttpStatus).take(3) val textSampleWarc = RecordLoader.loadArchives(warcPath, sc) diff --git a/src/test/scala/io/archivesunleashed/CountableRDDTest.scala b/src/test/scala/io/archivesunleashed/CountableRDDTest.scala index 57aff922..2f56aeb2 100644 --- a/src/test/scala/io/archivesunleashed/CountableRDDTest.scala +++ b/src/test/scala/io/archivesunleashed/CountableRDDTest.scala @@ -38,7 +38,7 @@ class CountableRDDTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("count records") { + test("Count records; Extract Domain RDD ") { val base = RecordLoader.loadArchives(arcPath, sc) .keepValidPages() .map(r => ExtractDomainRDD(r.getUrl)) diff --git a/src/test/scala/io/archivesunleashed/RecordDFTest.scala b/src/test/scala/io/archivesunleashed/RecordDFTest.scala index 0d9dff20..774587f4 100644 --- a/src/test/scala/io/archivesunleashed/RecordDFTest.scala +++ b/src/test/scala/io/archivesunleashed/RecordDFTest.scala @@ -39,7 +39,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("keep Valid Pages") { + test("Keep valid pages DF") { val expected = "http://www.archive.org/" val base = RecordLoader.loadArchives(arcPath, sc) .all() @@ -48,7 +48,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Discard MimeTypes") { + test("Discard MIMEtypes DF") { val expected = "filedesc://IAH-20080430204825-00000-blackbook.arc" val mimeTypes = Set("text/html") val base = RecordLoader.loadArchives(arcPath, sc) @@ -59,7 +59,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Discard Date") { + test("Discard date DF") { val expected = "20080430" val date = "20080429" val base = RecordLoader.loadArchives(arcPath, sc) @@ -70,7 +70,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Discard Urls") { + test("Discard URLs DF") { val expected = "http://www.archive.org/index.php" val url = Set("http://www.archive.org/") val base = RecordLoader.loadArchives(arcPath, sc) @@ -81,7 +81,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Discard Domains") { + test("Discard domains DF") { val expected = "http://www.hideout.com.br/" val domain = Set("www.archive.org") val base = RecordLoader.loadArchives(arcPath, sc) @@ -92,7 +92,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Discard HttpStatus") { + test("Discard HTTP status DF") { val expected = "200" val statusCode = Set("000") val base = RecordLoader.loadArchives(arcPath, sc) @@ -103,7 +103,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Discard Content") { + test("Discard content DF") { val expected = "dns:www.archive.org" val contentRegex = Set("Content-Length: [0-9]{4}".r) val base = RecordLoader.loadArchives(arcPath, sc) @@ -115,7 +115,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Discard UrlPatterns") { + test("Discard URL patterns DF") { val expected = "dns:www.archive.org" val urlRegex = Set(".*images.*".r) val base = RecordLoader.loadArchives(arcPath, sc) @@ -127,7 +127,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Discard Languages") { + test("Discard languages DF") { val expected = "dns:www.archive.org" val languages = Set("th","de","ht") val base = RecordLoader.loadArchives(arcPath, sc) @@ -139,7 +139,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Keep HttpStatus") { + test("Keep HTTP status DF") { val expected = "http://www.archive.org/robots.txt" val statusCode = Set("200") val base = RecordLoader.loadArchives(arcPath, sc) @@ -150,7 +150,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Keep Date") { + test("Keep date DF") { val expected = "http://www.archive.org/" val month = List("04") val base = RecordLoader.loadArchives(arcPath, sc) @@ -161,7 +161,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Keep Urls") { + test("Keep URLs DF") { val expected = "http://www.archive.org/" val url = Set("http://www.archive.org/") val base = RecordLoader.loadArchives(arcPath, sc) @@ -172,7 +172,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Keep Domains") { + test("Keep domains DF") { val expected = "http://www.archive.org/robots.txt" val domain = Set("www.archive.org") val base = RecordLoader.loadArchives(arcPath, sc) @@ -183,7 +183,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Keep MimeTypesTika") { + test("Keep MIMEtypes Tika DF") { val expected = "image/jpeg" val mimeType = Set("image/jpeg") val base = RecordLoader.loadArchives(arcPath, sc) @@ -194,7 +194,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Keep MimeTypes") { + test("Keep MIMEtypes DF") { val expected = "text/html" val mimeType = Set("text/html") val base = RecordLoader.loadArchives(arcPath, sc) @@ -205,7 +205,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Keep content") { + test("Keep content DF") { val expected = "http://www.archive.org/images/logoc.jpg" val contentRegex = Set("Content-Length: [0-9]{4}".r) val base = RecordLoader.loadArchives(arcPath, sc) @@ -217,7 +217,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Keep UrlPatterns") { + test("Keep URL patterns DF") { val expected = "http://www.archive.org/images/go-button-gateway.gif" val urlRegex = Set(".*images.*".r) val base = RecordLoader.loadArchives(arcPath, sc) @@ -229,7 +229,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Keep Languages") { + test("Keep languages DF") { val expected = "http://www.archive.org/images/logoc.jpg" val languages = Set("th","de","ht") val base = RecordLoader.loadArchives(arcPath, sc) @@ -241,7 +241,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter { assert (base.toString == expected) } - test("Keep keepMimeTypes") { + test("Keep images DF") { val expected = "image/jpeg" val base = RecordLoader.loadArchives(arcPath, sc) .all() diff --git a/src/test/scala/io/archivesunleashed/RecordLoaderTest.scala b/src/test/scala/io/archivesunleashed/RecordLoaderTest.scala index a784a6fe..9260e666 100644 --- a/src/test/scala/io/archivesunleashed/RecordLoaderTest.scala +++ b/src/test/scala/io/archivesunleashed/RecordLoaderTest.scala @@ -37,7 +37,7 @@ class RecordLoaderTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("loads Warc") { + test("Load WARC") { val base = RecordLoader.loadArchives(warcPath, sc) .keepValidPages() .map(x => x.getUrl) diff --git a/src/test/scala/io/archivesunleashed/RecordRDDTest.scala b/src/test/scala/io/archivesunleashed/RecordRDDTest.scala index f5b89ea0..4121513c 100644 --- a/src/test/scala/io/archivesunleashed/RecordRDDTest.scala +++ b/src/test/scala/io/archivesunleashed/RecordRDDTest.scala @@ -43,21 +43,21 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("no valid pages") { + test("Expect no valid pages RDD") { val expectedLength = 0 val base = RecordLoader.loadArchives(badPath, sc) .keepValidPages().take(2) assert (base.length == expectedLength) } - test ("no images") { + test ("Expect no images RDD") { val expectedLength = 0 val base = RecordLoader.loadArchives(badPath, sc) .keepValidPages().take(2) assert (base.length == expectedLength) } - test("keep date") { + test("Keep date RDD") { val testDate = "2008" val base = RecordLoader.loadArchives(arcPath, sc) val component = DateComponent.YYYY @@ -68,7 +68,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { .map ( mp => mp.getUrl).take(3) assert (r2.sameElements(r)) } - test ("keep http status codes") { + test ("Keep HTTP status codes RDD") { val expected = 94 val base = RecordLoader.loadArchives(arcPath, sc) .keepValidPages() @@ -77,7 +77,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { assert (r2 == expected) } - test ("keep urls") { + test ("Keep URLs RDD") { val expected = 1 val base = RecordLoader.loadArchives(arcPath, sc) .keepValidPages() @@ -86,7 +86,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { assert (r2 == expected) } - test ("keep url patterns") { + test ("Keep URL patterns RDD") { val expected = 1 val base = RecordLoader.loadArchives(arcPath, sc) .keepValidPages() @@ -95,7 +95,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { assert (r2 == expected) } - test ("check for domains") { + test ("Keep domains RDD") { val expected = 91 val base2 = RecordLoader.loadArchives(arcPath, sc) .keepValidPages() @@ -104,7 +104,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { assert (x2 == expected ) } - test ("keep languages") { + test ("Keep languages RDD") { val base2 = RecordLoader.loadArchives(arcPath, sc) .keepValidPages() val langs: Set[String] = Set("en", "fr") @@ -115,7 +115,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { assert (r2.sameElements(r)) } - test ("discard languages") { + test ("Discard languages RDD") { val base2 = RecordLoader.loadArchives(arcPath, sc) .keepValidPages() val langs: Set[String] = Set("fr") @@ -125,7 +125,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { assert (r2.sameElements(r)) } - test ("keep mime tika") { + test ("Keep MIMEtype Tika RDD") { val base = RecordLoader.loadArchives(arcPath, sc) val mime = Set ("text/plain", "image/jpeg") val r2 = base.keepMimeTypesTika(mime) @@ -135,7 +135,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { "http://www.archive.org/images/logoc.jpg").deep) } - test ("keep mime web server") { + test ("Keep MIMEtype RDD") { val base = RecordLoader.loadArchives(arcPath, sc) val mime = Set ("text/plain", "image/jpeg") val r2 = base.keepMimeTypes(mime) @@ -145,7 +145,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { "http://www.archive.org/images/logoc.jpg").deep) } - test ("check for keep content"){ + test ("Keep content RDD"){ val expected = 1 val base = RecordLoader.loadArchives(arcPath, sc) .keepValidPages() @@ -156,7 +156,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { assert (y1 == expected) } - test ("discard mime web server") { + test ("Discard MIMEtype RDD") { val base = RecordLoader.loadArchives(arcPath, sc) val mime = Set ("text/plain", "image/jpeg") val r2 = base.discardMimeTypes(mime) @@ -165,7 +165,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { "http://www.archive.org/index.php").deep) } - test ("discard mime tika") { + test ("Discard MIMEtype Tika RDD") { val base = RecordLoader.loadArchives(arcPath, sc) val mime = Set ("text/plain", "image/jpeg") val r2 = base.discardMimeTypesTika(mime) @@ -174,7 +174,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { "http://www.archive.org/", "http://www.archive.org/index.php").deep) } - test ("discard date") { + test ("Discard date RDD") { val base = RecordLoader.loadArchives(arcPath, sc) val date = "20080430" val r = base.filter( x=> x.getCrawlDate != date).collect() @@ -182,7 +182,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { assert (r.deep == Array().deep) } - test ("discard urls") { + test ("Discard URLs RDD") { val expected = 94 val base = RecordLoader.loadArchives(arcPath, sc) .keepValidPages() @@ -191,7 +191,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { assert (r2 == expected) } - test ("discard url patterns") { + test ("Discard URL patterns RDD") { val expected = 93 val base = RecordLoader.loadArchives(arcPath, sc) .keepValidPages() @@ -200,7 +200,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { assert (r2 == expected) } - test ("discard http status codes") { + test ("Discard HTTP status codes RDD") { val expected = 46 val base = RecordLoader.loadArchives(arcPath, sc) val statusCodes: Set[String] = Set ("200", "404") @@ -208,7 +208,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { assert (r2 == expected) } - test ("discard domains") { + test ("Discard domains RDD") { val expected = 94 val base = RecordLoader.loadArchives(arcPath, sc) .keepValidPages() @@ -217,7 +217,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { assert (r2 == expected) } - test ("discard content") { + test ("Discard content RDD") { val expected = 93 val base = RecordLoader.loadArchives(arcPath, sc) .keepValidPages() diff --git a/src/test/scala/io/archivesunleashed/WarcTest.scala b/src/test/scala/io/archivesunleashed/WarcTest.scala index f79b936f..5eb2b112 100644 --- a/src/test/scala/io/archivesunleashed/WarcTest.scala +++ b/src/test/scala/io/archivesunleashed/WarcTest.scala @@ -41,11 +41,11 @@ class WarcTest extends FunSuite with BeforeAndAfter { records = RecordLoader.loadArchives(warcPath, sc) } - test("count records") { + test("Count records") { assert(299L == records.count) } - test("warc extract domain") { + test("WARC extract domain RDD") { val take = 10 val expectedLength = 3 val r = records @@ -57,7 +57,7 @@ class WarcTest extends FunSuite with BeforeAndAfter { assert(r.length == expectedLength) } - test("warc get content") { + test("WARC get content RDD") { val a = RecordLoader.loadArchives(warcPath, sc) .map(r => r.getContentString) .take(1) diff --git a/src/test/scala/io/archivesunleashed/app/CommandLineAppTest.scala b/src/test/scala/io/archivesunleashed/app/CommandLineAppTest.scala index 7c898f59..efaa5565 100644 --- a/src/test/scala/io/archivesunleashed/app/CommandLineAppTest.scala +++ b/src/test/scala/io/archivesunleashed/app/CommandLineAppTest.scala @@ -70,7 +70,7 @@ class CommandLineAppTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("command line app tests") { + test("Command line app functionality tests") { for {a <- testSuccessCmds} { app.CommandLineAppRunner.test(a, sc) assert(Files.exists(Paths.get(outputDir))) diff --git a/src/test/scala/io/archivesunleashed/app/DomainFrequencyExtractorTest.scala b/src/test/scala/io/archivesunleashed/app/DomainFrequencyExtractorTest.scala index 0ec4d047..716cdabd 100644 --- a/src/test/scala/io/archivesunleashed/app/DomainFrequencyExtractorTest.scala +++ b/src/test/scala/io/archivesunleashed/app/DomainFrequencyExtractorTest.scala @@ -38,7 +38,7 @@ class DomainFrequencyExtractorTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("DomainFrequencyExtractor") { + test("Domain frequency extractor RDD & DF") { val rdd = RecordLoader.loadArchives(arcPath, sc).keepValidPages() val df = RecordLoader.loadArchives(arcPath, sc).webpages() diff --git a/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorDfTest.scala b/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorDfTest.scala index 6d17d690..42685824 100644 --- a/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorDfTest.scala +++ b/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorDfTest.scala @@ -38,7 +38,7 @@ class DomainGraphExtractorDfTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("Web Graph Extractor") { + test("Domain graph extractor DF") { val TESTLENGTH = 166 val TESTRESULT = 280 val df = RecordLoader.loadArchives(arcPath, sc).webgraph() diff --git a/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorTest.scala b/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorTest.scala index 76402b81..3fa14ad7 100644 --- a/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorTest.scala +++ b/src/test/scala/io/archivesunleashed/app/DomainGraphExtractorTest.scala @@ -38,7 +38,7 @@ class DomainGraphExtractorTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("DomainGraphExtractor") { + test("Domain graph extractor RDD") { val rdd = RecordLoader.loadArchives(arcPath, sc) val rddResult = DomainGraphExtractor(rdd).collect() diff --git a/src/test/scala/io/archivesunleashed/app/ExtractEntitiesTest.scala b/src/test/scala/io/archivesunleashed/app/ExtractEntitiesTest.scala index ed4da00b..9a57d882 100644 --- a/src/test/scala/io/archivesunleashed/app/ExtractEntitiesTest.scala +++ b/src/test/scala/io/archivesunleashed/app/ExtractEntitiesTest.scala @@ -51,7 +51,7 @@ class ExtractEntitiesTest extends FunSuite with BeforeAndAfter { LOG.info("Output can be found in " + tempDir.getPath) } - test("Extract from Record") { + test("Extract entities from record") { val e = ExtractEntities.extractFromRecords(iNerClassifierFile, archivePath, tempDir + "/scrapeArcEntities", sc).take(3).last assert(e._1 == "hello") } diff --git a/src/test/scala/io/archivesunleashed/app/ExtractGraphXTest.scala b/src/test/scala/io/archivesunleashed/app/ExtractGraphXTest.scala index 58736e30..a77d4e8f 100644 --- a/src/test/scala/io/archivesunleashed/app/ExtractGraphXTest.scala +++ b/src/test/scala/io/archivesunleashed/app/ExtractGraphXTest.scala @@ -48,7 +48,7 @@ import scala.util.Try sc = new SparkContext(conf) } - test ("Case classes are empty") { + test ("Check for empty classes") { val pageRank = 0.56 val weak = 4 val strong = 5 @@ -65,7 +65,7 @@ import scala.util.Try assert(testPR.strong == strong) } - test("creates a network with pagerank scores") { + test("Create a network with pagerank scores") { val pageRank = 0.9943090942904987 val connected = -649648005 val minEdges = 5 @@ -83,7 +83,7 @@ import scala.util.Try assert(pRank(0)._2.strong == connected) } - test("creates a network using dynamic PR") { + test("Create a network using dynamic pagerank") { val dynPageRank = 0.9999999999999986 val connected = -1054421350 val minEdges = 5 diff --git a/src/test/scala/io/archivesunleashed/app/ExtarctImageDetailsDFTest.scala b/src/test/scala/io/archivesunleashed/app/ExtractImageDetailsDFTest.scala similarity index 95% rename from src/test/scala/io/archivesunleashed/app/ExtarctImageDetailsDFTest.scala rename to src/test/scala/io/archivesunleashed/app/ExtractImageDetailsDFTest.scala index bfb69539..66df8a2d 100644 --- a/src/test/scala/io/archivesunleashed/app/ExtarctImageDetailsDFTest.scala +++ b/src/test/scala/io/archivesunleashed/app/ExtractImageDetailsDFTest.scala @@ -37,7 +37,7 @@ class ExtractImageDetailsDFTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("extracts Image Details") { + test("Extracts image details DF") { val exampledf = RecordLoader.loadArchives(arcPath, sc).keepImages().all() val imageDetails = ExtractImageDetailsDF(exampledf) val response1 = "http://www.archive.org/images/logoc.jpg" diff --git a/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesDFTest.scala b/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesDFTest.scala index 669fa192..c12c3f92 100644 --- a/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesDFTest.scala +++ b/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesDFTest.scala @@ -39,7 +39,7 @@ class ExtractPopularImagesDFTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("extracts popular images") { + test("Extract popular images DF") { val highTest = 507 val exampledf = RecordLoader.loadArchives(arcPath, sc).images() val imagesLowLimit = ExtractPopularImagesDF(exampledf, 3) diff --git a/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesRDDTest.scala b/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesRDDTest.scala index 38ffd509..ab3040e4 100644 --- a/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesRDDTest.scala +++ b/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesRDDTest.scala @@ -39,7 +39,7 @@ class ExtractPopularImagesRDDTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("extracts popular images") { + test("Extract popular images RDD") { val highTest = 507 val examplerdd = RecordLoader.loadArchives(arcPath, sc) val imagesLowLimit = ExtractPopularImagesRDD(examplerdd, 3, sc) diff --git a/src/test/scala/io/archivesunleashed/app/PlainTextExtractorTest.scala b/src/test/scala/io/archivesunleashed/app/PlainTextExtractorTest.scala index 1cd66fbd..21b5ac81 100644 --- a/src/test/scala/io/archivesunleashed/app/PlainTextExtractorTest.scala +++ b/src/test/scala/io/archivesunleashed/app/PlainTextExtractorTest.scala @@ -38,7 +38,7 @@ class PlainTextExtractorTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("PlainTextExtractorTest") { + test("Plain text extractor RDD & DF") { val rdd = RecordLoader.loadArchives(arcPath, sc).keepValidPages() val df = RecordLoader.loadArchives(arcPath, sc).webpages() val rddResults = PlainTextExtractor(rdd).collect() diff --git a/src/test/scala/io/archivesunleashed/app/WriteGEXFTest.scala b/src/test/scala/io/archivesunleashed/app/WriteGEXFTest.scala index 45eeae8b..73220e9f 100644 --- a/src/test/scala/io/archivesunleashed/app/WriteGEXFTest.scala +++ b/src/test/scala/io/archivesunleashed/app/WriteGEXFTest.scala @@ -45,7 +45,7 @@ class WriteGEXFTest extends FunSuite with BeforeAndAfter{ sc = new SparkContext(conf) } - test("creates the file") { + test("Creates the GEXF file") { val testLines = (0, 12, 22, 34) val networkrdd = sc.parallelize(network) WriteGEXF(networkrdd, testFile) @@ -57,7 +57,7 @@ class WriteGEXFTest extends FunSuite with BeforeAndAfter{ assert(lines(testLines._4) == """""") } - test("creates the file from Array[Row]") { + test("Creates the GEXF file from Array[Row]") { val testLines = (0, 12, 22, 34) if (Files.exists(Paths.get(testFile))) { new File(testFile).delete() @@ -74,7 +74,7 @@ class WriteGEXFTest extends FunSuite with BeforeAndAfter{ assert(!WriteGEXF(networkarray ,"")) } - test ("returns a Bool depending on pass or failure") { + test("Test if GEXF path is empty") { val networkrdd = sc.parallelize(network) val gexf = WriteGEXF(networkrdd, testFile) assert(gexf) diff --git a/src/test/scala/io/archivesunleashed/app/WriteGraphMLTest.scala b/src/test/scala/io/archivesunleashed/app/WriteGraphMLTest.scala index 5d788980..576fdfcf 100644 --- a/src/test/scala/io/archivesunleashed/app/WriteGraphMLTest.scala +++ b/src/test/scala/io/archivesunleashed/app/WriteGraphMLTest.scala @@ -46,7 +46,7 @@ class WriteGraphMLTest extends FunSuite with BeforeAndAfter{ sc = new SparkContext(conf) } - test("creates the file") { + test("Create GraphML the file") { val networkrdd = sc.parallelize(network) val lineCheck = (0, 15, 22, 30) WriteGraphML(networkrdd, testFile) @@ -58,7 +58,7 @@ class WriteGraphMLTest extends FunSuite with BeforeAndAfter{ assert(lines(lineCheck._4) == """3""") } - test ("returns a Bool depending on pass or failure") { + test ("Test if GraphML path is empty") { val networkrdd = sc.parallelize(network) val graphml = WriteGraphML(networkrdd, testFile) assert(graphml) diff --git a/src/test/scala/io/archivesunleashed/app/WriteGraphTest.scala b/src/test/scala/io/archivesunleashed/app/WriteGraphTest.scala index 46029be9..2be89d51 100644 --- a/src/test/scala/io/archivesunleashed/app/WriteGraphTest.scala +++ b/src/test/scala/io/archivesunleashed/app/WriteGraphTest.scala @@ -64,7 +64,7 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{ sc = new SparkContext(conf) } - test("creates the file") { + test("Create the graph file") { val testLines = (0, 12, 22, 34) val networkrdd = sc.parallelize(network) WriteGraph.asGexf(networkrdd, testFile) @@ -76,7 +76,7 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{ assert(lines(testLines._4) == """""") } - test("creates the file from Array[Row]") { + test("Create the graph file from Array[Row]") { val testLines = (0, 12, 22, 34) if (Files.exists(Paths.get(testFile))) { new File(testFile).delete() @@ -94,14 +94,14 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{ assert(!WriteGraph(networkarray, "")) } - test ("returns a Bool depending on pass or failure") { + test ("Test if GEXF path is empty") { val networkrdd = sc.parallelize(network) val gexf = WriteGraph.asGexf(networkrdd, testFile) assert(gexf) assert(!WriteGraph.asGexf(networkrdd, "")) } - test ("Nodes zip with ids") { + test ("Nodes ZIP with IDs") { val networkrdd = sc.parallelize(networkWithDuplication) val nodeIds = WriteGraph.nodesWithIds(networkrdd).collect val expected = ("Source3", 0) @@ -120,7 +120,7 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{ assert (WriteGraph.nodeIdFromLabel(Option(null)) == -1) } - test ("Gets the id from a lookup") { + test ("Gets the ID from a lookup") { val nodes = WriteGraph.nodesWithIds(sc.parallelize(network)) val empty = -1 val expected = 6 @@ -130,7 +130,7 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{ assert (WriteGraph.nodeIdFromLabel(badlookup) == empty) } - test ("Edge ids are captured from lookup") { + test ("Edge IDs are captured from lookup") { val edges = WriteGraph.edgeNodes(sc.parallelize(network)) val expected = Array((date1, 6, 3, 3), (date2, 7, 4, 4), @@ -138,7 +138,7 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{ assert(edges.collect.deep == expected) } - test ("Graphml produces correct output") { + test ("GraphML produces correct output") { val testLines = (0, 12, 30, 37) val networkrdd = sc.parallelize(network) WriteGraph.asGraphml(networkrdd, testFile) @@ -150,7 +150,7 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{ assert(lines(testLines._4) == """""") } - test ("Graphml works with unescaped xml data") { + test ("GraphML works with unescaped XML data") { val testLines = (0, 12, 30, 37) val networkrdd = sc.parallelize(unescapedNetwork) WriteGraph.asGraphml(networkrdd, testFile) @@ -162,7 +162,7 @@ class WriteGraphTest extends FunSuite with BeforeAndAfter{ assert(lines(testLines._4) == """""") } - test( "Gexf works with unescaped xml data") { + test( "GEXF works with unescaped XML data") { val testLines = (0, 12, 29, 31) val networkrdd = sc.parallelize(unescapedNetwork) WriteGraph(networkrdd, testFile2) diff --git a/src/test/scala/io/archivesunleashed/app/WriteGraphXMLTest.scala b/src/test/scala/io/archivesunleashed/app/WriteGraphXMLTest.scala index b7083127..27337895 100644 --- a/src/test/scala/io/archivesunleashed/app/WriteGraphXMLTest.scala +++ b/src/test/scala/io/archivesunleashed/app/WriteGraphXMLTest.scala @@ -42,7 +42,7 @@ class WriteGraphXMLTest extends FunSuite with BeforeAndAfter{ sc = new SparkContext(conf) } - test("creates the file") { + test("Create the graph XML file") { val headerLocation = 0 val expectedLine = 13 val networkrdd = ExtractGraphX.extractGraphX(sc.parallelize(network)) @@ -54,7 +54,7 @@ class WriteGraphXMLTest extends FunSuite with BeforeAndAfter{ assert(lines(expectedLine) == """""") } - test ("returns a Bool depending on pass or failure") { + test ("Test if GraphML path is empty") { val networkrdd = ExtractGraphX.extractGraphX(sc.parallelize(network)) val pRank = ExtractGraphX.runPageRankAlgorithm(networkrdd) val graphml = WriteGraphXML(pRank, testFile) diff --git a/src/test/scala/io/archivesunleashed/df/DataFrameLoaderTest.scala b/src/test/scala/io/archivesunleashed/df/DataFrameLoaderTest.scala index 3ca90546..6b6e14cc 100644 --- a/src/test/scala/io/archivesunleashed/df/DataFrameLoaderTest.scala +++ b/src/test/scala/io/archivesunleashed/df/DataFrameLoaderTest.scala @@ -44,7 +44,7 @@ class DataFrameLoaderTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("Test DataFrameLoader") { + test("Test DataFrameLoader (connection to PySpark)") { val df = new DataFrameLoader(sc) val validPages = df.webpages(arcPath) val hyperlinks = df.webgraph(arcPath) diff --git a/src/test/scala/io/archivesunleashed/df/ExtractAudioDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractAudioDetailsTest.scala index 9ea899da..beca222b 100644 --- a/src/test/scala/io/archivesunleashed/df/ExtractAudioDetailsTest.scala +++ b/src/test/scala/io/archivesunleashed/df/ExtractAudioDetailsTest.scala @@ -38,7 +38,7 @@ class AudioTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("Audio DF extraction") { + test("Audio files extraction DF") { val df = RecordLoader.loadArchives(warcPath, sc) .audio() diff --git a/src/test/scala/io/archivesunleashed/df/ExtractDateDFTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractDateDFTest.scala index bb660f7e..3f960d4a 100644 --- a/src/test/scala/io/archivesunleashed/df/ExtractDateDFTest.scala +++ b/src/test/scala/io/archivesunleashed/df/ExtractDateDFTest.scala @@ -39,7 +39,7 @@ class ExtractDateDFTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("Extract Dates DataFrame YYYY") { + test("Extract dates YYYY DF") { val df = RecordLoader.loadArchives(arcPath, sc) .webpages() @@ -77,7 +77,7 @@ class ExtractDateDFTest extends FunSuite with BeforeAndAfter { assert(results(2).get(3) == "http://www.sloan.org") } - test("Extract Dates DataFrame YYYYMM") { + test("Extract dates YYYYMM DF") { val df = RecordLoader.loadArchives(arcPath, sc) .webpages() @@ -115,7 +115,7 @@ class ExtractDateDFTest extends FunSuite with BeforeAndAfter { assert(results(2).get(3) == "http://www.sloan.org") } - test("Extract Dates DataFrame MM") { + test("Extract dates MM DF") { val df = RecordLoader.loadArchives(arcPath, sc) .webpages() @@ -153,7 +153,7 @@ class ExtractDateDFTest extends FunSuite with BeforeAndAfter { assert(results(2).get(3) == "http://www.sloan.org") } - test("Extract Dates DataFrame DD") { + test("Extract dates DD DF") { val df = RecordLoader.loadArchives(arcPath, sc) .webpages() @@ -191,7 +191,7 @@ class ExtractDateDFTest extends FunSuite with BeforeAndAfter { assert(results(2).get(3) == "http://www.sloan.org") } - test("Extract Dates DataFrame YYYYMMDD") { + test("Extract dates YYYYMMDD DF") { val df = RecordLoader.loadArchives(arcPath, sc) .webpages() diff --git a/src/test/scala/io/archivesunleashed/df/ExtractHyperlinksTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractHyperlinksTest.scala index 4abd7021..51fa100b 100644 --- a/src/test/scala/io/archivesunleashed/df/ExtractHyperlinksTest.scala +++ b/src/test/scala/io/archivesunleashed/df/ExtractHyperlinksTest.scala @@ -39,7 +39,7 @@ class ExtractHyperlinksTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("count records") { + test("Extract links DF") { val df = RecordLoader.loadArchives(arcPath, sc) .webpages() diff --git a/src/test/scala/io/archivesunleashed/df/ExtractImageDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractImageDetailsTest.scala index ebe754e7..843635ae 100644 --- a/src/test/scala/io/archivesunleashed/df/ExtractImageDetailsTest.scala +++ b/src/test/scala/io/archivesunleashed/df/ExtractImageDetailsTest.scala @@ -38,7 +38,7 @@ class ExtractImageDetailsTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("Image DF extraction") { + test("Image files extraction DF") { val df = RecordLoader.loadArchives(arcPath, sc) .images() diff --git a/src/test/scala/io/archivesunleashed/df/ExtractImageLinksTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractImageLinksTest.scala index 07ad0efe..f7e9453a 100644 --- a/src/test/scala/io/archivesunleashed/df/ExtractImageLinksTest.scala +++ b/src/test/scala/io/archivesunleashed/df/ExtractImageLinksTest.scala @@ -38,7 +38,7 @@ class ImageLinksTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("Fetch image links") { + test("Image links extraction DF") { val df = RecordLoader.loadArchives(arcPath, sc) .imageLinks() diff --git a/src/test/scala/io/archivesunleashed/df/ExtractPDFDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractPDFDetailsTest.scala index 15d31485..b17fb32a 100644 --- a/src/test/scala/io/archivesunleashed/df/ExtractPDFDetailsTest.scala +++ b/src/test/scala/io/archivesunleashed/df/ExtractPDFDetailsTest.scala @@ -38,7 +38,7 @@ class ExtractPDFDetailsTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("PDF DF extraction") { + test("PDF files extraction DF") { val df = RecordLoader.loadArchives(warcPath, sc) .pdfs() diff --git a/src/test/scala/io/archivesunleashed/df/ExtractPresentationProgramDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractPresentationProgramDetailsTest.scala index 26821a22..4168e5a6 100644 --- a/src/test/scala/io/archivesunleashed/df/ExtractPresentationProgramDetailsTest.scala +++ b/src/test/scala/io/archivesunleashed/df/ExtractPresentationProgramDetailsTest.scala @@ -38,7 +38,7 @@ class PresentationProgramFilesTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("Presentation Program DF extraction") { + test("Presentation program files extraction DF") { val df = RecordLoader.loadArchives(warcPath, sc) .presentationProgramFiles() diff --git a/src/test/scala/io/archivesunleashed/df/ExtractSpreadsheetDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractSpreadsheetDetailsTest.scala index 569362e2..c326b021 100644 --- a/src/test/scala/io/archivesunleashed/df/ExtractSpreadsheetDetailsTest.scala +++ b/src/test/scala/io/archivesunleashed/df/ExtractSpreadsheetDetailsTest.scala @@ -38,7 +38,7 @@ class ExtractSpreadsheetDetailsTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("Spreadsheet DF extraction") { + test("Spreadsheet files extraction DF") { val df = RecordLoader.loadArchives(warcPath, sc) .spreadsheets() diff --git a/src/test/scala/io/archivesunleashed/df/ExtractTextFilesDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractTextFilesDetailsTest.scala index c54b923e..97523fdc 100644 --- a/src/test/scala/io/archivesunleashed/df/ExtractTextFilesDetailsTest.scala +++ b/src/test/scala/io/archivesunleashed/df/ExtractTextFilesDetailsTest.scala @@ -40,7 +40,7 @@ class TextFilesTest extends FunSuite with BeforeAndAfter with Matchers { sc = new SparkContext(conf) } - test("Text Files DF extraction") { + test("Text files extraction DF") { val df = RecordLoader.loadArchives(warcPath, sc) .textFiles() @@ -56,7 +56,7 @@ class TextFilesTest extends FunSuite with BeforeAndAfter with Matchers { assert("32abd404fb560ecf14b75611f3cc5c2c" == extracted(0)(5)) } - test("Text Files DF robots.txt") { + test("Text files robots.txt DF") { val df = RecordLoader.loadArchives(testPath, sc) .textFiles() @@ -75,7 +75,7 @@ class TextFilesTest extends FunSuite with BeforeAndAfter with Matchers { robots(1)(0).toString should not include (".html") } - test("Text Files DF dns or filedesc") { + test("Text files dns or filedesc DF") { val df = RecordLoader.loadArchives(filedescPath, sc) .textFiles() diff --git a/src/test/scala/io/archivesunleashed/df/ExtractVideoDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractVideoDetailsTest.scala index b2521df4..be1048b0 100644 --- a/src/test/scala/io/archivesunleashed/df/ExtractVideoDetailsTest.scala +++ b/src/test/scala/io/archivesunleashed/df/ExtractVideoDetailsTest.scala @@ -38,7 +38,7 @@ class VideoTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("Video DF extraction") { + test("Video files extraction DF") { val df = RecordLoader.loadArchives(warcPath, sc) .videos() diff --git a/src/test/scala/io/archivesunleashed/df/ExtractWordProcessorDetailsTest.scala b/src/test/scala/io/archivesunleashed/df/ExtractWordProcessorDetailsTest.scala index e623416f..a9d474de 100644 --- a/src/test/scala/io/archivesunleashed/df/ExtractWordProcessorDetailsTest.scala +++ b/src/test/scala/io/archivesunleashed/df/ExtractWordProcessorDetailsTest.scala @@ -38,7 +38,7 @@ class WordProcessorFilesTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("Word Processor DF extraction") { + test("Word processor files extraction DF") { val df = RecordLoader.loadArchives(warcPath, sc) .wordProcessorFiles() diff --git a/src/test/scala/io/archivesunleashed/df/SaveMediaBytesTest.scala b/src/test/scala/io/archivesunleashed/df/SaveMediaBytesTest.scala index 8be9ba51..a0f9abc0 100644 --- a/src/test/scala/io/archivesunleashed/df/SaveMediaBytesTest.scala +++ b/src/test/scala/io/archivesunleashed/df/SaveMediaBytesTest.scala @@ -49,7 +49,7 @@ class SaveMediaBytesTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("Save audio bytes to disk") { + test("Save audio bytes to disk DF") { val df = RecordLoader.loadArchives(warcPath, sc) .audio() @@ -67,7 +67,7 @@ class SaveMediaBytesTest extends FunSuite with BeforeAndAfter { Files.delete(Paths.get(fileName)) } - test("Attempt to save invalid audio") { + test("Attempt to save invalid audio DF") { val dummyEncBytes = Base64.getEncoder.encodeToString(Array.range(0, 127) .map(_.toByte)) val dummyMD5 = ComputeMD5RDD(dummyEncBytes.getBytes) diff --git a/src/test/scala/io/archivesunleashed/df/SimpleDfTest.scala b/src/test/scala/io/archivesunleashed/df/SimpleDfTest.scala index db0b07e1..a2f9f397 100644 --- a/src/test/scala/io/archivesunleashed/df/SimpleDfTest.scala +++ b/src/test/scala/io/archivesunleashed/df/SimpleDfTest.scala @@ -39,7 +39,7 @@ class SimpleDfTest extends FunSuite with BeforeAndAfter { sc = new SparkContext(conf) } - test("count records") { + test("Count records DF") { val df = RecordLoader.loadArchives(arcPath, sc) .webpages() diff --git a/src/test/scala/io/archivesunleashed/matchbox/ComputeImageSizeTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ComputeImageSizeTest.scala index b2c58100..3b7981ca 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/ComputeImageSizeTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/ComputeImageSizeTest.scala @@ -36,7 +36,7 @@ class ComputeImageSizeTest extends FunSuite { var image: Array[Byte] = ios.toByteArray(); ios.close() - test ("check images") { + test ("Check images and provide size RDD") { val imageSize = (10, 10) val emptyImageSize = (0, 0) assert(ComputeImageSize(image) == imageSize) diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractBoilerPipeTextTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractBoilerPipeTextTest.scala index 49be1e2f..db1ea680 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/ExtractBoilerPipeTextTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractBoilerPipeTextTest.scala @@ -31,13 +31,13 @@ class ExtractBoilerPipeTextTest extends FunSuite {
Copyright 2017
""" var boiler = """Copyright 2017""" - test("Collects boilerpipe") { + test("Collects boilerpipe RDD") { assert(ExtractBoilerpipeTextRDD(text) == boiler) assert(ExtractBoilerpipeTextRDD("") == "") assert(ExtractBoilerpipeTextRDD("All Rights Reserved.") == "") } - test("Removes Header information") { + test("Removes HTTP header information RDD") { assert(ExtractBoilerpipeTextRDD(header + text) == boiler) } } diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractDateRDDTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractDateRDDTest.scala index 8f15aace..7cdaa278 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/ExtractDateRDDTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractDateRDDTest.scala @@ -24,7 +24,7 @@ import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class ExtractDateRDDTest extends FunSuite { - test("RDD date extraction") { + test("Date extraction RDD") { val date = "20151204" val startSS = 0 val yearSS = 4 diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractDomainTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractDomainTest.scala index af7fe246..23249027 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/ExtractDomainTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractDomainTest.scala @@ -42,26 +42,26 @@ class ExtractDomainRDDTest extends FunSuite { private val data3 = Seq.newBuilder.+=( ("http://www.seetorontonow.canada-booknow.com\\booking_results.php", "www.seetorontonow.canada-booknow.com")).result() - test("simple") { + test("Extract simple domain extraction RDD") { data1.foreach { case (link, domain) => assert(ExtractDomainRDD(link) == domain) } } - test("withBase") { + test("Extract domains with base RDD") { data2.foreach { case (link, base, domain) => assert(ExtractDomainRDD(link, base) == domain) } } - test("error") { + test("Test for domain errors RDD") { // scalastyle:off null assert(ExtractDomainRDD(null) == "") assert(ExtractDomainRDD(index, null) == "") // scalastyle:on null } - test("backslash") { + test("Test for domain backslash RDD") { data3.foreach { case (link, domain) => assert(ExtractDomainRDD(link) == domain) } diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractImageLinksTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractImageLinksTest.scala index 37343524..d13eeb36 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/ExtractImageLinksTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractImageLinksTest.scala @@ -24,7 +24,7 @@ import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class ExtractImageLinksRDDTest extends FunSuite { - test("simple") { + test("Extract simple image links RDD") { val fragment = """Image here: and another """ val extracted = ExtractImageLinksRDD("", fragment).toList @@ -33,7 +33,7 @@ class ExtractImageLinksRDDTest extends FunSuite { assert("http://baz.org/a/b/banner.jpg" == extracted(1)) } - test("relative") { + test("Extract relative image links RDD") { val fragment = """Image here: and another and """ val extracted = ExtractImageLinksRDD("http://foo.bar.com/a/page.html", fragment) @@ -43,7 +43,7 @@ class ExtractImageLinksRDDTest extends FunSuite { assert("http://foo.bar.com/logo.gif" == extracted(2)) } - test("errors") { + test("Test image link errors RDD") { val fragment = """Image here: and another and """ assert(ExtractImageLinksRDD("", "") == Nil) diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractLinksTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractLinksTest.scala index 4b18727d..9bfeef84 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/ExtractLinksTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractLinksTest.scala @@ -35,7 +35,7 @@ class ExtractLinksRDDTest extends FunSuite { val twitter = "http://www.twitter.com/" val head = "a search engine" - test("simple") { + test("Extract simple links RDD") { val extracted: Seq[(String, String, String)] = ExtractLinksRDD("", fragment) assert(extracted.size == 2) assert(url == extracted.head._2) @@ -44,7 +44,7 @@ class ExtractLinksRDDTest extends FunSuite { assert("Twitter" == extracted.last._3) } - test("relative") { + test("Extract relative links RDD") { val fragmentLocal: String = "Here is " + "a search engine.\nHere is a a relative URL.\n" val fooFragmentLocal = "http://www.foobar.org/page.html" @@ -56,7 +56,7 @@ class ExtractLinksRDDTest extends FunSuite { assert("a relative URL" == extracted.last._3) } - test("errors") { + test("Test link errors RDD") { val bytes: Array[Byte] = "wronglyTyped".getBytes() val invalid: String = "Here is a fake url bogus search engine." // scalastyle:off null diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractTextFromPDFsTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractTextFromPDFsTest.scala index 38c9b984..3e38bc24 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/ExtractTextFromPDFsTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractTextFromPDFsTest.scala @@ -24,7 +24,7 @@ import org.scalatest.Matchers @RunWith(classOf[JUnitRunner]) class ExtractTextFromPDFsTest extends FunSuite with Matchers { - test("get parser") { + test("Get PDF parser") { ExtractTextFromPDFs.pdfParser shouldBe a[PDFParser] } } diff --git a/src/test/scala/io/archivesunleashed/matchbox/RemoveHTMLTest.scala b/src/test/scala/io/archivesunleashed/matchbox/RemoveHTMLTest.scala index a1a8a5ef..bb96b02b 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/RemoveHTMLTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/RemoveHTMLTest.scala @@ -24,7 +24,7 @@ import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class RemoveHTMLRDDTest extends FunSuite { - test("simple") { + test("Remove HTML RDD") { val html = """ diff --git a/src/test/scala/io/archivesunleashed/matchbox/RemoveHTTPHeaderTest.scala b/src/test/scala/io/archivesunleashed/matchbox/RemoveHTTPHeaderTest.scala index 81d68d07..c7c6ae25 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/RemoveHTTPHeaderTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/RemoveHTTPHeaderTest.scala @@ -22,7 +22,7 @@ import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class RemoveHTTPHeaderRDDTest extends FunSuite { - test("simple") { + test("Remove HTTP header RDD") { val header = "HTTP/1.1 200 OK\r\n\r\nHello content" val nohttp = "This has no Http" val removed = RemoveHTTPHeaderRDD(header) diff --git a/src/test/scala/io/archivesunleashed/matchbox/StringUtilsTest.scala b/src/test/scala/io/archivesunleashed/matchbox/StringUtilsTest.scala index 70dbb4ed..3a3c7cb9 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/StringUtilsTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/StringUtilsTest.scala @@ -24,7 +24,7 @@ import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class StringUtilsTest extends FunSuite { - test("remove prefix") { + test("Remove prefix") { val s: String = "www.example.com" // scalastyle:off null val n: String = null @@ -33,7 +33,7 @@ class StringUtilsTest extends FunSuite { assert(n.removePrefixWWW() == "") } - test("create hash") { + test("Create hash") { val invalid: String = "AC&D\""; // scalastyle:off null val except: String = null; @@ -43,11 +43,11 @@ class StringUtilsTest extends FunSuite { assert (caught.getMessage == "Caught exception processing input row "); } - test ("md5 hash") { + test ("MD5 hash") { val s: String = "unesco.org"; assert(ComputeMD5RDD(s.getBytes) == "8e8decc8e8107bcf9d3896f3222b77d8"); } - test ("sh1 hash") { + test ("SHA1 hash") { val s: String = "unesco.org"; assert(ComputeSHA1RDD(s.getBytes) == "2d0e5377157172045d87befe46e157cda42c4f6e"); } diff --git a/src/test/scala/io/archivesunleashed/matchbox/TupleFormatterTest.scala b/src/test/scala/io/archivesunleashed/matchbox/TupleFormatterTest.scala index a2f9337b..f9c23ddd 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/TupleFormatterTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/TupleFormatterTest.scala @@ -29,13 +29,13 @@ import ops.tuple.ToList @RunWith(classOf[JUnitRunner]) class TupleFormatterTest extends FunSuite with Matchers { - test("tab delimit") { + test("Tab delimit") { val tuple = (("ab", "bl", ("c", 9)), "d", 5, ("hi", 1)) assert(TupleFormatter.tabDelimit(tuple) == "ab\tbl\tc\t9\td\t5\thi\t1") assert(TupleFormatter.tabDelimit.isInstanceOf[Poly1]) } - test("just flatten") { + test("Just flatten") { val tuple = ("an", 1, "cr", ("x", 3, ("NO", "YES")), "perhaps", "maybe", 3, (0,1)) val flatTuple = ("an", 1, "cr", "x", 3, "NO", "YES", "perhaps", "maybe", 3, 0, 1) assert(TupleFormatter.flatten(tuple) == flatTuple) diff --git a/src/test/scala/io/archivesunleashed/util/JsonUtilsTest.scala b/src/test/scala/io/archivesunleashed/util/JsonUtilsTest.scala index 12fc3548..a4fe0473 100644 --- a/src/test/scala/io/archivesunleashed/util/JsonUtilsTest.scala +++ b/src/test/scala/io/archivesunleashed/util/JsonUtilsTest.scala @@ -23,17 +23,17 @@ import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class JsonUtilsTest extends FunSuite { - test("proper Map") { + test("Proper Map") { val map: Map[Symbol, Any] = Map('a -> 1, 'b -> 2, 'c -> 3) assert(JsonUtils.toJson(map) == """{"a":1,"b":2,"c":3}""") } - test("any value") { + test("Any value") { val value = 12345 assert(JsonUtils.toJson(value) == "12345") } - test("json string") { + test("JSON string") { val jsonString = """{"a":1,"b":2,"c":3}""" assert(JsonUtils.fromJson(jsonString) == Map("a" -> 1, "b" -> 2, "c" -> 3)) }