Skip to content

Commit

Permalink
Clean up test descriptions, addresses #372. (#416)
Browse files: browse the repository at this point in the history
- Clean up test descriptions
- Rename typo filename
  • Loading branch information
ruebot authored and ianmilligan1 committed Jan 21, 2020
1 parent 71b459c commit ffef735
Show file tree
Hide file tree
Showing 47 changed files with 130 additions and 130 deletions.
12 changes: 6 additions & 6 deletions src/test/scala/io/archivesunleashed/ArcTest.scala
Original file line number | Diff line number | Diff line change
Expand Up @@ -41,11 +41,11 @@ class ArcTest extends FunSuite with BeforeAndAfter {

val dayMonthTestA = "200805"

test("count records") {
test("Count records") {
assert(RecordLoader.loadArchives(arcPath, sc).count == 300L)
}

test("filter date") {
test("Filter date RDD") {
val startSS = 0
val monthSS = 6
val four = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -62,7 +62,7 @@ class ArcTest extends FunSuite with BeforeAndAfter {
five.foreach(date => assert(date.substring(startSS, monthSS) == dayMonthTestA))
}

test("filter url pattern") {
test("Filter URL pattern RDD") {
val keepMatches = RecordLoader.loadArchives(arcPath, sc)
.keepUrlPatterns(Set("http://www.archive.org/about/.*".r))
val discardMatches = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -71,14 +71,14 @@ class ArcTest extends FunSuite with BeforeAndAfter {
assert(discardMatches.count == 284L)
}

test("count links") {
test("Count links RDD") {
val links = RecordLoader.loadArchives(arcPath, sc)
.map(r => ExtractLinksRDD(r.getUrl, r.getContentString))
.reduce((a, b) => a ++ b)
assert(links.size == 664)
}

test("detect language") {
test("Detect language RDD") {
val languageCounts = RecordLoader.loadArchives(arcPath, sc)
.keepMimeTypes(Set("text/html"))
.map(r => RemoveHTMLRDD(r.getContentString))
Expand All @@ -99,7 +99,7 @@ class ArcTest extends FunSuite with BeforeAndAfter {
}
}

test("detect mime type tika") {
test("Detect MIMEtype Tika RDD") {
val mimeTypeCounts = RecordLoader.loadArchives(arcPath, sc)
.map(r => RemoveHTTPHeaderRDD(r.getContentString))
.groupBy(content => DetectMimeTypeTika(content.getBytes))
Expand Down
10 changes: 5 additions & 5 deletions src/test/scala/io/archivesunleashed/ArchiveRecordTest.scala
Original file line number | Diff line number | Diff line change
Expand Up @@ -46,12 +46,12 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}

test("count records") {
test("Count records") {
assert(RecordLoader.loadArchives(arcPath, sc).count == 300L)
assert(RecordLoader.loadArchives(warcPath, sc).count == 299L)
}

test("Resource name produces expected result.") {
test("Resource name produces expected result") {
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => FilenameUtils.getName(x.getArchiveFilename))
.take(3)
Expand Down Expand Up @@ -81,7 +81,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
assert(textSampleWarc.deep == Array("", exampleUrl, exampleUrl).deep)
}

test("Urls") {
test("URLs") {
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => x.getUrl).take(3)
val textSampleWarc = RecordLoader.loadArchives(warcPath, sc)
Expand All @@ -92,7 +92,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
"http://www.archive.org/robots.txt", "http://www.archive.org/").deep)
}

test("Mime-Type") {
test("MIMEtype") {
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => x.getMimeType).take(3)
val textSampleWarc = RecordLoader.loadArchives(warcPath, sc)
Expand All @@ -103,7 +103,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
"text/html").deep)
}

test("Get Http Status") {
test("Get HTTP status") {
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => x.getHttpStatus).take(3)
val textSampleWarc = RecordLoader.loadArchives(warcPath, sc)
Expand Down
2 changes: 1 addition & 1 deletion src/test/scala/io/archivesunleashed/CountableRDDTest.scala
Original file line number | Diff line number | Diff line change
Expand Up @@ -38,7 +38,7 @@ class CountableRDDTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}

test("count records") {
test("Count records; Extract Domain RDD ") {
val base = RecordLoader.loadArchives(arcPath, sc)
.keepValidPages()
.map(r => ExtractDomainRDD(r.getUrl))
Expand Down
38 changes: 19 additions & 19 deletions src/test/scala/io/archivesunleashed/RecordDFTest.scala
Original file line number | Diff line number | Diff line change
Expand Up @@ -39,7 +39,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}

test("keep Valid Pages") {
test("Keep valid pages DF") {
val expected = "http://www.archive.org/"
val base = RecordLoader.loadArchives(arcPath, sc)
.all()
Expand All @@ -48,7 +48,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard MimeTypes") {
test("Discard MIMEtypes DF") {
val expected = "filedesc://IAH-20080430204825-00000-blackbook.arc"
val mimeTypes = Set("text/html")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -59,7 +59,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard Date") {
test("Discard date DF") {
val expected = "20080430"
val date = "20080429"
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -70,7 +70,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard Urls") {
test("Discard URLs DF") {
val expected = "http://www.archive.org/index.php"
val url = Set("http://www.archive.org/")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -81,7 +81,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard Domains") {
test("Discard domains DF") {
val expected = "http://www.hideout.com.br/"
val domain = Set("www.archive.org")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -92,7 +92,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard HttpStatus") {
test("Discard HTTP status DF") {
val expected = "200"
val statusCode = Set("000")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -103,7 +103,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard Content") {
test("Discard content DF") {
val expected = "dns:www.archive.org"
val contentRegex = Set("Content-Length: [0-9]{4}".r)
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -115,7 +115,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard UrlPatterns") {
test("Discard URL patterns DF") {
val expected = "dns:www.archive.org"
val urlRegex = Set(".*images.*".r)
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -127,7 +127,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard Languages") {
test("Discard languages DF") {
val expected = "dns:www.archive.org"
val languages = Set("th","de","ht")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -139,7 +139,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep HttpStatus") {
test("Keep HTTP status DF") {
val expected = "http://www.archive.org/robots.txt"
val statusCode = Set("200")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -150,7 +150,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep Date") {
test("Keep date DF") {
val expected = "http://www.archive.org/"
val month = List("04")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -161,7 +161,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep Urls") {
test("Keep URLs DF") {
val expected = "http://www.archive.org/"
val url = Set("http://www.archive.org/")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -172,7 +172,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep Domains") {
test("Keep domains DF") {
val expected = "http://www.archive.org/robots.txt"
val domain = Set("www.archive.org")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -183,7 +183,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep MimeTypesTika") {
test("Keep MIMEtypes Tika DF") {
val expected = "image/jpeg"
val mimeType = Set("image/jpeg")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -194,7 +194,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep MimeTypes") {
test("Keep MIMEtypes DF") {
val expected = "text/html"
val mimeType = Set("text/html")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -205,7 +205,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep content") {
test("Keep content DF") {
val expected = "http://www.archive.org/images/logoc.jpg"
val contentRegex = Set("Content-Length: [0-9]{4}".r)
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -217,7 +217,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep UrlPatterns") {
test("Keep URL patterns DF") {
val expected = "http://www.archive.org/images/go-button-gateway.gif"
val urlRegex = Set(".*images.*".r)
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -229,7 +229,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep Languages") {
test("Keep languages DF") {
val expected = "http://www.archive.org/images/logoc.jpg"
val languages = Set("th","de","ht")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -241,7 +241,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep keepMimeTypes") {
test("Keep images DF") {
val expected = "image/jpeg"
val base = RecordLoader.loadArchives(arcPath, sc)
.all()
Expand Down
2 changes: 1 addition & 1 deletion src/test/scala/io/archivesunleashed/RecordLoaderTest.scala
Original file line number | Diff line number | Diff line change
Expand Up @@ -37,7 +37,7 @@ class RecordLoaderTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}

test("loads Warc") {
test("Load WARC") {
val base = RecordLoader.loadArchives(warcPath, sc)
.keepValidPages()
.map(x => x.getUrl)
Expand Down
Loading

0 comments on commit ffef735

Please sign in to comment.