diff --git a/.travis.yml b/.travis.yml index 1ad3f52..d72bb68 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,7 +27,7 @@ install: - if [[ "x$QLIC_KC" != "x" ]]; then echo -n $QLIC_KC |base64 --decode > q/kc.lic; pip -q install -r requirements.txt; - python -m spacy download en; + python -m spacy download en; fi beforescript: - IMPLEMENTATION=$(if [[ "x$TRAVIS_TAG" == "x" ]]; then echo $TRAVIS_BRANCH-$TRAVIS_COMMIT; else echo $TRAVIS_TAG; fi;) diff --git a/README.md b/README.md index a9d3821..f0e0e7a 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,9 @@ The NLP allows users to parse dataset using the spacy model from python in which The following python packages are required: 1. numpy 2. beautifulsoup4 - 3. spacy + 3. spacy + +* Tests were run using spacy version 2.2.1 To install these packages with @@ -27,11 +29,50 @@ pip install -r requirements.txt ``` or with conda ```bash -conda install --file requirements.txt +conda install -c conda-forge --file requirements.txt ``` * Download the English model using ```python -m spacy download en``` - +
+Other languages that spacy supports can be found at https://spacy.io/usage/models#languages +
+To use languages that are in the alpha stage of development in spacy, the following steps can be taken: +
+To download the Chinese model, jieba must be installed +
+pip +```bash +pip install jieba +``` +
+To download the Japanese model, mecab must be installed +
+pip +```bash +pip install mecab-python3 +``` +
+* spacy_hunspell is not a requirement to run these scripts, but can be installed using the following methods: +
+Linux +```bash +sudo apt-get install libhunspell-dev hunspell +pip install spacy_hunspell +``` +
+macOS +```bash +wget https://iweb.dl.sourceforge.net/project/wordlist/speller/2019.10.06/hunspell-en_US-2019.10.06.zip; +unzip hunspell-en_US-2019.10.06; sudo mv en_US.dic en_US.aff /Library/Spelling/; +brew install hunspell; +export C_INCLUDE_PATH=/usr/local/include/hunspell; +sudo ln -sf /usr/local/lib/libhunspell-1.7.a /usr/local/lib/libhunspell.a; +sudo ln -sf /usr/local/Cellar/hunspell/1.7.0_2/lib/libhunspell-1.7.dylib /usr/local/Cellar/hunspell/1.7.0_2/lib/libhunspell.dylib; +CFLAGS=$(pkg-config --cflags hunspell) LDFLAGS=$(pkg-config --libs hunspell) pip install hunspell==0.5.0 +``` +
+At the moment spacy_hunspell does not support installation on Windows. 
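+Once spacy_hunspell is installed, the new `spell` option added to `.nlp.newParser` in this change can be requested alongside the usual parser options. A minimal sketch, assuming the library has already been loaded as shown in the Installation section below (the variable name and example sentence are illustrative only):
+
+```q
+q)spellParser:.nlp.newParser[`en;`tokens`spell]  / spell correction requires spacy_hunspell
+q)first spellParser enlist"Ther is a tpyo in this sentense"
+```
+
+As implemented in code/parser.q, no `spell` column is returned; the option makes the parser replace misspelled tokens with hunspell suggestions before the other attributes are extracted.
+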
More information can be found at https://github.com/tokestermw/spacy_hunspell + ## Installation Run tests with @@ -39,18 +80,21 @@ Run tests with q test.q ``` -Place the library file in `$QHOME` and load `nlp/init.q` +Place the library file in `$QHOME` and load into a q instance using + ```q -q)\l nlp/init.q -Loading utils.q -Loading regex.q -Loading sent.q -Loading parser.q -Loading time.q -Loading date.q -Loading email.q -Loading cluster.q -Loading nlp.q +q)\l nlp/nlp.q +q).nlp.loadfile`:init.q +Loading init.q +Loading code/utils.q +Loading code/regex.q +Loading code/sent.q +Loading code/parser.q +Loading code/time.q +Loading code/date.q +Loading code/email.q +Loading code/cluster.q +Loading code/nlp_code.q q).nlp.findTimes"I went to work at 9:00am and had a coffee at 10:20" 09:00:00.000 "9:00am" 18 24 10:20:00.000 "10:20" 45 50 @@ -73,15 +117,15 @@ If you have [Docker installed](https://www.docker.com/community-edition) you can KDB+ 3.5 2018.04.25 Copyright (C) 1993-2018 Kx Systems l64/ 4()core 7905MB kx 0123456789ab 172.17.0.2 EXPIRE 2018.12.04 bob@example.com KOD #0000000 - Loading utils.q - Loading regex.q - Loading sent.q - Loading parser.q - Loading time.q - Loading date.q - Loading email.q - Loading cluster.q - Loading nlp.q + Loading code/utils.q + Loading code/regex.q + Loading code/sent.q + Loading code/parser.q + Loading code/time.q + Loading code/date.q + Loading code/email.q + Loading code/cluster.q + Loading code/nlp_code.q q).nlp.findTimes"I went to work at 9:00am and had a coffee at 10:20" 09:00:00.000 "9:00am" 18 24 10:20:00.000 "10:20" 45 50 @@ -97,9 +141,7 @@ If you have [Docker installed](https://www.docker.com/community-edition) you can Documentation is available on the [nlp](https://code.kx.com/v2/ml/nlp/) homepage. - - - + ## Status diff --git a/build/getembedpy.q b/build/getembedpy.q index bc802b8..fe30fc6 100644 --- a/build/getembedpy.q +++ b/build/getembedpy.q @@ -1,5 +1,5 @@ qhome:hsym`$$[not count u:getenv`QHOME;[-2"QHOME not defined";exit 1];u]; -dl:{[s;url]$[s;;`/:]system"curl -u ",getenv[`GH_APIREAD]," -s -L ",url,$[s;" -J -O";""]} +dl:{[s;url]$[s;;`/:]system "curl -u ",getenv[`GH_APIREAD]," -s -L ",url,$[s;" -J -O";""]} download:{ assets:.j.k[dl[0b]"https://api.github.com/repos/KxSystems/embedPy/releases/",$[not[count x]|x~"latest";"latest";"tags/",x]]`assets; relurl:first exec browser_download_url from assets where name like{"*",x,"*"}(`m64`l64`w64!string`osx`linux`windows).z.o; diff --git a/cluster.q b/code/cluster.q similarity index 92% rename from cluster.q rename to code/cluster.q index 2c60e3f..3f981fb 100644 --- a/cluster.q +++ b/code/cluster.q @@ -6,7 +6,7 @@ cluster.i.asKeywords:{i.fillEmptyDocs $[-9=type x[0]`keywords;x;x`keywords]} // Get cohesiveness of cluster as measured by mean sum of squares error cluster.MSE:{[docs] $[0=n:count docs;0n;1=n;1.;0=sum count each docs;0n; - avg d*d:0^i.compareDocToCentroid[i.takeTop[50]i.fastSum docs]each i.fillEmptyDocs docs]} + avg d*d:0^compareDocToCentroid[i.takeTop[50]i.fastSum docs]each i.fillEmptyDocs docs]} // Bisecting k-means algo (repeatedly splits largest cluster in 2) cluster.bisectingKMeans:{[docs;k;n] @@ -24,7 +24,7 @@ cluster.kmeans:{[docs;k;n] }[docs]/(k;0N)#neg[nd]?nd:count docs:cluster.i.asKeywords docs} // Match each doc to nearest centroid -cluster.i.groupByCentroids:{[centroids;docs] +cluster.groupByCentroids:{[centroids;docs] value group{[centroids;doc]$[0=mn; + clustersOfOne:1=count each clusters:cluster.i.similarityMatrix similarities>=mn; if[not sample;:clusters where not 
clustersOfOne]; // Any cluster of 1 documents isn't a cluster, so throw it out outliers:raze clusters where clustersOfOne; @@ -76,11 +76,11 @@ cluster.MCL:{[docs;mn;sample] centroids:avg each keywords clusters; // Move each non-outlier to the nearest centroid nonOutliers:(til count docs)except idx outliers; - nonOutliers cluster.i.groupByCentroids[centroids;docs nonOutliers]} + nonOutliers cluster.groupByCentroids[centroids;docs nonOutliers]} // Graph clustering that works on a similarity matrix cluster.i.columnNormalize:{[mat]0f^mat%\:sum mat} -cluster.similarityMatrix:{[mat] +cluster.i.similarityMatrix:{[mat] matrix:"f"$mat; // SM Van Dongen's MCL clustering algorithm MCL:{[mat] @@ -105,4 +105,4 @@ cluster.summarize:{[docs;n] centroids,:nearest:i.maxIndex docs[;i.maxIndex summary]; summary-:docs nearest; summary:(where summary<0)_ summary]; - cluster.i.groupByCentroids[docs centroids;docs]} + cluster.groupByCentroids[docs centroids;docs]} diff --git a/date.q b/code/date_time.q similarity index 57% rename from date.q rename to code/date_time.q index d05555e..9bfa945 100644 --- a/date.q +++ b/code/date_time.q @@ -1,38 +1,38 @@ \d .nlp // Pad day string to 2 digits -tm.parseDay:{-2#"0",x where x in .Q.n} +tm.i.parseDay:{-2#"0",x where x in .Q.n} // Convert month string and pad to 2 digits -tm.months:`jan`feb`mar`apr`may`jun`jul`aug`sep`oct`nov`dec!`$string 1+til 12 -tm.parseMonth:{-2#"0",string x^tm.months x:lower`$3 sublist x} +tm.i.months:`jan`feb`mar`apr`may`jun`jul`aug`sep`oct`nov`dec!`$string 1+til 12 +tm.i.parseMonth:{-2#"0",string x^tm.i.months x:lower`$3 sublist x} // Pad year string to 4 digits (>35 deemed 1900s) -tm.parseYear:{-4#$[35<"I"$-2#x;"19";"20"],x} +tm.i.parseYear:{-4#$[35<"I"$-2#x;"19";"20"],x} // Convert year string to date range -tm.convY:{"D"$x,/:(".01.01";".12.31")} +tm.i.convY:{"D"$x,/:(".01.01";".12.31")} // Convert yearmonth string to date range -tm.convYM:{ +tm.i.convYM:{ matches:ungroup([fmt:"ym"]txt:regex.matchAll[;x]each regex.objects`year`month); matches:value select fmt,last txt by s from matches,'flip`txt`s`e!flip matches`txt; fmt:{@[x;where not xx;except[;raze x where xx:1=count each x]]}/[matches`fmt]; fmt:raze@[fmt;i where 1`hh$tm;1;0]*12:00} + + +// Find all times : list of 4-tuples (time; timeText; startIndex; 1+endIndex) +tm.findTimes:{time:(tm.i.parseTime each tmtxt[;0]),'tmtxt:regex.matchAll[regex.objects.time;x]; time where time[;0]<24:01} + // Find all dates : list of 5-tuples (startDate; endDate; dateText; startIndex; 1+endIndex) tm.findDates:{[text] rmInv:{x where not null x[;0]}; ym:regex.matchAll[regex.objects.yearmonth;text]; ymd:regex.matchAll[regex.objects.yearmonthday;text]; - dts:rmInv(tm.convYMD each ymd[;0]),'ymd; + dts:rmInv(tm.i.convYMD each ymd[;0]),'ymd; if[count dts;ym@:where not any ym[;1] within/: dts[; 3 4]]; - dts,:rmInv(tm.convYM each ym[;0]),'ym; + dts,:rmInv(tm.i.convYM each ym[;0]),'ym; dts iasc dts[;3]} diff --git a/email.q b/code/email.q similarity index 76% rename from email.q rename to code/email.q index 9c60cb3..490e4e9 100644 --- a/email.q +++ b/code/email.q @@ -1,11 +1,11 @@ \d .nlp //Loading python script to extract rtf text -system"l ",.nlp.path,"/","extract_rtf.p"; -striprtf:.p.get[`striprtf;<] +system"l ",.nlp.path,"/","code/extract_rtf.p"; +i.striprtf:.p.get[`striprtf;<] // Read mbox file, convert to table, parse metadata & content -email.i.getMboxText:{[fp]update text:.nlp.email.i.extractText each payload from email.i.parseMbox fp} +email.getMboxText:{[fp]update text:.nlp.email.i.extractText each payload from 
email.i.parseMbox fp} email.i.findmime:{all(99=type each y`payload;x~/:y`contentType;0b~'y[`payload]@'`attachment)} email.i.html2text:{email.i.bs[x;"html.parser"][`:get_text;"\\n"]`} / extract text from html @@ -16,7 +16,7 @@ email.i.extractText:{ / use beautiful soup to extract text from html count i:where email.i.findmime["text/html"]x ;"\n\n"sv{email.i.html2text x[y][`payload]`content}[x]each i; / use python script to extract text from rtf - count i:where email.i.findmime["application/rtf"]x ;"\n\n"sv{striprtf x[y][`payload]`content}[x]each i; + count i:where email.i.findmime["application/rtf"]x ;"\n\n"sv{i.striprtf x[y][`payload]`content}[x]each i; "\n\n"sv .z.s each x`payload]} @@ -31,7 +31,7 @@ email.i.getToFrom:{[msg] // Init python and q functions for reading mbox files email.i.parseMail:{email.i.parseMbox1 email.i.msgFromString[x]`.} email.i.parseMbox:{email.i.parseMbox1 each .p.list[<] .p.import[`mailbox;`:mbox]x} -email.i.parseMbox1:{k!email.get[k:`sender`to`date`subject`contentType`payload]@\:.p.wrap x} +email.i.parseMbox1:{k!email.get.i[k:`sender`to`date`subject`contentType`payload]@\:.p.wrap x} email.i.bs:.p.import[`bs4]`:BeautifulSoup email.i.getaddr:.p.import[`email.utils;`:getaddresses;<] @@ -40,13 +40,13 @@ email.i.decodehdr:.p.import[`email.header;`:decode_header] email.i.makehdr:.p.import[`email.header;`:make_header] email.i.msgFromString:.p.import[`email]`:message_from_string -email.get.sender:{email.i.getaddr e where not(::)~'e:raze x[`:get_all;<]each("from";"resent-from")} -email.get.to:{email.i.getaddr e where not any(::;"")~/:\:e:raze x[`:get_all;<]each("to";"cc";"resent-to";"resent-cc")} -email.get.date:{"P"$"D"sv".:"sv'3 cut{$[1=count x;"0";""],x}each string 6#email.i.parsedate x[@;`date]} -email.get.subject:{$[(::)~(s:x[@;`subject])`;"";email.i.makehdr[email.i.decodehdr s][`:__str__][]`]} -email.get.contentType:{x[`:get_content_type][]`} +email.get.i.sender:{email.i.getaddr e where not(::)~'e:raze x[`:get_all;<]each("from";"resent-from")} +email.get.i.to:{email.i.getaddr e where not any(::;"")~/:\:e:raze x[`:get_all;<]each("to";"cc";"resent-to";"resent-cc")} +email.get.i.date:{"P"$"D"sv".:"sv'3 cut{$[1=count x;"0";""],x}each string 6#email.i.parsedate x[@;`date]} +email.get.i.subject:{$[(::)~(s:x[@;`subject])`;"";email.i.makehdr[email.i.decodehdr s][`:__str__][]`]} +email.get.i.contentType:{x[`:get_content_type][]`} / return a dict of `attachment`content or a table of payloads, content is byte[] for binary data, char[] for text -email.get.payload:{ +email.get.i.payload:{ if[x[`:is_multipart][]`;:email.i.parseMbox1 each x[`:get_payload][]`]; raw:x[`:get_payload;`decode pykw 1]; / raw bytes decoded from base64 encoding, wrapped embedPy if[all("application/rtf"~(x[`:get_content_type][]`);"attachment"~x[`:get_content_disposition][]`);:`attachment`content!(0b;raw`)]; diff --git a/extract_rtf.p b/code/extract_rtf.p similarity index 100% rename from extract_rtf.p rename to code/extract_rtf.p diff --git a/code/nlp_code.q b/code/nlp_code.q new file mode 100644 index 0000000..ca361b5 --- /dev/null +++ b/code/nlp_code.q @@ -0,0 +1,163 @@ +\d .nlp + +// Date-Time + +// Find all dates : list of 5-tuples (startDate; endDate; dateText; startIndex; 1+endIndex) +findDates:tm.findDates + +// Find all times : list of 4-tuples (time; timeText; startIndex; 1+endIndex) +findTimes:tm.findTimes + +// Email + +// Read mbox file, convert to table, parse metadata & content +email.loadEmails:loadEmails:email.getMboxText + +// Graph of who emailed whom, inc number of mails 
+email.getGraph:{[msgs] + 0!`volume xdesc select volume:count i by sender,to from flip`sender`to!flip`$raze email.i.getToFrom each msgs} + +email.parseMail:email.i.parseMail + +// Sentiment + +// Calculate sentiment of sentence of short message +sentiment:sent.score + +// Comparing docs/terms + +// Give 2 dicts of each term's affinity to each corpus +// Algorithm from Rayson, Paul, and Roger Garside. "Comparing corpora using frequency profiling." +// Proceedings of the workshop on Comparing Corpora. Association for Computational Linguistics, 2000 +compareCorpora:{[corp1;corp2] + if[(not count corp1)|(not count corp2);:((`$())!();(`$())!())]; + getTermCount:{[corp] + i.fastSum{1+log count each group x}each corp[`tokens]@'where each not corp`isStop}; + totalWordCountA:sum termCountA:getTermCount corp1; + totalWordCountB:sum termCountB:getTermCount corp2; + // The expected termCount of each term in each corpus + coef:(termCountA+termCountB)%(totalWordCountA+totalWordCountB); + expectedA:totalWordCountA*coef; + expectedB:totalWordCountB*coef; + // Return the differences between the corpora + (desc termCountA*log termCountA%expectedA;desc termCountB*log termCountB%expectedB)} + +// Calc cosine similarity of two docs +compareDocs:{cosineSimilarity .(x;y)@\:distinct raze key each(x;y)} + +// Compare similarity of 2 vectors +cosineSimilarity:{sum[x*y]%(sqrt sum x*x)*sqrt sum y*y} + +// How much each term contributes to the cosine similarity +explainSimilarity:{[doc1;doc2] + alignedKeys:inter[key doc1;key doc2]; + doc1@:alignedKeys; + doc2@:alignedKeys; + product:(doc2%i.magnitude doc1)*(doc2%i.magnitude doc2); + desc alignedKeys!product%sum product} + +// Cosine similarity of doc and centroid +compareDocToCentroid:{[centroid;doc] + doc@:alignedKeys:distinct key[centroid],key doc; + cosineSimilarity[doc;centroid[alignedKeys]-doc]} + +// Calc cosine similarity between doc and entire corpus +compareDocToCorpus:i.compareDocToCorpus + +// Jaro-Winkler distance between 2 strings +jaroWinkler:{i.jaroWinkler[lower x;lower y]} + +// Feature Vectors + +// Generate feature vector (of stemmed tokens) for a term +findRelatedTerms:{[docs;term] + sent:raze docs[`sentIndices]cut'@'[docs[`tokens];where each docs`isStop;:;`]; + sent@:asc distinct raze 0|-1 0 1+\:where(term:lower term)in/:sent; + ccur:` _ count each group raze distinct each sent; + tcur:idx@'group each docs[`tokens]@'idx:where each docs[`tokens]in\:key ccur; + tcur:i.fastSum((count distinct@)each)each docs[`sentIndices]bin'tcur; + ccur%:tcur term; + tcur%:sum count each docs`sentIndices; + desc except[where r>0;term]#r:(ccur-tcur)%sqrt tcur*1-tcur} + +// Find runs containing term where each word has above average co-ocurrance with term +extractPhrases:{[corpus;term] + relevant:term,sublist[150]where 01)#r:count each group r where term in/:r:raze tokens@'runs} + +// On a conceptually single doc (e.g. novel), gives better results than TF-IDF +// This algorithm is explained in the paper +// Carpena, P., et al. "Level statistics of words: Finding keywords in literary texts and symbolic sequences." +// Physical Review E 79.3 (2009): 035102. 
+keywordsContinuous:{[docs] + n:count each gt:group text:raze docs[`tokens]@'where each not docs`isStop; + words:where n>=4|.00002*count text; + dist:deltas each words#gt; + sigma:(dev each dist)%(avg each dist)*sqrt 1-(n:words#n)%count text; + std_sigma:1%sqrt[n]*1+2.8*n xexp -0.865; + chev_sigma:((2*n)-1)%2*n+1; + desc(sigma-chev_sigma)%std_sigma} + +// Find TFIDF scores for all terms in all documents +TFIDF:{[corpus] + tokens:corpus[`tokens]@'where each not corpus[`isStop]|corpus[`tokens]like\:"[0-9]*"; + tab:{x!{sum[x in y]%count x}[y]each x}'[words:distinct each tokens;tokens]; + tab*idf:1+log count[tokens]%{sum{x in y}[y]each x}[tokens]each words} + +TFIDF_tot:{[corpus]desc sum t%'sum each t:TFIDF corpus} + +// Parse Data + +// Create a new parser using a spaCy model (must already be installed) +newParser:parser.newParser + +// Parse urls to dictionaries +parseURLs:{`scheme`domainName`path`parameters`query`fragment!i.parseURLs x} + +// Exploratory Analysis + +// Find runs of tokens whose POS tags are in the set passed in +// Returns pair (text; firstIndex) +findPOSRuns:{[tagType;tags;doc] + start:where 1=deltas matchingTag:doc[tagType]in tags; + ii:start+til each lengths:sum each start cut matchingTag; + runs:`$" "sv/:string each doc[`tokens]start+til each lengths; + flip(runs;ii)} + +// Currently only for 2-gram +bi_gram:{[corpus] + tokens:raze corpus[`tokens]@'where each not corpus[`isStop]|corpus[`tokens]like\:"[0-9]*"; + occ:(distinct tokens)!{count where y=x}[tokens]each distinct tokens; + raze{[x;y;z;n](enlist(z;n))!enlist(count where n=x 1+where z=x)%y[z]}[tokens;occ]''[tokens;next tokens]} + +// Util + +// Find Regular expressions within texts +findRegex:{[text;expr]($[n;enlist;]expr)!$[n:1=count[expr];enlist;]{regex.matchAll[regex.objects[x];y]}[;text]each expr} + +// Remove any ascii characters from a text +ascii:{x where x within (0;127)} + +// Remove certain characters from a string of text +rmv_custom:{rtrim raze(l where{not(max ,'/)x like/:y}[;y]each l:" "vs x),'" "} + +// Remove and replace certain characters from a string of text +rmv_master:{{x:ssr[x;y;z];x}[;;z]/[x;y]} + +// Detect language from text +detectLang:{[text]`$.p.import[`langdetect][`:detect;<][text]} + +// Import all files in a dir recursively +loadTextFromDir:{[fp] + path:{[fp]raze$[-11=type k:key fp:hsym fp;fp;.z.s each` sv'fp,'k]}`$fp; + ([]fileName:(` vs'path)[;1];path;text:"\n"sv'read0 each path)} + +// Get all sentences for a doc +getSentences:i.getSentences + +// n-gram +ngram:{[corpus;n] + tokens:raze corpus[`tokens]@'where each not corpus[`isStop]|corpus[`tokens]like\:"[0-9]*"; + raze[key[b],/:'{key x}each value b]!raze value each value b:{(count each group x)%count x + }each last[tab]group neg[n-1]_flip(n-1)#tab:rotate\:[til n]tokens} diff --git a/parser.q b/code/parser.q similarity index 81% rename from parser.q rename to code/parser.q index a40b8da..2e4e705 100644 --- a/parser.q +++ b/code/parser.q @@ -2,10 +2,24 @@ .p.import[`sys;:;`:argv;()]; / spacy expects python be the main process +p)def spell(doc,model): + lst=[] + for s in doc: + if s._.hunspell_spell==False: + sug=s._.hunspell_suggest + if len(sug)>0: + ([lst.append(n)for n in model((sug)[0])]) + else:lst.append(s) + else: + lst.append(s) + return lst + // Python functions for running spacy p)def get_doc_info(parser,tokenAttrs,opts,text): - doc=parser(text) - res=[[getattr(w,a)for w in doc]for a in tokenAttrs] + doc=doc1=parser(text) + if('spell' in opts): + doc1=spell(doc,parser) + res=[[getattr(w,a)for w in doc1]for a in 
tokenAttrs] if('sentChars' in opts): # indices of first+last char per sentence res.append([(s.start_char,s.end_char)for s in doc.sents]) if('sentIndices' in opts): # index of first token per sentence @@ -49,8 +63,8 @@ parser.i.alphalang:(!). flip( (`zh;`Chinese)) // Create new parser -// Valid opts : text keywords likeEmail likeNumber likeURL isStop tokens lemmas uniPOS pennPOS starts sentChars sentIndices -parser.i.newParser:{[lang;opts] +// Valid opts : text keywords likeEmail likeNumber likeURL isStop tokens lemmas uniPOS pennPOS starts sentChars sentIndices spell +parser.newParser:{[lang;opts] opts:{distinct x,raze parser.i.depOpts x}/[colnames:opts]; disabled:`ner`tagger`parser except opts; model:parser.i.newSubParser[lang;opts;disabled]; @@ -65,6 +79,9 @@ parser.i.newSubParser:{[lang;opts;disabled] model:.p.import[$[`~chklng;`spacy;sv[`]`spacy.lang,lang]][hsym$[`~chklng;`load;chklng] ]. raze[$[`~chklng;lang;()];`disable pykw disabled]; if[`sbd in opts;model[`:add_pipe]$[`~chklng;model[`:create_pipe;`sentencizer];.p.pyget `x_sbd]]; + if[`spell in opts;if[not .p.import[`spacy.tokens][`:Token][`:has_extension]["hunspell_spell"]`; + sphun:.p.import[`spacy_hunspell]`:spaCyHunSpell;hunspell:sphun[model; + $[`Darwin~syst:`$.p.import[`platform][`:system][]`;`mac;lower syst]];model[`:add_pipe]hunspell]]; model} // Operations that must be done in q, or give better performance in q @@ -72,7 +89,7 @@ parser.i.runParser:{[pyParser;colnames;opts;stopwords;docs] t:parser.i.cleanUTF8 each docs; parsed:parser.i.unpack[pyParser;opts;stopwords]each t; if[`keywords in opts;parsed[`keywords]:TFIDF parsed]; - colnames#@[parsed;`text;:;t]} + (($[1=count colnames;enlist;]colnames) except `spell)#@[parsed;`text;:;t]} // Operations that must be done in q, or give better performance in q parser.i.unpack:{[pyParser;opts;stopwords;text] @@ -94,7 +111,7 @@ parser.i.unpack:{[pyParser;opts;stopwords;text] // Python indexes into strings by char instead of byte, so must be modified to index a q string parser.i.adjustIndices:{[text;doc] - adj:cont-til count cont:where text within"\200\277"; + adj:cont-til count cont:where ($[1~count text;enlist;]text) within"\200\277"; if[`starts in cols doc;doc[`starts ]+:adj binr 1+doc`starts ]; if[`sentChars in cols doc;doc[`sentChars]+:adj binr 1+doc`sentChars]; doc} diff --git a/regex.q b/code/regex.q similarity index 94% rename from regex.q rename to code/regex.q index c70c397..020b7b7 100644 --- a/regex.q +++ b/code/regex.q @@ -28,7 +28,5 @@ regex.patterns.yearmonthdayList:"(",sv["|";regex.patterns`year`month`day],")" regex.patterns.yearmonth: "(",sv[regex.patterns.dtsep;2#enlist regex.patterns.yearmonthList ],")" regex.patterns.yearmonthday: "(",sv[regex.patterns.dtsep;3#enlist regex.patterns.yearmonthdayList],")" -/regex.patterns.mnsep:"[\\t \\\\]+" -/regex.patterns.yearmonth: "(",sv[regex.patterns.mnsep;2#enlist regex.patterns.yearmonthList ],")" - regex.objects:regex.compile[;1b]each 1_regex.patterns + diff --git a/sent.q b/code/sent.q similarity index 73% rename from sent.q rename to code/sent.q index 1d52f7f..8e977a1 100644 --- a/sent.q +++ b/code/sent.q @@ -1,7 +1,7 @@ \d .nlp // Create regex used for tokenizing -sent.tokenPattern:{ +sent.i.tokenPattern:{ rightFacingEmoticons:"[<>]?[:;=8][\\-o\\*\\']?[\\)\\]\\(\\[dDpP/\\:\\}\\{@\\|\\\\]"; / n.b. 
Left-facing rarely used miscEmoticons:"<3|[0o][._][0o]|tokens; - upperIndices:where isUpperCase & not all isUpperCase; - valences[upperIndices]+:sent.ALLCAPS_INCR*signum valences upperIndices; - valences:sent.applyBoosters[tokens;isUpperCase;valences]; - valences:sent.negationCheck[tokens;valences]; - valences:sent.butCheck[tokens;valences]; - sent.scoreValence[0f^valences;text]} - // Calculate sentiment given individual valences -sent.scoreValence:{[valences;text] +sent.i.scoreValence:{[valences;text] if[not count valences;:`compound`pos`neg`neu!0 0 0 0f]; compound:sum valences; // Punctuation can increase the intensity of the sentiment - compound+:signum[compound]*punctAmplifier:sent.amplifyEP[text]+sent.amplifyQM text; + compound+:signum[compound]*punctAmplifier:sent.i.amplifyEP[text]+sent.i.amplifyQM text; // Normalize score compound:{x%sqrt 15+x*x}compound; // Discriminate between positive, negative and neutral sentiment scores @@ -131,3 +120,15 @@ sent.scoreValence:{[valences;text] // Used to noramlize the pos, neg and neutral sentiment total:positive+neutral+abs negative; `compound`pos`neg`neu!(compound,abs(positive;negative;neutral)%total)} + +// Calculate sentiment of a sentence of short message +sent.score:{[text] + valences:sent.i.lexicon tokens:lower rawTokens:sent.i.tokenize text; + isUpperCase:(rawTokens=upper rawTokens)& rawTokens<>tokens; + upperIndices:where isUpperCase & not all isUpperCase; + valences[upperIndices]+:sent.i.ALLCAPS_INCR*signum valences upperIndices; + valences:sent.i.applyBoosters[tokens;isUpperCase;valences]; + valences:sent.i.negationCheck[tokens;valences]; + valences:sent.i.butCheck[tokens;valences]; + sent.i.scoreValence[0f^valences;text]} + diff --git a/utils.q b/code/utils.q similarity index 92% rename from utils.q rename to code/utils.q index c522369..8f5f860 100644 --- a/utils.q +++ b/code/utils.q @@ -47,14 +47,6 @@ i.jaro:{[s1;s2] // Jaro-Winkler distance of 2 strings i.jaroWinkler:{$[0.7=4|.00002*count text; - dist:deltas each words#gt; - sigma:(dev each dist)%(avg each dist)*sqrt 1-(n:words#n)%count text; - std_sigma:1%sqrt[n]*1+2.8*n xexp -0.865; - chev_sigma:((2*n)-1)%2*n+1; - desc(sigma-chev_sigma)%std_sigma} - -// Give 2 dicts of each term's affinity to each corpus -// Algorithm from Rayson, Paul, and Roger Garside. "Comparing corpora using frequency profiling." -// Proceedings of the workshop on Comparing Corpora. 
Association for Computational Linguistics, 2000 -compareCorpora:{[corp1;corp2] - if[(not count corp1)|(not count corp2);:((`$())!();(`$())!())]; - getTermCount:{[corp] - i.fastSum{1+log count each group x}each corp[`tokens]@'where each not corp`isStop}; - totalWordCountA:sum termCountA:getTermCount corp1; - totalWordCountB:sum termCountB:getTermCount corp2; - // The expected termCount of each term in each corpus - coef:(termCountA+termCountB)%(totalWordCountA+totalWordCountB); - expectedA:totalWordCountA*coef; - expectedB:totalWordCountB*coef; - // Return the differences between the corpora - (desc termCountA*log termCountA%expectedA;desc termCountB*log termCountB%expectedB)} - -// Calc cosine similarity of two docs -compareDocs:{cosineSimilarity .(x;y)@\:distinct raze key each(x;y)} - -// Compare similarity of 2 vectors -cosineSimilarity:{sum[x*y]%(sqrt sum x*x)*sqrt sum y*y} - -// How much each term contributes to the cosine similarity -explainSimilarity:{[doc1;doc2] - alignedKeys:inter[key doc1;key doc2]; - doc1@:alignedKeys; - doc2@:alignedKeys; - product:(doc1%i.magnitude doc1)*(doc2%i.magnitude doc2); - desc alignedKeys!product%sum product} - -// Find runs containing term where each word has above average co-ocurrance with term -extractPhrases:{[corpus;term] - relevant:term,sublist[150]where 01)#r:count each group r where term in/:r:raze tokens@'runs} - -// Find all dates : list of 5-tuples (startDate; endDate; dateText; startIndex; 1+endIndex) -findDates:tm.findDates - -// Find all times : list of 4-tuples (time; timeText; startIndex; 1+endIndex) -findTimes:tm.findTimes - -// Get all sentences for a doc -getSentences:i.getSentences - -// Find runs of tokens whose POS tags are in the set passed in -// Returns pair (text; firstIndex) -findPOSRuns:{[tagType;tags;doc] - start:where 1=deltas matchingTag:doc[tagType]in tags; - ii:start+til each lengths:sum each start cut matchingTag; - runs:`$" "sv/:string each doc[`tokens]start+til each lengths; - flip(runs;ii)} - -// Generate feature vector (of stemmed tokens) for a term -findRelatedTerms:{[docs;term] - sent:raze docs[`sentIndices]cut'@'[docs[`tokens];where each docs`isStop;:;`]; - sent@:asc distinct raze 0|-1 0 1+\:where(term:lower term)in/:sent; - ccur:` _ count each group raze distinct each sent; - tcur:idx@'group each docs[`tokens]@'idx:where each docs[`tokens]in\:key ccur; - tcur:i.fastSum((count distinct@)each)each docs[`sentIndices]bin'tcur; - ccur%:tcur term; - tcur%:sum count each docs`sentIndices; - desc except[where r>0;term]#r:(ccur-tcur)%sqrt tcur*1-tcur} - -// Jaro-Winkler distance between 2 strings -jaroWinkler:{i.jaroWinkler[lower x;lower y]} - -// Import all files in a dir recursively -loadTextFromDir:{[fp] - path:{[fp]raze$[-11=type k:key fp:hsym fp;fp;.z.s each` sv'fp,'k]}`$fp; - ([]fileName:(` vs'path)[;1];path;text:"\n"sv'read0 each path)} - -// Read an mbox file, converting it to a table with the parsed metadata -loadEmails:email.i.getMboxText - -// Create a new parser using a spaCy model (must already be installed) -newParser:parser.i.newParser - -// Detect language from text -detectLang:{[text]`$.p.import[`langid][`:classify;<][raze text]0} - -// Parse urls to dictionaries -parseURLs:{`scheme`domainName`path`parameters`query`fragment!i.parseURLs x} - -// Calculate sentiment of sentence of short message -sentiment:sent.score - -// Phonological representation of string (commented out for now) -/doubleMetaphone:.p.import[`metaphone;`:doublemetaphone;<] +path:{string`nlp^`$@[{"/"sv -1_"/"vs ssr[;"\\";"/"](-3#get 
.z.s)0};`;""]}` +loadfile:{$[.z.q;;-1]"Loading ",x:_[":"=x 0]x:$[10=type x;;string]x;system"l ",path,"/",x;} diff --git a/requirements.txt b/requirements.txt index c3843f2..c08bc92 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ numpy beautifulsoup4 -spacy +spacy==2.2.1 +langdetect diff --git a/tests/clustertest.t b/tests/clustertest.t index 3503831..6bec5fb 100644 --- a/tests/clustertest.t +++ b/tests/clustertest.t @@ -1,7 +1,8 @@ +\l nlp.q \l init.q \d .nlp -text: first (enlist "*";",";1) 0: `:./data/miniJeff.txt -p:newParser[`en; enlist`keywords] +text: first (enlist "*";",";1) 0: `:tests/data/miniJeff.txt +p:newParser[`en;`keywords] corpus:p text emptyDoc:([] keywords:enlist ()!()) truncate:{[precision; x]coefficient: 10 xexp precision;reciprocal[coefficient]*`long$coefficient*x} @@ -78,17 +79,17 @@ orthogonalDocs:(`a`b!1 1f;`c`d!1 1f;`e`f!1 1f) .2 ~ truncate[2] cluster.MSE corpus[1 3 5 7 9; `keywords] corpus:p ("beep beep beep";"In Brittany, the Bretons play the bombard";"The Bretons of Brittany enjoy bombard music";"A special hand tool is needed to adjust a bike chain to the right length";"A chain whip is a tool used by a bike mechanic";"Chain oil is recommeneded instead of WD-40"; "A bike mechanic frequently gets chain oil on their hands"; "I enjoy medieval music";"The lute is a common medieval instrument";"Wire strings are common on medieval harps";"Lutes have too many strings";"Medieval wind instruments are also abundant";"No medieval wind instruments had strings";"The modern harp has mostly nylon strings";"Modern music is much less shrill"); centroids:sum each corpus[`keywords] (enlist 0;1 + til 2;3 + til 4;7 + til 8) -cluster.i.groupByCentroids[centroids; 1 _ corpus `keywords] ~ -1 + (1 2;3 4 5 6;7 8 9 10 11 12 13 14) -cluster.i.groupByCentroids[centroids enlist 0; 1 _ corpus `keywords]~ enlist til 14 -cluster.i.groupByCentroids[centroids 1 2; corpus `keywords]~ (0 8 9 10 11 12 13;1 2 7 14;3 4 5 6) -cluster.i.groupByCentroids[centroids; corpus `keywords]~(enlist 0;1 2;3 4 5 6;7 8 9 10 11 12 13 14) -cluster.i.groupByCentroids[centroids 0 1 2;()] ~ () -cluster.i.groupByCentroids[centroids enlist 2; corpus `keywords] ~ (0 1 2 7 8 9 10 11 12 13 14; 3 4 5 6) -cluster.i.groupByCentroids[centroids; corpus[enlist 0] `keywords]~ enlist enlist 0 -(til 15) ~ asc raze cluster.i.groupByCentroids[1_centroids;corpus`keywords] +cluster.groupByCentroids[centroids; 1 _ corpus `keywords] ~ -1 + (1 2;3 4 5 6;7 8 9 10 11 12 13 14) +cluster.groupByCentroids[centroids enlist 0; 1 _ corpus `keywords]~ enlist til 14 +cluster.groupByCentroids[centroids 1 2; corpus `keywords]~ (0 8 9 10 11 12 13;1 2 7 14;3 4 5 6) +cluster.groupByCentroids[centroids; corpus `keywords]~(enlist 0;1 2;3 4 5 6;7 8 9 10 11 12 13 14) +cluster.groupByCentroids[centroids 0 1 2;()] ~ () +cluster.groupByCentroids[centroids enlist 2; corpus `keywords] ~ (0 1 2 7 8 9 10 11 12 13 14; 3 4 5 6) +cluster.groupByCentroids[centroids; corpus[enlist 0] `keywords]~ enlist enlist 0 +(til 15) ~ asc raze cluster.groupByCentroids[1_centroids;corpus`keywords] \d . 
-text: first (enlist "*";",";1) 0: `:./data/miniJeff.txt -p:.nlp.newParser[`en; enlist`keywords] +text: first (enlist "*";",";1) 0: `:tests/data/miniJeff.txt +p:.nlp.newParser[`en;`keywords] corpus:p text emptyDoc:([] keywords:enlist ()!()) cluster:.nlp.cluster.summarize[corpus;10] diff --git a/data/message.mbox b/tests/data/message.mbox similarity index 100% rename from data/message.mbox rename to tests/data/message.mbox diff --git a/data/miniJeff.txt b/tests/data/miniJeff.txt similarity index 100% rename from data/miniJeff.txt rename to tests/data/miniJeff.txt diff --git a/data/test.mbox b/tests/data/test.mbox similarity index 100% rename from data/test.mbox rename to tests/data/test.mbox diff --git a/tests/emailtest.t b/tests/emailtest.t index f2e1ea3..31fa54f 100644 --- a/tests/emailtest.t +++ b/tests/emailtest.t @@ -1,7 +1,8 @@ +\l nlp.q \l init.q \d .nlp -lines: read0 `:./data/test.mbox; -emails:email.i.parseMail each "\n" sv/: (where lines like "From *") cut lines; +lines: read0 `:tests/data/test.mbox; +emails:email.parseMail each "\n" sv/: (where lines like "From *") cut lines; to: 9#enlist enlist("";"john.doe@domain.com"); to[0;0;0]:"John Doe"; emails[`to]~to diff --git a/tests/nlptest.t b/tests/nlptest.t index dd68b2e..778610f 100644 --- a/tests/nlptest.t +++ b/tests/nlptest.t @@ -1,9 +1,10 @@ +\l nlp.q \l init.q \d .nlp charPosParser:newParser[`en; `sentChars`starts`tokens] doc:first charPosParser enlist text:"Le café noir était pour André Benoît. Mes aïeux été vieux." all(doc[`tokens]~`$("le";"café";"noir";"était";"pour";"andré";"benoît";"mes";"aïeux";"été";"vieux");(doc[`starts] cut text)~("Le ";"café ";"noir ";"était ";"pour ";"André ";"Benoît. ";"Mes ";"aïeux ";"été ";"vieux.");(doc[`sentChars;;0] cut text)~("Le café noir était pour André Benoît. ";"Mes aïeux été vieux.");((0,doc[`sentChars;;1]) cut text)~("Le café noir était pour André Benoît.";" Mes aïeux été vieux.";"")) -text: first (enlist "*";",";1) 0: `:./data/miniJeff.txt +text: first (enlist "*";",";1) 0: `:tests/data/miniJeff.txt p:newParser[`en; `tokens`isStop]; corpus:p text; keywords:TFIDF corpus; @@ -14,7 +15,10 @@ keywords[0; `billion] > keywords[0; `transacting] enlist[(`u#`$())!()]~TFIDF([]tokens:enlist `$(); isStop:enlist `boolean$()); keywords:TFIDF enlist corpus 1; 98h~type keywords -p:newParser[`en; enlist`keywords]; +keywords_tot:TFIDF_tot corpus +keywords_tot[`erv]~keywords_tot[`published] +keywords_tot[`mpr] > keywords_tot[`attached] +p:newParser[`en;`keywords]; corpus:p text; 1f~compareDocs . 
corpus[`keywords]0 0 0f~compareDocs[(enlist`a)!enlist 1;(enlist `b)!enlist 1] @@ -27,6 +31,10 @@ truncate:{[precision; x]coefficient: 10 xexp precision;reciprocal[coefficient] * 0f~truncate[3] cosineSimilarity[0 1; 1 0] 1f~truncate[3] cosineSimilarity[0 1; 0 1] 1f~truncate[3] cosineSimilarity[1; 1] +centroid:sum corpus`keywords +1 1f~2#desc compareDocToCentroid[centroid]each corpus`keywords +1 1 1 1f~4#desc compareDocToCorpus[corpus`keywords;0] +0 0 0f~3#asc compareDocToCorpus[corpus`keywords;0] explainSimilarity[(`a`b`c)!(.1 .2 .3);(`e`f`g)!(.1 .2 .3)]~(`$())!`float$() all(explainSimilarity[(`a`b`c)!(.1 .2 .3); (`$())!(`float$())]~(`$())!(`float$());explainSimilarity[(`$())!(`float$());(`a`b`c)!(.1 .2 .3)]~(`$())!(`float$());explainSimilarity[(`$())!(`float$());(`$())!(`float$())]~(`$())!(`float$())) all(explainSimilarity[(enlist `a)!enlist .1;(enlist `a)!enlist .1]~(enlist `a)!enlist 1f;explainSimilarity[(enlist `a)!enlist .1;(enlist `a)!enlist .5]~(enlist `a)!enlist 1f;explainSimilarity[(enlist `a)!enlist .1;(enlist `b)!enlist .5]~(`$())!(`float$())) @@ -60,7 +68,7 @@ posParser:newParser[`en; `uniPOS`pennPOS`tokens] findPOSRuns[`uniPOS; `ADV`VERB;first posParser enlist". ."]~() findPOSRuns[`uniPOS; `DET;first posParser enlist "The"]~enlist(`the; enlist 0) findPOSRuns[`uniPOS; `VERB;first posParser enlist"The train from nowhere"]~() -findPOSRuns[`uniPOS; `VERB;first posParser enlist"has been gone dancing"]~enlist(`$"has been gone dancing";0 1 2 3) +findPOSRuns[`uniPOS; `VERB;first posParser enlist"has been gone dancing"]~enlist(`$"gone dancing";2 3) doc:first posParser enlist"Wade Hemsworth famously surveyed the Abitibi Waterways in North Ontario."; all(findPOSRuns[`uniPOS;`DET`PROPN;doc];findPOSRuns[`pennPOS;`DT`NNP`NNPS; doc])~\:((`$"wade hemsworth"; 0 1);(`$"the abitibi waterways"; 4 5 6);(`$"north ontario"; 8 9)) p:newParser[`en;`tokens`isStop`sentIndices]; @@ -98,35 +106,40 @@ keywords:keywordsContinuous enlist doc; 99h ~ type keywords keywords:keywordsContinuous corpus; {x~desc x} keywords `chairman`chief`group`enron`thanks`mountains -emails:.nlp.loadEmails["data/test.mbox"] +(1 1f,(2%3),(1%3),0.5 0.5 0.5 0.5 0.5 0.5)~value 10#ngram[enlist first corpus;2] +1 1 .5 .5 1 1 1 1 1 1f~value 10#ngram[enlist first corpus;3] +((`enrononline`management`report);(`management`report`june);(`report`june`attached))~key 3#ngram[enlist first corpus;3] +emails:email.loadEmails["tests/data/test.mbox"] `sender`to`date`subject`contentType`payload`text~cols emails -(last .nlp.loadEmails["data/test.mbox"]`text)~"Your email client does not support HTML mails." +(last emails`text)~"Your email client does not support HTML mails." ("multipart/alternative";"multipart/alternative";"multipart/alternative";"multipart/alternative";"multipart/alternative";"multipart/alternative";"text/html";"multipart/alternative";"multipart/alternative")~emails`contentType +`sender`to`volume~cols email.getGraph emails +1~(last email.getGraph emails)`volume parseURLs["http://www.google.com"]~`scheme`domainName`path`parameters`query`fragment!("http";"www.google.com";"";"";"";"") parseURLs["ssh://samsquanch@mx4.hotmail.com"][`scheme`domainName]~("ssh";"samsquanch@mx4.hotmail.com") parseURLs["https://www.google.ca:1234/test/index.html;myParam?foo=bar&quux=blort#abc=123&def=456"]~(!) . 
flip ((`scheme;"https");(`domainName;"www.google.ca:1234");(`path;"/test/index.html");(`parameters; "myParam");(`query;"foo=bar&quux=blort");(`fragment;"abc=123&def=456")) all(parseURLs["google.ca/test/index.html"][`scheme`domainName`path]~("http";"google.ca";"/test/index.html");parseURLs["www.google.co.uk"][`scheme`domainName`path]~("http";"www.google.co.uk";"")) parseURLs["https://网站.中国.com"]~`scheme`domainName`path`parameters`query`fragment!("https";"网站.中国.com";"";"";"";"") (parseURLs each ("https://travel.gc.ca/";"https://www.canada.ca/en/revenue-agency.html"))~([]scheme:("https"; "https");domainName:("travel.gc.ca"; "www.canada.ca");path:(enlist "/";"/en/revenue-agency.html");parameters: (""; "");query:(""; "");fragment:(""; "")) -\d . -text: first (enlist "*";",";1) 0: `:./data/miniJeff.txt -p:.nlp.newParser[`en;`tokens`isStop`text] +seq:bi_gram[corpus] +seq[`enrononline`management]~1f +seq[`management`report]>seq[`report`june] +`en~detectLang["This is a sentence"] +`de~detectLang["Das ist ein Satz"] +`fr~detectLang["C'est une phrase"] +ascii["This is ä senteñcê"]~"This is sentec" +rmv_list :("http*";"*,";"*&*";"*[0-9]*") +rmv_custom["https//:google.com & https//:bing.com are 2 search engines!";rmv_list]~"are search engines!" +rmv_master["https//:google.com & https//:bing.com are 2 search engines!";",.:?!/@'\n";""]~"httpsgooglecom & httpsbingcom are 2 search engines" +loadDir:loadTextFromDir["tests/data/test.mbox"] +`fileName`path`text~cols loadDir +loadDir[`fileName]~enlist `test.mbox +text: first (enlist "*";",";1) 0: `:tests/data/miniJeff.txt +p:newParser[`en;`tokens`isStop`text] corpus:p text phonecall:corpus i:where corpus[`text] like "*Telephone Call*" -remaining:corpus til[count corpus]except i -(`message`murdock`erica`error`jerry;`enron`know`let,`meeting`company)~key each 5#/:.nlp.compareCorpora[phonecall;remaining] - - - - - - - - - - - - - - - +remaining:corpus til[count corpus]except n +(`message`murdock`erica`error`jerry;`enron`know`let,`meeting`company)~key each 5#/:compareCorpora[phonecall;remaining] +txt:"You can call the number 123 456 7890 or email us on name@email.com in book an appoinment for January,February and March for £30.00" +findRegex[txt;`phoneNumber`emailAddress`yearmonthList`money]~`phoneNumber`emailAddress`yearmonthList`money!(enlist (" 123 456 7890";23;36);enlist("name@email.com";52;66);(("January";93;100);("February";101;109);("March";114;119);("30";125;127);("00";128;130));enlist("\302\24330.00";124;130)) +\d . diff --git a/tests/parsertest.t b/tests/parsertest.t index f669d2d..c20334e 100644 --- a/tests/parsertest.t +++ b/tests/parsertest.t @@ -1,7 +1,8 @@ +\l nlp.q \l init.q \d .nlp -basicParser:newParser[`en;enlist `tokens]; -keywordParser:newParser[`en; enlist `keywords] +basicParser:newParser[`en;`tokens]; +keywordParser:newParser[`en;`keywords] allSpacyOptionsParser:newParser[`en;`likeEmail`likeURL`likeNumber`isStop`tokens`lemmas`uniPOS`pennPOS`starts]; allQOptionsParser:newParser[`en; `keywords`sentChars`sentIndices]; textPreservingParser:newParser[`en; `tokens`text]; @@ -17,7 +18,7 @@ all(keywords[0;`lacrosse] > keywords[0;`team];keywords[0;`lacrosse] < keywords[4 docs: ("The great Québec maple syrup heist"; "Québec is great"); cols[keywordParser docs] ~ enlist `keywords result:allSpacyOptionsParser enlist"Email Jeff Bezos at jeff@amazon.com. 
He gets 65,536 emails a day from people asking about www.blueorigin.com or https://amazon.ca."; -all(cols[result] ~`likeEmail`likeURL`likeNumber`isStop`tokens`lemmas`uniPOS`pennPOS`starts;result[`likeEmail] ~enlist 000010000000000000b;result[`likeURL]~enlist 000000000000000101b;result[`likeNumber]~enlist 000000010000000000b;result[`isStop]~enlist 000101110101001010b;result[`tokens]~enlist `email`jeff`bezos`at,(`$"jeff@amazon.com"),`he`gets,(`$"65,536"),`emails`a`day`from`people`asking`about`www.blueorigin.com`or`https://amazon.ca;result[`lemmas]~ enlist `Email`Jeff`Bezos`at,(`$"jeff@amazon.com"),(`$"-PRON-"),`get,(`$"65,536"),`email`a`day`from`people`ask`about`www.blueorigin.com`or`https://amazon.ca;result[`uniPOS]~ enlist `PROPN`PROPN`PROPN`ADP`X`PRON`VERB`NUM`NOUN`DET`NOUN`ADP`NOUN`VERB`ADP`NOUN`CCONJ`NOUN;result[`pennPOS] ~ enlist `NNP`NNP`NNP`IN`ADD`PRP`VBZ`CD`NNS`DT`NN`IN`NNS`VBG`IN`NN`CC`NN;result[`starts]~enlist 0 6 11 17 20 37 40 45 52 59 61 65 70 77 84 90 109 112) +all(cols[result] ~`likeEmail`likeURL`likeNumber`isStop`tokens`lemmas`uniPOS`pennPOS`starts;result[`likeEmail] ~enlist 000010000000000000b;result[`likeURL]~enlist 000000000000000101b;result[`likeNumber]~enlist 000000010000000000b;result[`isStop]~enlist 000101110101001010b;result[`tokens]~enlist `email`jeff`bezos`at,(`$"jeff@amazon.com"),`he`gets,(`$"65,536"),`emails`a`day`from`people`asking`about`www.blueorigin.com`or`https://amazon.ca;result[`lemmas]~ enlist `email`Jeff`Bezos`at,(`$"jeff@amazon.com"),(`$"-PRON-"),`get,(`$"65,536"),`email`a`day`from`people`ask`about`www.blueorigin.com`or`https://amazon.ca;result[`uniPOS]~ enlist `NOUN`PROPN`PROPN`ADP`X`PRON`VERB`NUM`NOUN`DET`NOUN`ADP`NOUN`VERB`ADP`X`CCONJ`PROPN;result[`pennPOS] ~ enlist `NN`NNP`NNP`IN`ADD`PRP`VBZ`CD`NNS`DT`NN`IN`NNS`VBG`IN`ADD`CC`NNP;result[`starts]~enlist 0 6 11 17 20 37 40 45 52 59 61 65 70 77 84 90 109 112) result:allQOptionsParser[enlist"O, the year was 1778 how I wish I was in Sherbrooke now. A letter of marque came from the king."]; all(cols[result]~`keywords`sentChars`sentIndices;result[`keywords]~ enlist `o`year`wish`sherbrooke`letter`marque`came`king!8#0.125;result[`sentChars] ~ enlist (0 56; 57 95);result[`sentIndices] ~ enlist 0 13) result:first sentenceParser enlist" Hornpipe, jig, and reel. \nThis is a good song" diff --git a/tests/senttest.t b/tests/senttest.t index cbee02a..bb0d84d 100644 --- a/tests/senttest.t +++ b/tests/senttest.t @@ -1,23 +1,24 @@ +\l nlp.q \l init.q \d .nlp -sent.amplifyEP[""]~0f -sent.amplifyEP[enlist "!"]~.292 -0 .292 .584 .876 1.168 1.168 ~sent.amplifyEP each ("ok"; "bad!"; "no!worse!"; "terrible!!!"; "ghastly!!!! eew"; "!!!!!!!!!!") -sent.amplifyQM[""]~0f -sent.amplifyQM[enlist "?"]~0f -0 0 0.36 0.54 0.96 0.96~sent.amplifyQM each ("yes"; "oh?"; "oh? 
really?"; "you don't say???"; "forsooth????"; "????????????") -all (sent.findSequence[`a`b`c`d;enlist`c]~enlist 2;sent.findSequence[`c`b`c`d; enlist `c] ~ 0 2) -all (sent.findSequence[`a`b`c`d`e`f;`c`d]~enlist 2;sent.findSequence[`a`b`c`d`e`f`c`d; `c`d] ~ 2 6;sent.findSequence[`a`b`c`d`e`f`a`b`c`d`e`g`a`b`c`d; `a`b`c`d] ~ 0 6 12) -sent.findSequence[`a`b`c`d;`c]~enlist 2; -sent.findSequence[`$();`a`b`c]~`long$() -all(sent.findSequence[enlist`a;`a]~enlist 0;sent.findSequence[enlist`a;`b]~`long$()) -sent.findSequence[`a`b`c`d`e`a;`a]~0 5 -sent.findSequence[0 0 4 5 1 2 4 5;4 5]~2 6 -sent.findSequence["Facebook,Tim Cook";"oo"]~5 14 -sent.butCheck[`$(); `float$()] ~ `float$() -all(sent.butCheck[enlist `good; enlist 2f] ~ enlist 2f;sent.butCheck[enlist`but;enlist 0f]~enlist 0f) -all(sent.butCheck[`that`was`good`but; 0 0 1 0f] ~ 0 0 .5 0f;sent.butCheck[`that`was`good`but`it; 0 0 1 0 0f] ~ 0 0 .5 0 0f;sent.butCheck[`but`it`was`ok; 0 0 0 1f] ~ 0 0 0 1.5f;sent.butCheck[`tasty`but`it`smelled`bad; 2 0 0 -1.5 -2f] ~ 1 0 0 -2.25 -3f) -sent.butCheck[`it`was`good`and`useful`but`boring`and`gross;0 0 1 0 1.5 0 -1 0 -2]~0 0 .5 0 .75 0 -1.5 0 -3 +sent.i.amplifyEP[""]~0f +sent.i.amplifyEP[enlist "!"]~.292 +0 .292 .584 .876 1.168 1.168 ~sent.i.amplifyEP each ("ok"; "bad!"; "no!worse!"; "terrible!!!"; "ghastly!!!! eew"; "!!!!!!!!!!") +sent.i.amplifyQM[""]~0f +sent.i.amplifyQM[enlist "?"]~0f +0 0 0.36 0.54 0.96 0.96~sent.i.amplifyQM each ("yes"; "oh?"; "oh? really?"; "you don't say???"; "forsooth????"; "????????????") +all (sent.i.findSequence[`a`b`c`d;enlist`c]~enlist 2;sent.i.findSequence[`c`b`c`d; enlist `c] ~ 0 2) +all (sent.i.findSequence[`a`b`c`d`e`f;`c`d]~enlist 2;sent.i.findSequence[`a`b`c`d`e`f`c`d; `c`d] ~ 2 6;sent.i.findSequence[`a`b`c`d`e`f`a`b`c`d`e`g`a`b`c`d; `a`b`c`d] ~ 0 6 12) +sent.i.findSequence[`a`b`c`d;`c]~enlist 2; +sent.i.findSequence[`$();`a`b`c]~`long$() +all(sent.i.findSequence[enlist`a;`a]~enlist 0;sent.i.findSequence[enlist`a;`b]~`long$()) +sent.i.findSequence[`a`b`c`d`e`a;`a]~0 5 +sent.i.findSequence[0 0 4 5 1 2 4 5;4 5]~2 6 +sent.i.findSequence["Facebook,Tim Cook";"oo"]~5 14 +sent.i.butCheck[`$(); `float$()] ~ `float$() +all(sent.i.butCheck[enlist `good; enlist 2f] ~ enlist 2f;sent.i.butCheck[enlist`but;enlist 0f]~enlist 0f) +all(sent.i.butCheck[`that`was`good`but; 0 0 1 0f] ~ 0 0 .5 0f;sent.i.butCheck[`that`was`good`but`it; 0 0 1 0 0f] ~ 0 0 .5 0 0f;sent.i.butCheck[`but`it`was`ok; 0 0 0 1f] ~ 0 0 0 1.5f;sent.i.butCheck[`tasty`but`it`smelled`bad; 2 0 0 -1.5 -2f] ~ 1 0 0 -2.25 -3f) +sent.i.butCheck[`it`was`good`and`useful`but`boring`and`gross;0 0 1 0 1.5 0 -1 0 -2]~0 0 .5 0 .75 0 -1.5 0 -3 compare:{value (floor 1000* sent.score x) % 1000} all(compare[""]~0 0 0 0f;compare["\t\t\r\n\n"]~0 0 0 0f;compare["a b c 1"]~0 0 0 0f) all(compare["bad"]~-.543 0 1 0f;compare["racist"]~-.613 0 1 0f;compare["good"]~.44 1 0 0f;compare["free"] ~.51 1 0 0f;compare["those"]~0 0 0 1f;compare["123"]~0 0 0 1f) diff --git a/tests/utilstest.t b/tests/utilstest.t index 64f99d1..6e7414d 100644 --- a/tests/utilstest.t +++ b/tests/utilstest.t @@ -1,3 +1,4 @@ +\l nlp.q \l init.q \d .nlp all(i.findRuns[where 000000000b]~();i.findRuns[where 100000000b]~();i.findRuns[where 100000001b]~();i.findRuns[where 101010101b]~();i.findRuns[where 100100100b]~()) diff --git a/time.q b/time.q deleted file mode 100644 index ccc48e0..0000000 --- a/time.q +++ /dev/null @@ -1,10 +0,0 @@ -\d .nlp - -// Turns string matching time regex into a q time -tm.parseTime:{ - tm:"T"$x where vs[" ";x][0]in"1234567890:."; - ampm:regex.check[;x]each 
regex.objects`am`pm; - tm+$[ampm[0]&12=`hh$tm;-1;ampm[1]&12>`hh$tm;1;0]*12:00} - -// Find all times : list of 4-tuples (time; timeText; startIndex; 1+endIndex) -tm.findTimes:{time:(tm.parseTime each tmtxt[;0]),'tmtxt:regex.matchAll[regex.objects.time;x]; time where time[;0]<24:01}
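
As a usage note on the relocated code: the library is now loaded via nlp.q followed by .nlp.loadfile, after which the utilities added in code/nlp_code.q can be called directly. A minimal sketch, assuming the library sits under $QHOME/nlp and that spacy and langdetect are installed (argument strings adapted from the tests above):

```q
q)\l nlp/nlp.q
q).nlp.loadfile`:init.q
q).nlp.detectLang["Das ist ein Satz"]                / expected `de per tests/nlptest.t
q).nlp.parseURLs["https://www.google.ca:1234/test/index.html"]
q).nlp.findRegex["Call 123 456 7890 or email name@email.com";`phoneNumber`emailAddress]
```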