Skip to content

Commit

Permalink
some updates (#468)
Browse files Browse the repository at this point in the history
* some formatting
* 2023 wle conf
* authorsContributedPerRegion
* update PoiXwpfV
* ttn reader
* more warnings
* lastMonth stat
* lastMonth stat
* typo
* evict cache on parse exception
* 2023 rating
* update scalafmt
* java-version: 11 for PR
* java-version: 11 for appveyor
* drop scala 2.12
* remove number
* fix tests
  • Loading branch information
intracer authored Dec 17, 2023
1 parent 8fc5cab commit 56080fc
Show file tree
Hide file tree
Showing 26 changed files with 917 additions and 301 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
uses: actions/setup-java@v3
with:
distribution: temurin
java-version: 8
java-version: 11
cache: sbt
- name: Build and Test
run: sbt -v +test
2 changes: 2 additions & 0 deletions .scalafmt.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
version = 3.7.17
runner.dialect = scala213
1 change: 1 addition & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ install:
)
[System.IO.Compression.ZipFile]::ExtractToDirectory("C:\sbt-bin.zip", "C:\sbt")
}
- cmd: SET JAVA_HOME=C:\Program Files\Java\jdk11
- cmd: SET PATH=C:\sbt\sbt\bin;%JAVA_HOME%\bin;%PATH%
- cmd: SET SBT_OPTS=-XX:MaxPermSize=2g -Xmx4g
build_script:
Expand Down
5 changes: 4 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ lazy val isScala213 = settingKey[Boolean]("Is the scala version 2.13.")
lazy val commonSettings = Seq(
organization := "org.scalawiki",
version := "0.7.0-SNAPSHOT",
crossScalaVersions := Seq(Scala212V, Scala213V),
crossScalaVersions := Seq(Scala213V),
scalaVersion := crossScalaVersions.value.last,
isScala213 := scalaVersion.value.startsWith("2.13."),
Global / excludeLintKeys += isScala213,
Expand Down Expand Up @@ -83,8 +83,11 @@ lazy val bots = Project("scalawiki-bots", file("scalawiki-bots"))
"com.github.pathikrit" %% "better-files" % BetterFilesV,
"org.rogach" %% "scallop" % ScallopV,
"org.xwiki.commons" % "xwiki-commons-blame-api" % BlameApiV,
Library.Commons.io,
Library.Poi.scratchpad,
Library.Poi.ooxml,
Library.Poi.ooxmlFull,
Library.Poi.poi,
Library.Poi.converter,
Library.Play.twirlApi(isScala213.value),
"com.github.tototoshi" %% "scala-csv" % ScalaCsvV
Expand Down
24 changes: 16 additions & 8 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ object Dependencies {
val ReactiveStreamsV = "1.0.4"
val RetryV = "0.3.6"
val Scala213V = "2.13.12"
val Scala212V = "2.12.18"
val ScalaChartV = "0.8.0"
val ScalaCheckV = "1.17.0"
val ScalaCsvV = "1.3.10"
Expand Down Expand Up @@ -49,13 +48,17 @@ object Dependencies {
Library.Commons.lang,
"com.typesafe" % "config" % TypesafeConfigV,
Library.Poi.ooxml,
Library.Poi.poi,
Library.Poi.ooxmlFull,
Library.Jackson.core,
Library.Jackson.annotations,
Library.Jackson.databind,
"joda-time" % "joda-time" % JodaTimeV,
"org.slf4j" % "slf4j-api" % Slf4jV,
"ch.qos.logback" % "logback-classic" % LogbackClassicV,
"javax.xml.bind" % "jaxb-api" % "2.3.1"
"javax.xml.bind" % "jaxb-api" % "2.3.1",
"org.apache.logging" % "log4j:log4j-core" % "2.18.1",
"org.apache.logging" % "log4j:log4j-api" % "2.18.1"
)

object Library {
Expand All @@ -81,25 +84,30 @@ object Dependencies {

def TwirlV(isScala213: Boolean) = "1.5.2"

def json(isScala213: Boolean) = "com.typesafe.play" %% "play-json" % PlayJsonV(isScala213)
def json(isScala213: Boolean) =
"com.typesafe.play" %% "play-json" % PlayJsonV(isScala213)

def twirlApi(isScala213: Boolean) = "com.typesafe.play" %% "twirl-api" % TwirlV(isScala213)
def twirlApi(isScala213: Boolean) =
"com.typesafe.play" %% "twirl-api" % TwirlV(isScala213)
}

object Poi {
val PoiV = "5.2.5"
val PoiXwpfV = "1.0.6"
val PoiXwpfV = "2.0.4"

val scratchpad = "org.apache.poi" % "poi-scratchpad" % PoiV
val poi = "org.apache.poi" % "poi" % PoiV
val ooxml = "org.apache.poi" % "poi-ooxml" % PoiV
val converter = "fr.opensagres.xdocreport" % "org.apache.poi.xwpf.converter.xhtml" % PoiXwpfV
val ooxmlFull = "org.apache.poi" % "poi-ooxml-full" % PoiV

val converter = "fr.opensagres.xdocreport" % "fr.opensagres.xdocreport.converter.docx.xwpf" % PoiXwpfV
}

object Commons {
val CommonsCodecV = "1.16.0"
val CommonsCompressV = "1.25.0"
val CommonsLang3V = "3.7"
val CommonsIoV = "2.6"
val CommonsIoV = "2.7"

val codec = "commons-codec" % "commons-codec" % CommonsCodecV
val io = "commons-io" % "commons-io" % CommonsIoV
Expand Down Expand Up @@ -132,4 +140,4 @@ object Dependencies {

}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class MessageBot(val conf: Config) extends ActionLibrary with QueryLibrary {
*/
val talkPageMessage = conf.as[Message]("talk-page")

implicit lazy val bot = MwBot.fromHost(host)
implicit lazy val bot: MwBot = MwBot.fromHost(host)

def run() = {
for (users <- fetchUsers(userListPage))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
package org.scalawiki.bots.museum

import java.io.{ByteArrayOutputStream, File, FileInputStream}
import fr.opensagres.poi.xwpf.converter.core.FileURIResolver
import fr.opensagres.poi.xwpf.converter.xhtml.{XHTMLConverter, XHTMLOptions}

import java.io.{ByteArrayOutputStream, File, FileInputStream}
import org.apache.poi.ooxml.POIXMLDocument
import org.apache.poi.hwpf.converter.WordToHtmlConverter
import org.apache.poi.hwpf.extractor.WordExtractor
import org.apache.poi.xwpf.converter.core.FileURIResolver
import org.apache.poi.xwpf.converter.xhtml.{XHTMLConverter, XHTMLOptions}
import org.apache.poi.xwpf.extractor.XWPFWordExtractor
import org.apache.poi.xwpf.usermodel.XWPFDocument
import better.files.{File => SFile}
Expand Down
43 changes: 43 additions & 0 deletions scalawiki-bots/src/main/scala/org/scalawiki/bots/np/TTN.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package org.scalawiki.bots.np

import org.apache.poi.ss.usermodel.{Cell, CellType}

/** One record read from a TTN spreadsheet row.
  *
  * `date` is kept as the raw string from the sheet; the helper methods below
  * only split it on dots, so they expect a dot-separated date such as
  * `dd.MM.yyyy` (the format implied by how callers compare `month` and `year`).
  */
case class TTN(
    ttn: String,
    date: String,
    route: String,
    sender: String,
    senderContact: String,
    receiver: String,
    receiverContact: String,
    description: String,
    mass: Double,
    places: Int,
    value: Double,
    cost: Double
) {

  /** Dot-separated components of `date`, in their original order. */
  private def dateParts: Array[String] = date.split("\\.")

  /** Everything after the first component, reversed: "17.12.2023" gives "2023.12". */
  def month: String = dateParts.tail.reverse.mkString(".")

  /** The last component of the date: "17.12.2023" gives "2023". */
  def year: String = dateParts.last

  /** All components reversed, which sorts chronologically as text: "17.12.2023" gives "2023.12.17". */
  def yyMmDd: String = dateParts.reverse.mkString(".")
}

object TTN {

  /** Builds a [[TTN]] from the cells of one spreadsheet row.
    *
    * A row is treated as data only when its first cell is NUMERIC
    * (presumably a running row number, which is not stored — confirm against
    * the sheet layout); any other row, including an empty one, yields `None`.
    *
    * Cells 1-8 are read as strings and cells 9-12 as numbers. A data row with
    * fewer than 13 cells fails with an index error, and the POI getters may
    * throw when a cell does not hold the expected type.
    *
    * @param cells the row's cells in column order
    * @return the parsed record, or `None` for non-data rows
    */
  def apply(cells: Seq[Cell]): Option[TTN] = {
    val isDataRow = cells.headOption.exists(_.getCellType == CellType.NUMERIC)

    if (!isDataRow) None
    else {
      def text(index: Int): String = cells(index).getStringCellValue
      def number(index: Int): Double = cells(index).getNumericCellValue

      Some(
        new TTN(
          ttn = text(1),
          date = text(2),
          route = text(3),
          sender = text(4),
          senderContact = text(5),
          receiver = text(6),
          receiverContact = text(7),
          description = text(8),
          mass = number(9),
          places = number(10).toInt,
          value = number(11),
          cost = number(12)
        )
      )
    }
  }
}
123 changes: 123 additions & 0 deletions scalawiki-bots/src/main/scala/org/scalawiki/bots/np/TTNReader.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package org.scalawiki.bots.np

import org.apache.poi.xssf.usermodel.XSSFWorkbook

import java.io.{File, FileInputStream}
import scala.jdk.CollectionConverters._

/** A contact together with the TTN records associated with it.
  *
  * @param contact contact string identifying the person (the companion's
  *                `receivers` uses a TTN's `receiverContact` for this)
  * @param ttns    the records grouped under that contact
  */
case class Person(contact: String, ttns: Seq[TTN])

object Person {

  /** Groups the given records by receiver contact, producing one [[Person]]
    * per distinct `receiverContact` holding every record sent to it.
    *
    * The result comes from a map traversal, so its order is unspecified.
    */
  def receivers(ttns: Seq[TTN]) = {
    val byReceiver = ttns.groupBy(_.receiverContact)
    byReceiver.map { entry =>
      Person(entry._1, entry._2)
    }
  }
}

/** Cost statistics computed over a collection of TTN records.
  *
  * @param ttns the records to aggregate
  */
case class TTNData(ttns: Seq[TTN]) {

  /** Total cost per month as `(month, total)` pairs, sorted by the month key. */
  val byMonth: Seq[(String, Double)] =
    ttns.groupBy(_.month).view.mapValues(_.map(_.cost).sum).toSeq.sortBy(_._1)

  /** Monthly cost totals grouped by year, where the year is the part of the
    * month key before its first dot (e.g. "2023" for "2023.08").
    */
  val byYear: Map[String, Seq[Double]] =
    byMonth.groupMap { case (month, _) => month.split("\\.").head } {
      case (_, monthlyCost) => monthlyCost
    }

  /** Prints a handful of ad-hoc statistics to standard output:
    * the monthly totals, the average monthly total per year, the receivers of
    * `year` grouped by how many records they received (only groups with at
    * least `minParcels` records), and the total vs. distinct receiver counts
    * for `year`.
    *
    * @param year       year to analyse, compared against `TTN.year`. Defaults to
    *                   "2022", the value that used to be hard-coded here (in a
    *                   local misleadingly named `year2023`), so calling
    *                   `variousStat()` prints exactly what it did before.
    * @param minParcels minimum number of records a receiver must have to be
    *                   listed; defaults to the previously hard-coded 5
    */
  def variousStat(year: String = "2022", minParcels: Int = 5): Unit = {
    val byYearAvg = byYear.view
      .mapValues(monthlyCosts => monthlyCosts.sum / monthlyCosts.size)
      .toSeq
      .sortBy(_._1)

    val selected = ttns.filter(_.year == year)

    // record count -> sorted contacts that received exactly that many records
    val receiversByCount: Seq[(Int, Seq[String])] = selected
      .groupBy(_.receiverContact)
      .view
      .mapValues(_.size)
      .toSeq
      .groupMap { case (_, count) => count } { case (contact, _) => contact }
      .view
      .mapValues(_.sorted)
      .toSeq
      .sortBy(_._1)

    println(byMonth)
    println(byYearAvg)

    receiversByCount.filter(_._1 >= minParcels).foreach {
      case (count, people) => println(s"$count: $people")
    }

    val distinct = selected.map(_.receiverContact).distinct.size
    val all = selected.size

    println(s"All: $all, distinct: $distinct")
  }

}

/** Command-line report over the TTN spreadsheets found in a directory. */
object TTNReader {

  /** Directory scanned when no command-line argument is given. */
  private val DefaultDir = "c:\\wmua\\np"

  /** Reads every `.xlsx` file in the input directory and prints how many of
    * the selected month's records went to known WLM contacts and how many did
    * not.
    *
    * @param args optional; the first argument overrides the input directory
    *             (falls back to the previously hard-coded path)
    */
  def main(args: Array[String]): Unit = {
    val wlmNumbers = WlmContacts.getNumbers

    val dir = new File(args.headOption.getOrElse(DefaultDir))
    val ttns2023 = readDir(dir).filter(_.year == "2023")
    val ttnsLastMonth = ttns2023.filter { ttn =>
      !ttn.receiverContact.contains("Корбут") &&
      ttn.month == "2023.08"
    }
    println("Ttns: " + ttnsLastMonth.size)

    // A record counts as WLM when its receiver contact contains a known number.
    val wlmTtns = ttnsLastMonth.filter { ttn =>
      wlmNumbers.exists(n => ttn.receiverContact.contains(n))
    }
    println("wlm ttns: " + wlmTtns.size)

    val wlmTtnsNumbers = wlmTtns.map(_.ttn).toSet
    val nonWlmTtns =
      ttnsLastMonth.filterNot(ttn => wlmTtnsNumbers.contains(ttn.ttn))
    println("not wlm ttns: " + nonWlmTtns.size)
  }

  /** Parses all `.xlsx` files directly inside `dir`.
    *
    * Returns an empty sequence when `dir` cannot be listed (it does not exist
    * or is not a directory), because `File.listFiles` returns null then.
    */
  private def readDir(dir: File): Seq[TTN] = {
    val files = Option(dir.listFiles()).fold(Seq.empty[File])(_.toSeq)
    files.filter(_.getName.endsWith(".xlsx")).flatMap(readFile)
  }

  /** Parses the first sheet of `file`, keeping only rows that `TTN.apply`
    * recognises as data rows.
    *
    * The stream and the workbook are both closed before returning, including
    * when parsing fails; the rows are fully materialised inside the resource
    * scope, so nothing refers to the workbook afterwards.
    */
  private def readFile(file: File): Seq[TTN] = {
    import scala.util.Using

    Using.resource(new FileInputStream(file)) { fis =>
      Using.resource(new XSSFWorkbook(fis)) { workbook =>
        val sheet = workbook.getSheetAt(0)
        sheet.iterator.asScala.toSeq.flatMap { row =>
          TTN.apply(row.cellIterator.asScala.toSeq)
        }
      }
    }
  }

  /** Placeholder; currently does nothing. */
  def readWlmPhoneNumbers(): Unit = {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package org.scalawiki.bots.np

/** Extracts and normalises phone numbers from free-form contact lines. */
object WlmContacts {

  // A space or '+' followed by at least ten digit/dash/bracket/whitespace characters.
  private val PhoneLike = """[ +][\d\-()\s]{10,}""".r

  /** Prints how many numbers were extracted, then each number on its own line. */
  def main(args: Array[String]): Unit = {
    val numbers = getNumbers
    println(numbers.size)
    numbers.foreach(println)
  }

  /** The sample contact lines the numbers are extracted from. */
  def getLines: Seq[String] = {
    Seq(
      " 0931234567",
      " 380671234567",
      " 063-123-45-67",
      " 097 123-45-67",
      " 095 123 45 67",
      " 095-1234567",
      " (097)1234567",
      " 380 (63) 123 45 67",
      " 38 050 123 45 67"
    )
  }

  /** All normalised numbers found across [[getLines]], in line order. */
  def getNumbers: Seq[String] =
    for {
      line <- getLines
      number <- getNumber(line)
    } yield number

  /** Finds phone-like fragments in `line` and normalises them to 12 digits.
    *
    * Each fragment is stripped down to its digits; a 10-digit result gets the
    * "38" prefix, a 12-digit result is kept as is, and anything else is
    * dropped. A fragment must be preceded by a space or '+' to be found.
    */
  def getNumber(line: String): Seq[String] = {
    val digitRuns = PhoneLike.findAllIn(line).map(_.filter(_.isDigit)).toSeq

    digitRuns.flatMap { digits =>
      digits.length match {
        case 10 => Some("38" + digits)
        case 12 => Some(digits)
        case _  => None
      }
    }
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package org.scalawiki.bots.np

import org.specs2.mutable.Specification

/** Checks that WlmContacts normalises every sample line to a 12-digit number. */
class WlmContactsSpec extends Specification {

  "WlmContacts" should {
    val extracted = WlmContacts.getNumbers

    // True when the normalised number is among the extracted ones.
    def found(number: String): Boolean = extracted.contains(number)

    "pick 10 digits" in {
      found("380931234567") must beTrue
    }

    "pick 12 digits" in {
      found("380671234567") must beTrue
    }

    "pick dashed/spaced/braced digits" in {
      // One expectation per formatted sample line, in sample order; several
      // lines normalise to the same number, hence the repeated values.
      found("380631234567") must beTrue // 063-123-45-67
      found("380971234567") must beTrue // 097 123-45-67
      found("380951234567") must beTrue // 095 123 45 67
      found("380951234567") must beTrue // 095-1234567
      found("380971234567") must beTrue // (097)1234567
      found("380631234567") must beTrue // 380 (63) 123 45 67
      found("380501234567") must beTrue // 38 050 123 45 67
    }
  }
}
Loading

0 comments on commit 56080fc

Please sign in to comment.