Skip to content

Commit

Permalink
Merge pull request #1360 from adpi2/index-scala-2
Browse files Browse the repository at this point in the history
Fix indexing of non-standard libs including scala-compiler, scala-library etc
  • Loading branch information
adpi2 authored Mar 6, 2024
2 parents a23fa83 + 5804f9f commit 61150b6
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 69 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,30 @@ import scaladex.infra.DataPaths
case class NonStandardLib(
groupId: String,
artifactId: String,
lookup: ScalaTargetLookup
lookup: BinaryVersionLookup
)

sealed trait ScalaTargetLookup
sealed trait BinaryVersionLookup

/**
* The version is encoded in the pom file
* dependency on org.scala-lang:scala-library
* ex: io.gatling : gatling-compiler : 2.2.2
*/
case object ScalaTargetFromPom extends ScalaTargetLookup
/**
 * The strategies used to find the binary version of a non-standard library.
 * The raw lookup strings "pom", "java" and "version" are mapped to
 * FromDependency, Java and FromArtifactVersion respectively.
 */
object BinaryVersionLookup {
/**
* The binary version is read from the pom file:
* it is taken from the dependency on org.scala-lang:scala-library
* ex: io.gatling : gatling-compiler : 2.2.2
*/
case object FromDependency extends BinaryVersionLookup

/**
* The project is a plain-java project, thus no ScalaTarget.
* ex: com.typesafe : config : 1.3.1
*/
case object NoScalaTargetPureJavaDependency extends ScalaTargetLookup
/**
* The project is a plain-java project, thus no ScalaTarget.
* ex: com.typesafe : config : 1.3.1
*/
case object Java extends BinaryVersionLookup

/**
* The version is encoded in the version (ex: scala-library itself)
*/
case object ScalaTargetFromVersion extends ScalaTargetLookup
/**
* The binary version is derived from the artifact's own version
* (ex: scala-library itself)
*/
case object FromArtifactVersion extends BinaryVersionLookup
}

object NonStandardLib {

Expand All @@ -58,9 +60,9 @@ object NonStandardLib {
case (artifact, rawLookup) =>
val lookup =
rawLookup match {
case "pom" => ScalaTargetFromPom
case "java" => NoScalaTargetPureJavaDependency
case "version" => ScalaTargetFromVersion
case "pom" => BinaryVersionLookup.FromDependency
case "java" => BinaryVersionLookup.Java
case "version" => BinaryVersionLookup.FromArtifactVersion
case _ => sys.error("unknown lookup: '" + rawLookup + "'")
}

Expand Down
2 changes: 1 addition & 1 deletion modules/server/src/main/scala/scaladex/server/Server.scala
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ object Server extends LazyLogging {
val filesystem = FilesystemStorage(config.filesystem)
val publishProcess = PublishProcess(paths, filesystem, webDatabase, config.env)(publishPool, system)
val sonatypeClient = new SonatypeClientImpl()
val sonatypeSynchronizer = new SonatypeService(schedulerDatabase, sonatypeClient, publishProcess)
val sonatypeSynchronizer = new SonatypeService(paths, schedulerDatabase, sonatypeClient, publishProcess)
val adminService =
new AdminService(config.env, schedulerDatabase, searchEngine, githubClient, sonatypeSynchronizer)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,14 @@ class AdminService(
githubClientOpt.map { client =>
val githubUpdater = new GithubUpdater(database, client)
new JobScheduler(Job.githubInfo, githubUpdater.updateAll)
} ++
Option.when(!env.isLocal)(new JobScheduler(Job.missingMavenArtifacts, sonatypeSynchronizer.findMissing))
} ++ (
if (!env.isLocal) {
Seq(
new JobScheduler(Job.missingMavenArtifacts, sonatypeSynchronizer.findMissing),
new JobScheduler(Job.nonStandardArtifacts, sonatypeSynchronizer.findNonStandard)
)
} else Seq.empty
)
seq.map(s => s.job.name -> s).toMap
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,12 @@ class ArtifactConverter(paths: DataPaths) extends LazyLogging {
* if the developer follows this convention we extract the relevant parts and we mark
* the library as standard. Otherwise we either have a library like gatling or the scala library itself
*
* @return The artifact name (without suffix), the Scala target, whether this project is a usual Scala library or not
* @return The artifact name (without suffix), the binary version, whether this project is a standard Scala library or not
*/
private def extractMeta(pom: ArtifactModel): Option[ArtifactMeta] = {
val nonStandardLookup =
nonStandardLibs
.find(lib =>
lib.groupId == pom.groupId &&
lib.artifactId == pom.artifactId
nonStandardLibs.find(lib =>
lib.groupId == pom.groupId && lib.artifactId == pom.artifactId
)
.map(_.lookup)

Expand Down Expand Up @@ -118,7 +116,7 @@ class ArtifactConverter(paths: DataPaths) extends LazyLogging {
}

// For example: io.gatling
case Some(ScalaTargetFromPom) =>
case Some(BinaryVersionLookup.FromDependency) =>
for {
dep <- pom.dependencies.find { dep =>
dep.groupId == "org.scala-lang" &&
Expand All @@ -133,7 +131,7 @@ class ArtifactConverter(paths: DataPaths) extends LazyLogging {
isNonStandard = true
)
// For example: typesafe config
case Some(NoScalaTargetPureJavaDependency) =>
case Some(BinaryVersionLookup.Java) =>
Some(
ArtifactMeta(
artifactName = pom.artifactId,
Expand All @@ -143,7 +141,7 @@ class ArtifactConverter(paths: DataPaths) extends LazyLogging {
)

// For example: scala-compiler
case Some(ScalaTargetFromVersion) =>
case Some(BinaryVersionLookup.FromArtifactVersion) =>
for (version <- SemanticVersion.parse(pom.version))
yield ArtifactMeta(
artifactName = pom.artifactId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,49 +9,65 @@ import scaladex.core.model.Artifact._
import scaladex.core.service.SchedulerDatabase
import scaladex.core.service.SonatypeClient
import scaladex.core.util.ScalaExtensions._
import scaladex.data.cleanup.NonStandardLib
import scaladex.infra.DataPaths

class SonatypeService(
dataPaths: DataPaths,
database: SchedulerDatabase,
sonatypeService: SonatypeClient,
publishProcess: PublishProcess
)(implicit ec: ExecutionContext)
extends LazyLogging {
import SonatypeService._

/**
 * Looks for missing versions of every non-standard library returned by
 * `NonStandardLib.load(dataPaths)` and indexes them.
 *
 * @return a message reporting the sum of the counts returned for each library
 */
def findNonStandard(): Future[String] = {
  val nonStandardLibs = NonStandardLib.load(dataPaths)
  for {
    // Build the lookup set once: it does not depend on the library being processed,
    // so converting inside the closure would rebuild it for every library.
    knownRefs <- database.getAllMavenReferences().map(_.toSet)
    result <- nonStandardLibs.mapSync { lib =>
      val groupId = Artifact.GroupId(lib.groupId)
      // get should not throw: it is a fixed set of artifactIds
      val artifactId = Artifact.ArtifactId.parse(lib.artifactId).get
      findAndIndexMissingArtifacts(groupId, artifactId, knownRefs)
    }
  } yield s"Inserted ${result.sum} missing poms"
}

def findMissing(): Future[String] =
for {
groupIds <- database.getAllGroupIds()
mavenReferenceFromDatabase <- database.getAllMavenReferences().map(_.toSet)
groupIds = mavenReferenceFromDatabase.map(_.groupId).toSeq.sorted.map(Artifact.GroupId)
// we sort just to estimate through the logs the percentage of progress
result <- groupIds.sortBy(_.value).mapSync(g => findAndIndexMissingArtifacts(g, None))
} yield s"Inserted ${result.size} missing poms"
result <- groupIds.mapSync(g => findAndIndexMissingArtifacts(g, None, mavenReferenceFromDatabase))
} yield s"Inserted ${result.sum} missing poms"

def syncOne(groupId: GroupId, artifactNameOpt: Option[Artifact.Name]): Future[String] =
for {
result <- findAndIndexMissingArtifacts(groupId, artifactNameOpt)
mavenReferenceFromDatabase <- database.getAllMavenReferences()
result <- findAndIndexMissingArtifacts(groupId, artifactNameOpt, mavenReferenceFromDatabase.toSet)
} yield s"Inserted ${result} poms"

private def findAndIndexMissingArtifacts(groupId: GroupId, artifactNameOpt: Option[Artifact.Name]): Future[Int] =
private def findAndIndexMissingArtifacts(groupId: GroupId, artifactNameOpt: Option[Artifact.Name], knownRefs: Set[MavenReference]): Future[Int] =
for {
mavenReferenceFromDatabase <- database.getAllMavenReferences()
artifactIds <- sonatypeService.getAllArtifactIds(groupId)
scalaArtifactIds = artifactIds.filter(artifact =>
artifactNameOpt.forall(_ == artifact.name) && artifact.isScala && artifact.binaryVersion.isValid
)
result <- scalaArtifactIds
.mapSync(id => findAndIndexMissingArtifacts(groupId, id, mavenReferenceFromDatabase.toSet))
.mapSync(id => findAndIndexMissingArtifacts(groupId, id, knownRefs))
} yield result.sum

private def findAndIndexMissingArtifacts(
groupId: GroupId,
artifactId: ArtifactId,
mavenReferenceFromDatabase: Set[MavenReference]
knownRefs: Set[MavenReference]
): Future[Int] =
for {
versions <- sonatypeService.getAllVersions(groupId, artifactId)
mavenReferences = versions.map(v =>
MavenReference(groupId = groupId.value, artifactId = artifactId.value, version = v.toString)
)
missingVersions = findMissingVersions(mavenReferenceFromDatabase, mavenReferences)
missingVersions = mavenReferences.filterNot(knownRefs)
_ = if (missingVersions.nonEmpty)
logger.warn(s"${missingVersions.size} artifacts are missing for ${groupId.value}:${artifactId.value}")
missingPomFiles <- missingVersions.map(ref => sonatypeService.getPomFile(ref).map(_.map(ref -> _))).sequence
Expand All @@ -63,10 +79,4 @@ class SonatypeService(
case PublishResult.Success => true
case _ => false
}

}

object SonatypeService {

  /**
   * Selects the references found on Sonatype that are not in the database.
   *
   * @param fromDatabase the references already known to the database
   * @param fromSonatype the references listed by Sonatype
   * @return the elements of `fromSonatype` absent from `fromDatabase`, in their original order
   */
  def findMissingVersions(fromDatabase: Set[MavenReference], fromSonatype: Seq[MavenReference]): Seq[MavenReference] =
    fromSonatype.filter(ref => !fromDatabase.contains(ref))
}

This file was deleted.

5 changes: 5 additions & 0 deletions modules/template/src/main/scala/scaladex/view/Job.scala
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ object Job {
"Find missing artifacts in Maven Central of the known group IDs.",
24.hours
)
/**
 * Descriptor of the "non-standard-artifacts" job, which AdminService schedules
 * with `sonatypeSynchronizer.findNonStandard` when the environment is not local.
 *
 * NOTE(review): the third argument (2.hours) is presumably the run frequency,
 * like the 24.hours of the preceding job — confirm against the Job definition.
 */
val nonStandardArtifacts: Job = Job(
"non-standard-artifacts",
"Find missing non-standard artifacts from Maven Central",
2.hours
)

case class Status(state: State, results: Seq[Result], progress: Option[Progress]) {
def isStarted: Boolean = state.isInstanceOf[Started]
Expand Down

0 comments on commit 61150b6

Please sign in to comment.