Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Sonatype statistics #1751

Merged
merged 4 commits into from
Apr 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
132 changes: 132 additions & 0 deletions .github/scripts/plot.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
package plot

import java.io.File
import java.nio.file.{Files, Paths}
import java.time._

import com.twitter.algebird.Operators._
import plotly._
import plotly.element._
import plotly.layout._
import plotly.Plotly._
import upickle.default._
import ujson.{read => _, _}

import com.github.tototoshi.csv._

/** Turns the monthly CSV statistics stored under `stats.Params.base` into a static HTML report. */
object Plot {

  /** Builds one stacked bar chart per artifact and statistic kind and writes them all into
    * `index.html` inside `stats.Params.base` (the directory is created when missing).
    *
    * For every artifact in `stats.Params.artifacts` two charts are rendered: one from the
    * `per-version-stats` directory (total downloads) and one from `per-version-unique-ips`
    * (unique IPs).
    */
  def writePlots(): Unit = {

    /** Extractor for stable `major.minor.patch` version strings.
      *
      * Strings that are not exactly three dot-separated numbers do not match, and neither do
      * 2.x versions whose minor is below 11.
      */
    object relevantVersion {
      val stableVersionRegex = "^(\\d+)\\.(\\d+)\\.(\\d+)$".r

      def unapply(version: String): Option[(Int, Int, Int)] =
        version match {
          case stableVersionRegex(majorStr, minorStr, patchStr) =>
            val major = majorStr.toInt
            val minor = minorStr.toInt
            val patch = patchStr.toInt
            if (major == 2) {
              if (minor >= 11) Some((major, minor, patch))
              else None
            } else Some((major, minor, patch))
          case _ => None
        }
    }

    /** Loads every `<year>/<MM>.csv` file found below `dir` and converts it to bar traces.
      *
      * Years from 2015 up to the current UTC year are scanned. In each file, column 0 is read
      * as the version and column 1 as the download count.
      *
      * @param dir directory containing the `<year>/<MM>.csv` files
      * @param allowedVersion extra filter applied to the raw version string
      * @param filterOutMonths months whose data points are dropped from every trace
      * @return one `Bar` trace per version, ordered by (major, minor, patch), with one point
      *         per month placed on the first day of that month
      */
    def csvToBars(
        dir: File,
        allowedVersion: String => Boolean,
        filterOutMonths: Set[YearMonth] = Set()
    ): Seq[Trace] = {

      // Reads one monthly file eagerly so that the reader can be closed before returning;
      // the previous code left every opened CSV file handle dangling.
      def readMonth(f: File, ym: YearMonth): Vector[(YearMonth, (Int, Int, Int), Int)] = {
        val reader = CSVReader.open(f)
        try
          reader.iterator
            .map(l => (ym, /* version */ l(0), /* downloads */ l(1).toInt))
            .collect {
              case (
                    date,
                    version @ relevantVersion(major, minor, patch),
                    downloads
                  ) if allowedVersion(version) =>
                (date, (major, minor, patch), downloads)
            }
            .toVector
        finally reader.close()
      }

      val data = for {
        year <- 2015 to Year.now(ZoneOffset.UTC).getValue
        month <- 1 to 12
        f = new File(dir, f"$year/$month%02d.csv")
        if f.exists()
        ym = YearMonth.of(year, month)
        elem <- readMonth(f, ym)
      } yield elem

      data
        .groupBy { case (_, version, _) => version }
        .mapValues { stats =>
          stats
            .map { case (date, _, downloads) => (date, downloads) }
            .filterNot { case (date, _) => filterOutMonths(date) }
            .sortBy { case (date, _) => date }
        }
        .toSeq
        .sortBy { case (version, _) => version }
        .map { case ((major, minor, patch), stats) =>
          // Fully qualified: java.time._ is imported too and also defines LocalDateTime.
          val x = stats.map(_._1).map { m =>
            plotly.element.LocalDateTime(m.getYear, m.getMonthValue, 1, 0, 0, 0)
          }
          val y = stats.map(_._2)
          Bar(x, y, name = s"${major}.${minor}.${patch}")
        }
    }

    val dataBase = stats.Params.base

    // One <h2>/<div>/<script> fragment per (artifact, statistic kind) pair.
    val htmlSnippets =
      for {
        artifact <- stats.Params.artifacts
        (baseDir, divId, title) <- Seq(
          (
            "per-version-stats",
            s"${artifact}-total",
            s"${artifact} (total downloads)"
          ),
          (
            "per-version-unique-ips",
            s"${artifact}-unique",
            s"${artifact} (unique IPs)"
          )
        )
        bars = csvToBars(
          dataBase.resolve(baseDir).resolve(artifact).toFile,
          _ => true /* keep all the versions */
        )
      } yield s"""
                 |<h2 id="${divId}-plot">${title} <a href="#${divId}-plot">#</a></h2>
                 |<div id="${divId}"></div>
                 |<script>${Plotly.jsSnippet(
                  divId,
                  bars,
                  Layout(barmode = BarMode.Stack)
                )}</script>
                 |""".stripMargin

    val html =
      s"""<!DOCTYPE html>
         |<html>
         |<head>
         |<title>Scalafix Statistics</title>
         |<script src="https://cdn.plot.ly/plotly-${Plotly.plotlyVersion}.min.js"></script>
         |</head>
         |<body>
         |<h1>Scalafix Statistics</h1>
         |${htmlSnippets.mkString}
         |</body>
         |</html>
         |""".stripMargin

    Files.createDirectories(dataBase)
    Files.write(dataBase.resolve("index.html"), html.getBytes("UTF-8"))

  }
}
184 changes: 184 additions & 0 deletions .github/scripts/sonatype-stats.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
package stats

import java.nio.file._
import java.time.{YearMonth, ZoneOffset}

import com.softwaremill.sttp.quick._
import upickle.default._
import ujson.{read => _, _}

/** Case-class models for JSON payloads, together with their upickle `ReadWriter` instances. */
object Responses {

  // --- payload models ---

  // UniqueIpData / UniqueIpResp are not referenced by the other code visible in this file.
  case class UniqueIpData(total: Int)
  case class UniqueIpResp(data: UniqueIpData)

  // One entry of the `data` array returned by the stats "projects" endpoint.
  case class Elem(id: String, name: String)

  // --- codecs (the nested model's codec is declared before the one that needs it) ---

  implicit val uniqueIpDataRW: ReadWriter[UniqueIpData] = macroRW
  implicit val uniqueIpRespRW: ReadWriter[UniqueIpResp] = macroRW
  implicit val elemRW: ReadWriter[Elem] = macroRW

}

import Responses._

/** Run-wide settings, read from the environment once when this object is first accessed.
  *
  * Initialization fails with a `RuntimeException` when `SONATYPE_USERNAME` or
  * `SONATYPE_PASSWORD` is not set.
  */
object Params {

  // organization one was granted write access to
  val proj = sys.env.getOrElse("SONATYPE_PROJECT", "ch.epfl.scala")
  // actual organization used for publishing (must have proj as prefix)
  // Read from its own variable: it previously re-read SONATYPE_PROJECT, which made it
  // impossible for the organization to differ from proj. Falls back to proj when unset.
  val organization = sys.env.getOrElse("SONATYPE_ORGANIZATION", proj)

  val sonatypeUser = sys.env.getOrElse(
    "SONATYPE_USERNAME",
    sys.error("SONATYPE_USERNAME not set")
  )
  val sonatypePassword: String = sys.env.getOrElse(
    "SONATYPE_PASSWORD",
    sys.error("SONATYPE_PASSWORD not set")
  )

  // Current month (UTC); collection walks backwards in time from here.
  val start = YearMonth.now(ZoneOffset.UTC)

  // Months at or after this one keep being walked through even when their response is empty.
  val cutOff = start.minusMonths(4L)

  // Note: this assumes the current working directory is the repository root directory!
  val base = Paths.get("sonatype-stats")

  // Artifacts whose per-version statistics are collected and plotted.
  val artifacts = Set(
    "scalafix-core_2.12",
    "scalafix-core_2.13",
    "scalafix-interfaces"
  )
}

/** One statistic series of the Sonatype stats service and the place it is stored on disk.
  *
  * @param base directory below which one file per month is kept, as `<yyyy>/<MM>.<ext>`
  * @param ext extension of the stored files
  * @param empty tells whether a trimmed response body carries no data
  * @param name last path segment of the stats endpoint that is queried
  * @param tpe value sent as the `t` query parameter
  * @param projId value sent as the `p` query parameter
  * @param organization value sent as the `g` query parameter
  * @param artifact value sent as the `a` query parameter; an empty string is sent for `None`
  */
case class Data(
    base: Path,
    ext: String,
    empty: String => Boolean,
    name: String,
    tpe: String,
    projId: String,
    organization: String,
    artifact: Option[String]
) {

  /** Path of the file holding the data of `monthYear`: `<base>/<yyyy>/<MM>.<ext>`. */
  def fileFor(monthYear: YearMonth): Path = {
    val year = monthYear.getYear
    val month = monthYear.getMonthValue
    base.resolve(f"$year%04d/$month%02d.$ext")
  }

  /** Whether the file for `monthYear` already exists as a regular file. */
  def exists(monthYear: YearMonth): Boolean =
    Files.isRegularFile(fileFor(monthYear))

  /** Stores `content` (UTF-8) as the file for `monthYear`, creating parent directories. */
  def write(monthYear: YearMonth, content: String): Unit = {
    System.err.println(s"Writing $monthYear (${content.length} B)")
    val f = fileFor(monthYear)
    Files.createDirectories(f.getParent)
    Files.write(f, content.getBytes("UTF-8"))
  }

  /** URL returning this series for the single month `monthYear` (`from=<yyyyMM>`, `nom=1`). */
  def urlFor(monthYear: YearMonth) = {
    val year = monthYear.getYear
    val month = monthYear.getMonthValue
    val artifactParam = artifact.getOrElse("")
    val from = f"$year%04d$month%02d"

    uri"https://oss.sonatype.org/service/local/stats/$name?p=$projId&g=$organization&a=$artifactParam&t=$tpe&from=$from&nom=1"
  }

  /** Lazily downloads the months of `monthYears` that have no file on disk yet.
    *
    * Months whose file already exists are skipped and do not appear in the result. Each
    * remaining month is fetched when the returned iterator is advanced; a non-empty body is
    * written to disk.
    *
    * @return for every fetched month, the month paired with `true` when data was written
    *         and `false` when the response was empty
    * @throws RuntimeException when a request does not succeed
    */
  def process(monthYears: Iterator[YearMonth]): Iterator[(YearMonth, Boolean)] =
    monthYears
      .filterNot(exists)
      .map { monthYear =>
        val u = urlFor(monthYear)

        System.err.println(s"Getting $monthYear: $u")

        val statResp = sttp.auth
          .basic(Params.sonatypeUser, Params.sonatypePassword)
          .header("Accept", "application/json")
          .get(u)
          .send()

        // Handle the failure side explicitly instead of calling `.right.get` on the body.
        val body = statResp.body match {
          case Right(text) if statResp.isSuccess => text.trim
          case _ => sys.error("Error getting project stats: " + statResp.statusText)
        }

        val empty0 = empty(body)
        if (empty0)
          System.err.println(s"Empty response at $monthYear")
        else
          write(monthYear, body)

        monthYear -> !empty0
      }
}

/** Entry point downloading the per-version statistics of every configured artifact. */
object SonatypeStats {

  /** Resolves the Sonatype project id of `Params.proj`, then, for each artifact in
    * `Params.artifacts`, fetches the months missing on disk for both the unique-IP (`ip`) and
    * the raw download (`raw`) series.
    *
    * Each series is walked backwards from `Params.start`, one month at a time, and stops at
    * the first fetched month that is both empty and older than `Params.cutOff`.
    *
    * @throws RuntimeException when a request to the stats service does not succeed
    * @throws NoSuchElementException when `Params.proj` is not among the returned projects
    */
  def collect(): Unit = {
    val projId: String = {
      val projResp = sttp.auth
        .basic(Params.sonatypeUser, Params.sonatypePassword)
        .header("Accept", "application/json")
        .get(uri"https://oss.sonatype.org/service/local/stats/projects")
        .send()

      // Handle the failure side explicitly instead of calling `.right.get` on the body.
      val respJson = projResp.body match {
        case Right(text) if projResp.isSuccess => ujson.read(text)
        case _ => sys.error("Error getting project list: " + projResp.statusText)
      }

      // project name -> project id
      val projectIds: Map[String, String] =
        read[Seq[Elem]](respJson("data"))
          .map(e => e.name -> e.id)
          .toMap

      // Same exception type as a plain Map lookup, but with a message that lists the
      // projects that were actually returned.
      projectIds.getOrElse(
        Params.proj,
        throw new NoSuchElementException(
          s"Sonatype project '${Params.proj}' not found; known projects: " +
            projectIds.keys.toSeq.sorted.mkString(", ")
        )
      )
    }

    // Both series share everything except the target directory and the `t` parameter.
    def perVersion(artifact: String, dir: String, tpe: String): Data =
      Data(
        Params.base.resolve(dir).resolve(artifact),
        "csv",
        _.isEmpty,
        "slices_csv",
        tpe,
        projId,
        Params.organization,
        artifact = Some(artifact)
      )

    val artifactStatsPerVersion = Params.artifacts.flatMap { artifact =>
      Seq(
        perVersion(artifact, "per-version-unique-ips", "ip"),
        perVersion(artifact, "per-version-stats", "raw")
      )
    }

    for (data <- artifactStatsPerVersion) {
      val it = Iterator.iterate(Params.start)(_.minusMonths(1L))
      // `process` is lazy: counting the elements is what drives the downloads.
      val processed = data
        .process(it)
        .takeWhile { case (monthYear, nonEmpty) =>
          nonEmpty || monthYear.compareTo(Params.cutOff) >= 0
        }
        .length

      System.err.println(
        s"Processed $processed months in ${data.base} for type ${data.tpe}"
      )
    }
  }

}
16 changes: 16 additions & 0 deletions .github/scripts/update.sc
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env -S scala shebang

// Adapted from https://github.com/alexarchambault/sonatype-stats
//
// /!\ Run it from the repository root directory!

//> using scala "2.12.17"
//> using lib "com.softwaremill.sttp::core:1.5.10"
//> using lib "com.lihaoyi::upickle:2.0.0"
//> using lib "com.github.tototoshi::scala-csv:1.3.5"
//> using lib "com.twitter::algebird-core:0.13.0"
//> using lib "org.plotly-scala::plotly-render:0.5.2"
//> using files "sonatype-stats.scala", "plot.scala"

// Step 1: download the monthly statistics that are not yet stored under ./sonatype-stats.
stats.SonatypeStats.collect()
// Step 2: regenerate sonatype-stats/index.html from the CSV files found on disk.
plot.Plot.writePlots()
29 changes: 29 additions & 0 deletions .github/workflows/sonatype-stats.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@

# Refreshes the Sonatype download statistics and pushes the result back to the repository.
on:
# Allows the workflow to be started manually.
workflow_dispatch:
schedule:
# At 00:00 on the 15th day of every month.
- cron: '0 0 15 * *'

jobs:
update_data:
runs-on: ubuntu-20.04
steps:
- uses: coursier/cache-action@v6
- uses: VirtusLab/scala-cli-setup@v0.2.0
- uses: actions/checkout@v3
# Runs the collection + plotting script; it reads the Sonatype credentials from the environment.
- name: Update stats
env:
SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }}
SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }}
run: .github/scripts/update.sc
# Stages everything and commits even when nothing changed (--allow-empty), then pushes.
- name: Push changes
run: |
git config --global user.name 'Scala Center Bot'
git config --global user.email 'scala-center-bot@users.noreply.github.com'
git add .
git commit --allow-empty -m "Update stats"
git push
# NOTE(review): presumably keeps the scheduled workflow from being disabled on an inactive
# repository; confirm against the action's documentation.
- uses: gautamkrishnar/keepalive-workflow@v1
with:
committer_username: scala-center-bot
committer_email: scala-center-bot@users.noreply.github.com
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,6 @@ out/
.metals/
.vscode/
metals.sbt

# Scala CLI specific
.scala-build/
Loading