Skip to content

Commit

Permalink
Fix #24 and improve statistics reporting
Browse files Browse the repository at this point in the history
  • Loading branch information
dacr committed Jul 28, 2024
1 parent 5bb7232 commit 6b9c019
Show file tree
Hide file tree
Showing 12 changed files with 101 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,12 @@ object PhotoOperations {
PhotoId(ulid)
}

def buildPhotoCategory(baseDirectory: BaseDirectoryPath, photoPath: PhotoPath): Option[PhotoCategory] = {
val category = Option(photoPath.getParent).map { photoParentDir =>
def buildPhotoEvent(baseDirectory: BaseDirectoryPath, photoPath: PhotoPath): Option[PhotoEvent] = {
val event = Option(photoPath.getParent).map { photoParentDir =>
val text = baseDirectory.relativize(photoParentDir).toString
PhotoCategory(text)
PhotoEvent(text)
}
category.filter(_.text.size > 0)
event.filter(_.text.size > 0)
}

def getOriginalFileLastModified(original: Original): IO[PhotoFileIssue, OffsetDateTime] = {
Expand Down Expand Up @@ -301,8 +301,8 @@ object PhotoOperations {
photoId = buildPhotoId(photoTimestamp)
photoSource <- buildPhotoSource(photoId, original)
foundPlace = extractPlace(drewMetadata)
category = buildPhotoCategory(original.baseDirectory, original.path)
photoDescription = PhotoDescription(category = category)
event = buildPhotoEvent(original.baseDirectory, original.path)
photoDescription = PhotoDescription(event = event)
_ <- PhotoStoreService.photoSourceUpsert(originalId, photoSource)
_ <- PhotoStoreService.photoMetaDataUpsert(photoId, photoMetaData) // TODO LMDB add a transaction feature to avoid leaving partial data...
_ <- PhotoStoreService.photoDescriptionUpsert(photoId, photoDescription)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ case class PhotoSimpleQuery(
timeRange: Option[TimeRange]
)

case class CategorySimpleQuery(
case class EventSimpleQuery(
ownerId: Option[PhotoOwnerId],
keywords: Option[List[String]]
)
Expand All @@ -30,5 +30,5 @@ case class CategorySimpleQuery(
trait PhotoService {
def photoGet(id: PhotoId): IO[PhotoServiceIssue, Photo]
def photoFind(query: PhotoSimpleQuery): Stream[PhotoServiceIssue, Photo]
def categoryFind(query: CategorySimpleQuery): Stream[PhotoServiceIssue, PhotoCategory]
def eventFind(query: EventSimpleQuery): Stream[PhotoServiceIssue, PhotoEvent]
}
Original file line number Diff line number Diff line change
Expand Up @@ -522,15 +522,15 @@ class PhotoStoreServiceLive private (
def daoDescriptionToDescription(from: DaoPhotoDescription): PhotoDescription = {
PhotoDescription(
text = from.text,
category = from.category.map(PhotoCategory.apply),
event = from.event.map(PhotoEvent.apply),
keywords = from.keywords.map(keywords => keywords.map(PhotoKeyword.apply))
)
}

def descriptionsToDaoDescription(from: PhotoDescription): DaoPhotoDescription = {
DaoPhotoDescription(
text = from.text,
category = from.category.map(_.text),
event = from.event.map(_.text),
keywords = from.keywords.map(keywords => keywords.map(_.text))
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ import java.time.OffsetDateTime

case class DaoPhotoDescription(
text: Option[String],
category: Option[String] = None,
event: Option[String] = None,
keywords: Option[Set[String]] = None
) derives JsonCodec
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ object OriginalsStreamSpec extends ZIOSpecDefault with TestDatasets {
photos.size == 5,
photoFileSources.head.original.baseDirectory == dataset1,
photoFilePaths.toSet == Set(dataset1Example1, dataset1Example2, dataset1Example3, dataset1Example4, dataset1Example5),
photos.forall(_.description.flatMap(_.category).isEmpty)
photos.forall(_.description.flatMap(_.event).isEmpty)
)
},
test("collect original photos with tree dataset") {
Expand All @@ -37,12 +37,12 @@ object OriginalsStreamSpec extends ZIOSpecDefault with TestDatasets {
photos <- originalsStream.runCollect
photoFileSources = photos.map(_.source)
photoFilePaths = photoFileSources.map(_.original.path)
photoCategories = photos.flatMap(_.description.flatMap(_.category).map(_.text))
photoEvents = photos.flatMap(_.description.flatMap(_.event).map(_.text))
} yield assertTrue(
photos.size == 2,
photoFileSources.head.original.baseDirectory == dataset2,
photoFilePaths.toSet == Set(dataset2tag1, dataset2landscape1),
photoCategories.toSet == Set("landscapes", "tags")
photoEvents.toSet == Set("landscapes", "tags")
)
}
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ object PhotoOperationsSpec extends ZIOSpecDefault with TestDatasets {

val testLogic =
suite("Photo operations")(
test("photo category exists") {
val check = (basedir: String, path: String, expected: Option[String]) => buildPhotoCategory(Path.of(basedir), Path.of(path)) == expected.map(PhotoCategory.apply)
test("photo event exists") {
val check = (basedir: String, path: String, expected: Option[String]) => buildPhotoEvent(Path.of(basedir), Path.of(path)) == expected.map(PhotoEvent.apply)
assertTrue(
check("tmp", "tmp/toto.jpeg", None),
check("tmp/", "tmp/toto.jpeg", None),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package fr.janalyse.sotohp.model

import java.time.OffsetDateTime

case class PhotoCategory(
case class PhotoEvent(
text: String
) extends AnyVal

Expand All @@ -12,6 +12,6 @@ case class PhotoKeyword(

case class PhotoDescription(
text: Option[String] = None, // user description text if some has been given
category: Option[PhotoCategory] = None, // default value is based on user directory tree where photo are stored
keywords: Option[Set[PhotoKeyword]] = None // default value is based on keywords extracted from category using various extraction rules
event: Option[PhotoEvent] = None, // default value is based on user directory tree where photo are stored
keywords: Option[Set[PhotoKeyword]] = None // default value is based on keywords extracted from event using various extraction rules
)
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ case class SaoPhoto(
fileSize: Long,
fileHash: String,
fileLastUpdated: OffsetDateTime,
category: Option[String],
event: Option[String],
shootDateTime: Option[OffsetDateTime],
camera: Option[String],
// tags: Map[String, String],
Expand All @@ -34,7 +34,7 @@ case class SaoPhoto(

object SaoPhoto {
// ------------------------------------------
// TODO temporary keyword extraction from category
// TODO temporary keyword extraction from event
/** Splits a camel-case text into its word tokens.
  *
  * A split occurs before an upper-case letter that is followed by a non-upper-case character,
  * and between a non-upper-case character and a following run of upper-case letters.
  *
  * @param that the text to tokenize
  * @return the tokens, in their order of appearance
  */
def camelTokenize(that: String): Array[String] = {
  val wordBoundary = "(?=[A-Z][^A-Z])|(?:(?<=[^A-Z])(?=[A-Z]+))"
  that.split(wordBoundary)
}

/** Converts a camel-case text to kebab-case.
  *
  * The text is tokenized with `camelTokenize`, each token is lower-cased, and the tokens are
  * joined with "-".
  *
  * @param that the text to convert
  * @return the kebab-case form of `that`
  */
def camelToKebabCase(that: String): String = {
  val loweredTokens = camelTokenize(that).map(token => token.toLowerCase)
  loweredTokens.mkString("-")
}
Expand Down Expand Up @@ -201,9 +201,9 @@ object SaoPhoto {

def extractKeywords(input: Option[String]): List[String] =
input match {
case None => Nil
case Some(category) =>
applyFixes(fixes, category)
case None => Nil
case Some(event) =>
applyFixes(fixes, event)
.split("[- /,]+")
.toList
.filter(_.size > 0)
Expand All @@ -220,20 +220,20 @@ object SaoPhoto {
// ------------------------------------------

def fromPhoto(photo: Photo): SaoPhoto = {
val category = photo.description.flatMap(_.category).map(_.text)
val event = photo.description.flatMap(_.event).map(_.text)
SaoPhoto(
id = photo.source.photoId.id.toString,
timestamp = photo.timestamp,
filePath = photo.source.original.path.toString,
fileSize = photo.source.fileSize,
fileHash = photo.source.fileHash.code,
fileLastUpdated = photo.source.fileLastModified,
category = category,
event = event,
shootDateTime = photo.metaData.flatMap(_.shootDateTime),
camera = photo.metaData.flatMap(_.cameraName),
// tags = photo.metaData.map(_.tags).getOrElse(Map.empty),
// keywords = photo.description.map(_.keywords.toList).getOrElse(Nil),
keywords = extractKeywords(category), // TODO temporary keyword extraction from category
keywords = extractKeywords(event), // TODO temporary keyword extraction from event
classifications = photo.foundClassifications.map(_.classifications.map(_.name).distinct).getOrElse(Nil),
detectedObjects = photo.foundObjects.map(_.objects.map(_.name).distinct).getOrElse(Nil),
detectedObjectsCount = photo.foundObjects.map(_.objects.size).getOrElse(0),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ case class QuickFixState(

/** Photo information carried through the PlacesFix processing.
  *
  * @param state the state of the photo
  * @param event the event attached to the photo, if any (defaults to None)
  * @param place the place attached to the photo, if any (defaults to None)
  */
case class QuickFixPhoto(
state: PhotoState,
event: Option[PhotoEvent] = None,
place: Option[PhotoPlace] = None
)

Expand All @@ -38,10 +39,11 @@ object PlacesFix extends ZIOAppDefault with CommonsCLI {
for {
foundPlace <- input.place
foundState = input.state
} yield QuickFixPhoto(state = foundState, place = foundPlace)
foundEvent <- input.description.map(_.flatMap(_.event))
} yield QuickFixPhoto(state = foundState, event = foundEvent, place = foundPlace)
}

def quickFix(state: QuickFixState, photos: Chunk[QuickFixPhoto]): ZIO[PhotoStoreService, IOException, QuickFixState] = {
def quickFixByTimeWindow(state: QuickFixState, photos: Chunk[QuickFixPhoto]): ZIO[PhotoStoreService, IOException, QuickFixState] = {
val fixable = for {
index <- photos.indices
current = photos(index)
Expand All @@ -63,11 +65,24 @@ object PlacesFix extends ZIOAppDefault with CommonsCLI {
}

val logic = ZIO.logSpan("PlacesFix") {
val photoStream = PhotoStream.photoLazyStream()
photoStream
.mapZIO(convert)
.sliding(300, 50)
.runFoldZIO(QuickFixState())((state, photos) => quickFix(state, photos))
.flatMap(state => ZIO.logInfo(s"${state.fixed.size} places fixed"))
val photoStream = PhotoStream.photoLazyStream()
val initialState = QuickFixState()
for {
step1State <- photoStream
.mapZIO(convert)
.sliding(300, 50)
.runFoldZIO(initialState)((state, photos) => quickFixByTimeWindow(state, photos))
events <- photoStream
.mapZIO(convert)
.filter(_.event.isDefined)
.map(_.event.get)
.runFold(Set.empty[PhotoEvent])((set, event) => set + event)
// step2State <- photoStream
// .mapZIO(convert)
// .filter(_.event.isDefined)
// .runFoldZIO(step1State)((state, photos) => ???)
finalState = step1State
_ <- ZIO.logInfo(s"${finalState.fixed.size} places fixed using timeWindow")
} yield ()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,28 @@ package fr.janalyse.sotohp.cli
import fr.janalyse.sotohp.core.*
import fr.janalyse.sotohp.model.*
import fr.janalyse.sotohp.processor.NormalizeProcessor
import fr.janalyse.sotohp.store.{PhotoStoreService, LazyPhoto}
import fr.janalyse.sotohp.store.{LazyPhoto, PhotoStoreService}
import zio.*
import zio.config.typesafe.*
import zio.lmdb.LMDB

import java.time.{OffsetDateTime, Instant}
import java.time.temporal.ChronoUnit.{MONTHS, YEARS}
import java.time.{Instant, OffsetDateTime}
import scala.io.AnsiColor.*

case class Statistics(
count: Int = 0,
geolocalizedCount: Int = 0,
geoLocalizedCount: Int = 0,
normalizedFailureCount: Int = 0,
facesCount: Int = 0,
duplicated: Map[String, Int] = Map.empty,
missingCount: Int = 0,
modifiedCount: Int = 0,
missingShootingDate: Int = 0,
invalidShootingDateCount: Int = 0
invalidShootingDateCount: Int = 0,
eventsCount: Map[Option[PhotoEvent], Int] = Map.empty,
oldestDigitalShootingDate: Option[OffsetDateTime] = None,
newestDigitalShootingDate: Option[OffsetDateTime] = None
)

object Statistics extends ZIOAppDefault with CommonsCLI {
Expand All @@ -36,7 +40,8 @@ object Statistics extends ZIOAppDefault with CommonsCLI {
Scope.default
)

val shootingDateMinimumValidYear = 1826 // https://en.wikipedia.org/wiki/History_of_photography
val shootingDateMinimumValidYear = 1826 // https://en.wikipedia.org/wiki/History_of_photography
val digitalShootingDateMinimumValidYear = 1989 // https://en.wikipedia.org/wiki/Digital_camera

def updateStats(stats: Statistics, zphoto: LazyPhoto) = {
for {
Expand All @@ -48,13 +53,14 @@ object Statistics extends ZIOAppDefault with CommonsCLI {
shootingDate = meta.flatMap(_.shootDateTime)
filehash = source.fileHash.code
originalFound <- ZIO.attempt(source.original.path.toFile.exists())
description <- zphoto.description.some
originalModified <- ZIO
.attempt(source.fileLastModified.toInstant.toEpochMilli != source.original.path.toFile.lastModified())
.when(originalFound)
.someOrElse(false)
} yield {
val updatedCount = stats.count + 1
val updatedGeolocalizedCount = stats.geolocalizedCount + (if (place.isDefined) 1 else 0)
val updatedGeolocalizedCount = stats.geoLocalizedCount + (if (place.isDefined) 1 else 0)
val updatedNormalizedFailureCount = stats.normalizedFailureCount + (if (hasNormalized) 0 else 1)
val updatedFacesCount = stats.facesCount + faces.map(_.count).getOrElse(0)
val updatedMissingCount = stats.missingCount + (if (originalFound) 0 else 1)
Expand All @@ -65,31 +71,58 @@ object Statistics extends ZIOAppDefault with CommonsCLI {
})
val updatedMissingShootingDateCount = stats.missingShootingDate + (if (shootingDate.isEmpty) 1 else 0)
val updatedInvalidShootingDateCount = stats.invalidShootingDateCount + (if (shootingDate.exists(_.getYear < shootingDateMinimumValidYear)) 1 else 0)
val updatedEventsCount = stats.eventsCount + (stats.eventsCount.get(description.event) match {
case None => description.event -> 1
case Some(count) => description.event -> (count + 1)
})
val updatedOldestValidTimestamp = (stats.oldestDigitalShootingDate, shootingDate) match {
case (_, Some(date)) if date.getYear < digitalShootingDateMinimumValidYear => stats.oldestDigitalShootingDate
case (None, Some(date)) => Some(date)
case (Some(currentOldest), Some(date)) if date.isBefore(currentOldest) => Some(date)
case _ => stats.oldestDigitalShootingDate
}
val updatedNewestValidTimestamp = (stats.newestDigitalShootingDate, shootingDate) match {
case (None, Some(date)) => Some(date)
case (Some(currentNewest), Some(date)) if date.isAfter(currentNewest) => Some(date)
case _ => stats.newestDigitalShootingDate
}
stats.copy(
count = updatedCount,
geolocalizedCount = updatedGeolocalizedCount,
geoLocalizedCount = updatedGeolocalizedCount,
normalizedFailureCount = updatedNormalizedFailureCount,
duplicated = updatedDuplicated,
facesCount = updatedFacesCount,
missingCount = updatedMissingCount,
modifiedCount = updatedModifiedCount,
missingShootingDate = updatedMissingShootingDateCount,
invalidShootingDateCount = updatedInvalidShootingDateCount
invalidShootingDateCount = updatedInvalidShootingDateCount,
eventsCount = updatedEventsCount,
oldestDigitalShootingDate = updatedOldestValidTimestamp,
newestDigitalShootingDate = updatedNewestValidTimestamp
)
}
}

def reportStats(stats: Statistics) = {
import stats.*
val duplicatedCount = stats.duplicated.filter((_, count) => count > 1).size
val duplicatedCount = stats.duplicated.count((_, count) => count > 1)
val eventCount = eventsCount.count((k, v) => k.isDefined)
val (digitalShootingMonths, digitalShootingYears) = (oldestDigitalShootingDate, newestDigitalShootingDate) match {
case (Some(oldest), Some(newest)) => (MONTHS.between(oldest, newest), YEARS.between(oldest, newest))
case _ => (0, 0)
}
for {
_ <- Console.printLine(s"${UNDERLINED}${BLUE}Photo statistics :$RESET")
_ <- Console.printLine(s"${GREEN}- $count photos$RESET")
_ <- Console.printLine(s"${GREEN}- $digitalShootingMonths months of digital photography ($digitalShootingYears years)$RESET")
_ <- Console.printLine(s"${GREEN} - ${oldestDigitalShootingDate.get} -> ${newestDigitalShootingDate.get}$RESET").when(oldestDigitalShootingDate.isDefined && newestDigitalShootingDate.isDefined)
_ <- Console.printLine(s"${GREEN}- $facesCount people faces$RESET")
_ <- Console.printLine(s"${GREEN}- $geolocalizedCount geolocalized photos $YELLOW(${count-geolocalizedCount} without GPS infos)$RESET")
_ <- Console.printLine(s"${GREEN}- $geoLocalizedCount geolocalized photos $YELLOW(${count - geoLocalizedCount} without GPS infos)$RESET")
_ <- Console.printLine(s"${GREEN}- $eventCount events")
_ <- Console.printLine(s"${YELLOW}- $duplicatedCount duplicated photos$RESET").when(duplicatedCount > 0)
_ <- Console.printLine(s"${YELLOW}- $missingShootingDate photos without shooting date$RESET").when(missingShootingDate > 0)
_ <- Console.printLine(s"${YELLOW}- $modifiedCount modified originals$RESET").when(modifiedCount > 0)
_ <- Console.printLine(s"${YELLOW}- ${eventsCount.getOrElse(None, 0)} orphan photos (no related event)$RESET")
_ <- Console.printLine(s"${RED}- $missingCount missing originals !!$RESET").when(missingCount > 0)
_ <- Console.printLine(s"${RED}- $invalidShootingDateCount invalid shooting date year (< $shootingDateMinimumValidYear)$RESET").when(invalidShootingDateCount > 0)
_ <- Console.printLine(s"${RED}- $normalizedFailureCount not loadable photos (probably not supported format or corrupted)$RESET").when(normalizedFailureCount > 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ object SynchronizeAndProcess extends ZIOAppDefault with CommonsCLI {
.mapZIO(classificationProcessor.analyze)
.mapZIO(objectsDetectionProcessor.analyze)
.mapZIO(facesProcessor.analyze)
.filter(_.lastSynchronized.isEmpty)
.filter(_.lastSynchronized.isEmpty) // TODO it only synchronizes new photos, changes are not synchronized
.grouped(500)
.mapZIO(SearchService.publish)
.mapZIO(updatePhotoStates)
Expand Down
Loading

0 comments on commit 6b9c019

Please sign in to comment.