Skip to content

Commit

Permalink
Address Ben's comments
Browse files Browse the repository at this point in the history
  • Loading branch information
spenes committed Apr 30, 2024
1 parent 2dd6bd0 commit 5cebac0
Show file tree
Hide file tree
Showing 16 changed files with 116 additions and 111 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer}

import com.snowplowanalytics.snowplow.enrich.common.enrichments.AtomicFields.LimitedAtomicField
import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent
import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicFieldValidationError
import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicError

final case class AtomicFields(value: List[LimitedAtomicField])

Expand Down Expand Up @@ -134,7 +134,7 @@ object AtomicFields {
AtomicFields(withLimits)
}

def errorsToSchemaViolation(errors: NonEmptyList[AtomicFieldValidationError]): FailureDetails.SchemaViolation = {
def errorsToSchemaViolation(errors: NonEmptyList[AtomicError]): FailureDetails.SchemaViolation = {
val clientError = ValidationError(ValidatorError.InvalidData(errors.map(_.toValidatorReport)), None)

FailureDetails.SchemaViolation.IgluError(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import com.snowplowanalytics.snowplow.badrows.FailureDetails

import com.snowplowanalytics.snowplow.enrich.common.enrichments.AtomicFields.LimitedAtomicField
import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent
import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicFieldValidationError
import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicError

/**
* Atomic fields length validation inspired by
Expand Down Expand Up @@ -52,22 +52,23 @@ object AtomicFieldsLengthValidator {
private def validateField(
event: EnrichedEvent,
atomicField: LimitedAtomicField
): Either[AtomicFieldValidationError, Unit] = {
): Either[AtomicError.FieldLengthError, Unit] = {
val actualValue = atomicField.value.enrichedValueExtractor(event)
if (actualValue != null && actualValue.length > atomicField.limit)
AtomicFieldValidationError(
s"Field is longer than maximum allowed size ${atomicField.limit}",
atomicField.value.name,
AtomicFieldValidationError.AtomicFieldLengthExceeded
).asLeft
AtomicError
.FieldLengthError(
s"Field is longer than maximum allowed size ${atomicField.limit}",
atomicField.value.name
)
.asLeft
else
Right(())
}

private def handleAcceptableErrors[F[_]: Monad](
invalidCount: F[Unit],
event: EnrichedEvent,
errors: NonEmptyList[AtomicFieldValidationError]
errors: NonEmptyList[AtomicError.FieldLengthError]
): F[Unit] =
invalidCount *>
Monad[F].pure(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import java.lang.{Integer => JInteger}

import cats.syntax.either._

import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicFieldValidationError
import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicError

/**
* Contains enrichments related to the client - where the client is the software which is using the
Expand All @@ -36,7 +36,7 @@ object ClientEnrichments {
* @param res The packed string holding the screen dimensions
* @return the ResolutionTuple or an error message, boxed in a Scalaz Validation
*/
val extractViewDimensions: (String, String) => Either[AtomicFieldValidationError, (JInteger, JInteger)] =
val extractViewDimensions: (String, String) => Either[AtomicError.ParseError, (JInteger, JInteger)] =
(field, res) =>
(res match {
case ResRegex(width, height) =>
Expand All @@ -45,7 +45,7 @@ object ClientEnrichments {
.leftMap(_ => "Could not be converted to java.lang.Integer s")
case _ => s"Does not conform to regex ${ResRegex.toString}".asLeft
}).leftMap { msg =>
AtomicFieldValidationError(msg, field, AtomicFieldValidationError.ParseError)
AtomicError.ParseError(msg, field)
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import com.snowplowanalytics.refererparser._

import com.snowplowanalytics.iglu.client.IgluCirceClient
import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup
import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicFieldValidationError
import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicError

import com.snowplowanalytics.iglu.core.SelfDescribingData
import com.snowplowanalytics.iglu.core.circe.implicits._
Expand Down Expand Up @@ -338,7 +338,7 @@ object EnrichmentManager {
.toEither
}

def setCollectorTstamp(event: EnrichedEvent, timestamp: Option[DateTime]): Either[AtomicFieldValidationError, Unit] =
def setCollectorTstamp(event: EnrichedEvent, timestamp: Option[DateTime]): Either[AtomicError.ParseError, Unit] =
EE.formatCollectorTstamp(timestamp).map { t =>
event.collector_tstamp = t
().asRight
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import cats.syntax.option._
import org.joda.time.{DateTime, DateTimeZone, Period}
import org.joda.time.format.DateTimeFormat

import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicFieldValidationError
import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicError

import com.snowplowanalytics.snowplow.badrows._

Expand Down Expand Up @@ -49,14 +49,14 @@ object EventEnrichments {
* @param Optional collectorTstamp
* @return Validation boxing the result of making the timestamp Redshift-compatible
*/
def formatCollectorTstamp(collectorTstamp: Option[DateTime]): Either[AtomicFieldValidationError, String] =
def formatCollectorTstamp(collectorTstamp: Option[DateTime]): Either[AtomicError.ParseError, String] =
collectorTstamp match {
case None => AtomicFieldValidationError("Field not set", "collector_tstamp", AtomicFieldValidationError.ParseError).asLeft
case None => AtomicError.ParseError("Field not set", "collector_tstamp").asLeft
case Some(t) =>
val formattedTimestamp = toTimestamp(t)
if (formattedTimestamp.startsWith("-") || t.getYear > 9999 || t.getYear < 0) {
val msg = s"Formatted as $formattedTimestamp is not Redshift-compatible"
AtomicFieldValidationError(msg, "collector_tstamp", AtomicFieldValidationError.ParseError).asLeft
AtomicError.ParseError(msg, "collector_tstamp").asLeft
} else
formattedTimestamp.asRight
}
Expand Down Expand Up @@ -113,26 +113,28 @@ object EventEnrichments {
* @param tstamp The timestamp as stored in the Tracker Protocol
* @return a Tuple of two Strings (date and time), or an error message if the format was invalid
*/
val extractTimestamp: (String, String) => Either[AtomicFieldValidationError, String] =
val extractTimestamp: (String, String) => Either[AtomicError.ParseError, String] =
(field, tstamp) =>
try {
val dt = new DateTime(tstamp.toLong)
val timestampString = toTimestamp(dt)
if (timestampString.startsWith("-") || dt.getYear > 9999 || dt.getYear < 0)
AtomicFieldValidationError(
s"Formatting as $timestampString is not Redshift-compatible",
field,
AtomicFieldValidationError.ParseError
).asLeft
AtomicError
.ParseError(
s"Formatting as $timestampString is not Redshift-compatible",
field
)
.asLeft
else
timestampString.asRight
} catch {
case _: NumberFormatException =>
AtomicFieldValidationError(
"Not in the expected format: ms since epoch",
field,
AtomicFieldValidationError.ParseError
).asLeft
AtomicError
.ParseError(
"Not in the expected format: ms since epoch",
field
)
.asLeft
}

/**
Expand All @@ -142,7 +144,7 @@ object EventEnrichments {
* @param eventCode The event code
* @return the event type, or an error message if not recognised, boxed in a Scalaz Validation
*/
val extractEventType: (String, String) => Either[AtomicFieldValidationError, String] =
val extractEventType: (String, String) => Either[AtomicError.ParseError, String] =
(field, code) =>
code match {
case "se" => "struct".asRight
Expand All @@ -155,7 +157,7 @@ object EventEnrichments {
case "pp" => "page_ping".asRight
case _ =>
val msg = "Not a valid event type"
AtomicFieldValidationError(msg, field, AtomicFieldValidationError.ParseError).asLeft
AtomicError.ParseError(msg, field).asLeft
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import com.snowplowanalytics.snowplow.badrows.Processor
import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData}
import com.snowplowanalytics.iglu.core.circe.implicits._

import com.snowplowanalytics.snowplow.enrich.common.utils.{ConversionUtils => CU, AtomicFieldValidationError}
import com.snowplowanalytics.snowplow.enrich.common.utils.{ConversionUtils => CU, AtomicError}

/** Miscellaneous enrichments which don't fit into one of the other modules. */
object MiscEnrichments {
Expand All @@ -44,7 +44,7 @@ object MiscEnrichments {
* @param platform The code for the platform generating this event.
* @return a Scalaz ValidatedString.
*/
val extractPlatform: (String, String) => Either[AtomicFieldValidationError, String] =
val extractPlatform: (String, String) => Either[AtomicError.ParseError, String] =
(field, platform) =>
platform match {
case "web" => "web".asRight // Web, including Mobile Web
Expand All @@ -58,11 +58,11 @@ object MiscEnrichments {
case "headset" => "headset".asRight // AR/VR Headset
case _ =>
val msg = "Not a valid platform"
AtomicFieldValidationError(msg, field, AtomicFieldValidationError.ParseError).asLeft
AtomicError.ParseError(msg, field).asLeft
}

/** Make a String TSV safe */
val toTsvSafe: (String, String) => Either[AtomicFieldValidationError, String] =
val toTsvSafe: (String, String) => Either[AtomicError.ParseError, String] =
(_, value) => CU.makeTsvSafe(value).asRight

/**
Expand All @@ -71,7 +71,7 @@ object MiscEnrichments {
* Here we retrieve the first one as it is supposed to be the client one, c.f.
* https://en.m.wikipedia.org/wiki/X-Forwarded-For#Format
*/
val extractIp: (String, String) => Either[AtomicFieldValidationError, String] =
val extractIp: (String, String) => Either[AtomicError.ParseError, String] =
(_, value) => {
val lastIp = Option(value).map(_.split("[,|, ]").head).orNull
CU.makeTsvSafe(lastIp).asRight
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments
import cats.implicits._
import cats.data.ValidatedNel

import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicFieldValidationError
import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicError

import com.snowplowanalytics.snowplow.enrich.common.enrichments.{EventEnrichments => EE}
import com.snowplowanalytics.snowplow.enrich.common.enrichments.{MiscEnrichments => ME}
Expand All @@ -31,7 +31,7 @@ object Transform {
* to "user_ipaddress" in the enriched event
* @param enriched /!\ MUTABLE enriched event, mutated IN-PLACE /!\
*/
private[enrichments] def transform(raw: RawEvent, enriched: EnrichedEvent): ValidatedNel[AtomicFieldValidationError, Unit] = {
private[enrichments] def transform(raw: RawEvent, enriched: EnrichedEvent): ValidatedNel[AtomicError.ParseError, Unit] = {
val sourceMap: SourceMap = raw.parameters.collect { case (k, Some(v)) => (k, v) }
val firstPassTransform = enriched.transform(sourceMap, firstPassTransformMap)
val secondPassTransform = enriched.transform(sourceMap, secondPassTransformMap)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,27 @@ package com.snowplowanalytics.snowplow.enrich.common.utils

import com.snowplowanalytics.iglu.client.validator.ValidatorReport

case class AtomicFieldValidationError(
message: String,
field: String,
errorType: AtomicFieldValidationError.ErrorType
) {
sealed trait AtomicError {
def message: String
def field: String
def repr: String
def toValidatorReport: ValidatorReport =
ValidatorReport(message, Some(field), Nil, Some(errorType.repr))
ValidatorReport(message, Some(field), Nil, Some(repr))
}

object AtomicFieldValidationError {
sealed trait ErrorType {
def repr: String
}
case object ParseError extends ErrorType {
override def repr: String = "ParseError"
object AtomicError {

case class ParseError(
message: String,
field: String
) extends AtomicError {
override def repr: String = "atomic_field_parse_error"
}
case object AtomicFieldLengthExceeded extends ErrorType {
override def repr: String = "AtomicFieldLengthExceeded"

case class FieldLengthError(
message: String,
field: String
) extends AtomicError {
override def repr: String = "atomic_field_length_exceeded"
}
}
Loading

0 comments on commit 5cebac0

Please sign in to comment.