From 55968932a80914bc29318965f5dbdea3be0eb64f Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Thu, 31 Jan 2019 16:06:45 +0200 Subject: [PATCH] Add type-safe Event API (close #53) --- build.sbt | 7 +- project/Dependencies.scala | 10 +- .../Common.scala | 30 + .../Event.scala | 258 +++ .../SnowplowEvent.scala | 95 + .../decode/Parser.scala | 72 + .../decode/RowDecoder.scala | 58 + .../decode/ValueDecoder.scala | 188 ++ .../decode/package.scala | 26 + .../json/Data.scala | 19 +- .../json/EventTransformer.scala | 20 +- .../json/JsonShredder.scala | 2 +- .../EventSpec.scala | 1567 +++++++++++++++++ .../RunManifestsSpec.scala | 0 .../decode/ValueDecoderSpec.scala | 205 +++ .../json}/EventTransformerSpec.scala | 18 +- .../json}/JsonShredderSpec.scala | 0 .../json}/SyntaxSpec.scala | 0 18 files changed, 2545 insertions(+), 30 deletions(-) create mode 100644 src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Common.scala create mode 100644 src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Event.scala create mode 100644 src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/SnowplowEvent.scala create mode 100644 src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/Parser.scala create mode 100644 src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/RowDecoder.scala create mode 100644 src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/ValueDecoder.scala create mode 100644 src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/package.scala create mode 100644 src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventSpec.scala rename src/test/scala/{ => com.snowplowanalytics.snowplow.analytics.scalasdk}/RunManifestsSpec.scala (100%) create mode 100644 src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/ValueDecoderSpec.scala rename src/test/scala/{com.snowplowanalytics.snowplow.analytics.scalasdk.json => 
com.snowplowanalytics.snowplow.analytics.scalasdk/json}/EventTransformerSpec.scala (98%) rename src/test/scala/{com.snowplowanalytics.snowplow.analytics.scalasdk.json => com.snowplowanalytics.snowplow.analytics.scalasdk/json}/JsonShredderSpec.scala (100%) rename src/test/scala/{com.snowplowanalytics.snowplow.analytics.scalasdk.json => com.snowplowanalytics.snowplow.analytics.scalasdk/json}/SyntaxSpec.scala (100%) diff --git a/build.sbt b/build.sbt index c6a8c15..418a2bd 100644 --- a/build.sbt +++ b/build.sbt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -28,6 +28,11 @@ lazy val root = project.in(file(".")) .settings( libraryDependencies ++= Seq( // Scala + Dependencies.igluCore, + Dependencies.cats, + Dependencies.circeParser, + Dependencies.circeGeneric, + Dependencies.circeJava, Dependencies.json4sJackson, Dependencies.s3, Dependencies.dynamodb, diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 0520fe2..2874ef6 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. 
@@ -15,6 +15,9 @@ import sbt._ object Dependencies { object V { + val igluCore = "0.4.0" + val cats = "1.6.0" + val circe = "0.11.1" val aws = "1.11.490" val json4s = "3.2.11" // Scala (test only) @@ -22,6 +25,11 @@ object Dependencies { val scalaCheck = "1.14.0" } + val igluCore = "com.snowplowanalytics" %% "iglu-core-circe" % V.igluCore + val cats = "org.typelevel" %% "cats-core" % V.cats + val circeParser = "io.circe" %% "circe-parser" % V.circe + val circeGeneric = "io.circe" %% "circe-generic" % V.circe + val circeJava = "io.circe" %% "circe-java8" % V.circe val json4sJackson = "org.json4s" %% "json4s-jackson" % V.json4s val s3 = "com.amazonaws" % "aws-java-sdk-s3" % V.aws val dynamodb = "com.amazonaws" % "aws-java-sdk-dynamodb" % V.aws diff --git a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Common.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Common.scala new file mode 100644 index 0000000..7afcf84 --- /dev/null +++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Common.scala @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */
+package com.snowplowanalytics.snowplow.analytics.scalasdk
+
+import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer}
+
+/**
+ * Iglu criteria and keys for the two Snowplow wrapper schemas,
+ * `unstruct_event` and `contexts`.
+ */
+object Common {
+
+  /** Vendor shared by every schema referenced in this object */
+  private val Vendor = "com.snowplowanalytics.snowplow"
+
+  /** Schema format shared by every schema referenced in this object */
+  private val Format = "jsonschema"
+
+  /** Criterion used to recognise the `unstruct_event` wrapper schema */
+  val UnstructEventCriterion =
+    SchemaCriterion(Vendor, "unstruct_event", Format, 1, 0)
+
+  /** Criterion used to recognise the `contexts` wrapper schema */
+  val ContextsCriterion =
+    SchemaCriterion(Vendor, "contexts", Format, 1, 0)
+
+  /** Key of the `unstruct_event` wrapper schema, pinned to `SchemaVer.Full(1, 0, 0)` */
+  val UnstructEventUri =
+    SchemaKey(Vendor, "unstruct_event", Format, SchemaVer.Full(1, 0, 0))
+
+  /** Key of the `contexts` wrapper schema, pinned to `SchemaVer.Full(1, 0, 0)` */
+  val ContextsUri =
+    SchemaKey(Vendor, "contexts", Format, SchemaVer.Full(1, 0, 0))
+}
diff --git a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Event.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Event.scala
new file mode 100644
index 0000000..f285a95
--- /dev/null
+++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Event.scala
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved.
+ *
+ * This program is licensed to you under the Apache License Version 2.0,
+ * and you may not use this file except in compliance with the Apache License Version 2.0.
+ * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the Apache License Version 2.0 is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+ */
+package com.snowplowanalytics.snowplow.analytics.scalasdk
+
+// java
+import java.time.Instant
+import java.util.UUID
+
+// circe
+import io.circe.{Encoder, Json, JsonObject, ObjectEncoder}
+import io.circe.Json.JString
+import io.circe.generic.semiauto._
+import io.circe.syntax._
+import io.circe.java8.time._
+
+// iglu
+import com.snowplowanalytics.iglu.core.SelfDescribingData
+import com.snowplowanalytics.iglu.core.circe.instances._
+
+// This library
+import com.snowplowanalytics.snowplow.analytics.scalasdk.json.Data
+import com.snowplowanalytics.snowplow.analytics.scalasdk.decode.{Parser, RowDecodeResult}
+import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent.{Contexts, UnstructEvent}
+import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent._
+
+/**
+ * Case class representing a canonical Snowplow event.
+ *
+ * @see https://github.com/snowplow/snowplow/wiki/canonical-event-model
+ */
+case class Event(app_id: Option[String],
+                 platform: Option[String],
+                 etl_tstamp: Option[Instant],
+                 collector_tstamp: Instant,
+                 dvce_created_tstamp: Option[Instant],
+                 event: Option[String],
+                 event_id: UUID,
+                 txn_id: Option[Int],
+                 name_tracker: Option[String],
+                 v_tracker: Option[String],
+                 v_collector: String,
+                 v_etl: String,
+                 user_id: Option[String],
+                 user_ipaddress: Option[String],
+                 user_fingerprint: Option[String],
+                 domain_userid: Option[String],
+                 domain_sessionidx: Option[Int],
+                 network_userid: Option[String],
+                 geo_country: Option[String],
+                 geo_region: Option[String],
+                 geo_city: Option[String],
+                 geo_zipcode: Option[String],
+                 geo_latitude: Option[Double],
+                 geo_longitude: Option[Double],
+                 geo_region_name: Option[String],
+                 ip_isp: Option[String],
+                 ip_organization: Option[String],
+                 ip_domain: Option[String],
+                 ip_netspeed: Option[String],
+                 page_url: Option[String],
+                 page_title: Option[String],
+                 page_referrer: Option[String],
+                 page_urlscheme: Option[String],
+                 page_urlhost: Option[String],
+                 page_urlport: Option[Int],
+                 page_urlpath: Option[String],
+                 page_urlquery: Option[String],
+                 page_urlfragment: Option[String],
+                 refr_urlscheme: Option[String],
+                 refr_urlhost: Option[String],
+                 refr_urlport: Option[Int],
+                 refr_urlpath: Option[String],
+                 refr_urlquery: Option[String],
+                 refr_urlfragment: Option[String],
+                 refr_medium: Option[String],
+                 refr_source: Option[String],
+                 refr_term: Option[String],
+                 mkt_medium: Option[String],
+                 mkt_source: Option[String],
+                 mkt_term: Option[String],
+                 mkt_content: Option[String],
+                 mkt_campaign: Option[String],
+                 contexts: Contexts,
+                 se_category: Option[String],
+                 se_action: Option[String],
+                 se_label: Option[String],
+                 se_property: Option[String],
+                 se_value: Option[Double],
+                 unstruct_event: UnstructEvent,
+                 tr_orderid: Option[String],
+                 tr_affiliation: Option[String],
+                 tr_total: Option[Double],
+                 tr_tax: Option[Double],
+                 tr_shipping: Option[Double],
+                 tr_city: Option[String],
+                 tr_state: Option[String],
+                 tr_country: Option[String],
+                 ti_orderid: Option[String],
+                 ti_sku: Option[String],
+                 ti_name: Option[String],
+                 ti_category: Option[String],
+                 ti_price: Option[Double],
+                 ti_quantity: Option[Int],
+                 pp_xoffset_min: Option[Int],
+                 pp_xoffset_max: Option[Int],
+                 pp_yoffset_min: Option[Int],
+                 pp_yoffset_max: Option[Int],
+                 useragent: Option[String],
+                 br_name: Option[String],
+                 br_family: Option[String],
+                 br_version: Option[String],
+                 br_type: Option[String],
+                 br_renderengine: Option[String],
+                 br_lang: Option[String],
+                 br_features_pdf: Option[Boolean],
+                 br_features_flash: Option[Boolean],
+                 br_features_java: Option[Boolean],
+                 br_features_director: Option[Boolean],
+                 br_features_quicktime: Option[Boolean],
+                 br_features_realplayer: Option[Boolean],
+                 br_features_windowsmedia: Option[Boolean],
+                 br_features_gears: Option[Boolean],
+                 br_features_silverlight: Option[Boolean],
+                 br_cookies: Option[Boolean],
+                 br_colordepth: Option[String],
+                 br_viewwidth: Option[Int],
+                 br_viewheight: Option[Int],
+                 os_name: Option[String],
+                 os_family: Option[String],
+                 os_manufacturer: Option[String],
+                 os_timezone: Option[String],
+                 dvce_type: Option[String],
+                 dvce_ismobile: Option[Boolean],
+                 dvce_screenwidth: Option[Int],
+                 dvce_screenheight: Option[Int],
+                 doc_charset: Option[String],
+                 doc_width: Option[Int],
+                 doc_height: Option[Int],
+                 tr_currency: Option[String],
+                 tr_total_base: Option[Double],
+                 tr_tax_base: Option[Double],
+                 tr_shipping_base: Option[Double],
+                 ti_currency: Option[String],
+                 ti_price_base: Option[Double],
+                 base_currency: Option[String],
+                 geo_timezone: Option[String],
+                 mkt_clickid: Option[String],
+                 mkt_network: Option[String],
+                 etl_tags: Option[String],
+                 dvce_sent_tstamp: Option[Instant],
+                 refr_domain_userid: Option[String],
+                 refr_device_tstamp: Option[Instant],
+                 derived_contexts: Contexts,
+                 domain_sessionid: Option[String],
+                 derived_tstamp: Option[Instant],
+                 event_vendor: Option[String],
+                 event_name: Option[String],
+                 event_format: Option[String],
+                 event_version: Option[String],
+                 event_fingerprint: Option[String],
+                 true_tstamp: Option[Instant]) {
+
+  /**
+   * Extracts metadata from the event containing information about the types and Iglu URIs of its shred properties
+   */
+  def inventory: Set[Data.ShreddedType] = {
+    val unstructEvent = unstruct_event
+      .data
+      .toSet
+      .map((ue: SelfDescribingData[Json]) => Data.ShreddedType(Data.UnstructEvent, ue.schema))
+
+    val derivedContexts = derived_contexts
+      .data
+      .toSet
+      .map((ctx: SelfDescribingData[Json]) => Data.ShreddedType(Data.Contexts(Data.DerivedContexts), ctx.schema))
+
+    val customContexts = contexts
+      .data
+      .toSet
+      .map((ctx: SelfDescribingData[Json]) => Data.ShreddedType(Data.Contexts(Data.CustomContexts), ctx.schema))
+
+    customContexts ++ derivedContexts ++ unstructEvent
+  }
+
+  /**
+   * Returns the event as a map of keys to Circe JSON values, while dropping inventory fields
+   */
+  def atomic: Map[String, Json] = toJsonMap - "contexts" - "unstruct_event" - "derived_contexts"
+
+  /**
+   * Returns the event as a list of key/Circe JSON value pairs.
+   * Unlike `toJsonMap` and `atomic`, these keys use the ordering of the canonical event model
+   */
+  def ordered: List[(String, Option[Json])] = {
+    // `toJsonMap` is a `def` that re-encodes the whole event on every call,
+    // so encode once here instead of once per known key
+    val fields = toJsonMap
+    Event.parser.knownKeys.map(key => (key.name, fields.get(key.name)))
+  }
+
+  /**
+   * Returns a compound JSON field containing information about an event's latitude and longitude,
+   * or None if one of these fields doesn't exist
+   */
+  def geoLocation: Option[(String, Json)] =
+    for {
+      lat <- geo_latitude
+      lon <- geo_longitude
+    } yield "geo_location" -> s"$lat,$lon".asJson
+
+  /**
+   * Transforms the event to a validated JSON whose keys are the field names corresponding to the
+   * EnrichedEvent POJO of the Scala Common Enrich project. If the lossy argument is true, any
+   * self-describing events in the fields (unstruct_event, contexts and derived_contexts) are returned
+   * in a "shredded" format (e.g. "unstruct_event_com_acme_1_myField": "value"), otherwise a standard
+   * self-describing format is used.
+   *
+   * @param lossy Whether unstruct_event, contexts and derived_contexts should be flattened
+   */
+  def toJson(lossy: Boolean): Json =
+    if (lossy) {
+      JsonObject.fromMap(atomic ++ contexts.toShreddedJson.toMap ++ derived_contexts.toShreddedJson.toMap ++ unstruct_event.toShreddedJson.toMap ++ geoLocation).asJson
+    } else {
+      this.asJson
+    }
+
+  /**
+   * Returns the event as a map of keys to Circe JSON values
+   */
+  private def toJsonMap: Map[String, Json] = this.asJsonObject.toMap
+}
+
+/**
+ * Companion holding the derived Circe encoder and the derived TSV parser for [[Event]]
+ */
+object Event {
+  /**
+   * Automatically derived Circe encoder
+   */
+  implicit val jsonEncoder: ObjectEncoder[Event] = deriveEncoder[Event]
+
+  /**
+   * Derived TSV parser for the Event class
+   */
+  private val parser: Parser[Event] = Parser.deriveFor[Event].get
+
+  /**
+   * Converts a string with an enriched event TSV to an Event instance,
+   * or a ValidatedNel containing information about errors
+   *
+   * @param line Enriched event TSV line
+   */
+  def parse(line: String): RowDecodeResult[Event] =
+    parser.parse(line)
+}
diff --git a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/SnowplowEvent.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/SnowplowEvent.scala
new file mode 100644
index 0000000..41fc4c7
--- /dev/null
+++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/SnowplowEvent.scala
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved.
+ *
+ * This program is licensed to you under the Apache License Version 2.0,
+ * and you may not use this file except in compliance with the Apache License Version 2.0.
+ * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the Apache License Version 2.0 is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+ */
+package com.snowplowanalytics.snowplow.analytics.scalasdk
+
+// circe
+import io.circe.syntax._
+import io.circe.{Encoder, Json, JsonObject}
+
+// iglu
+import com.snowplowanalytics.iglu.core.circe.CirceIgluCodecs._
+import com.snowplowanalytics.iglu.core.{SchemaKey, SelfDescribingData}
+
+// This library
+import com.snowplowanalytics.snowplow.analytics.scalasdk.json.Data
+import com.snowplowanalytics.snowplow.analytics.scalasdk.json.Data.ShredProperty
+
+/**
+ * Wrapper types for the self-describing columns of an event, their Circe encoders,
+ * and the helper that builds shredded column names.
+ */
+object SnowplowEvent {
+
+  /**
+   * A JSON representation of an atomic event's unstruct_event field.
+   *
+   * @param data the unstruct event as self-describing JSON, or None if the field is missing
+   */
+  case class UnstructEvent(data: Option[SelfDescribingData[Json]]) extends AnyVal {
+    /** Shredded column name paired with the event payload, or None when there is no event */
+    def toShreddedJson: Option[(String, Json)] =
+      data.map { selfDesc =>
+        (transformSchema(Data.UnstructEvent, selfDesc.schema), selfDesc.data)
+      }
+  }
+
+  /**
+   * A JSON representation of an atomic event's contexts or derived_contexts fields.
+   *
+   * @param data the contexts as a list of self-describing JSONs (may be empty)
+   */
+  case class Contexts(data: List[SelfDescribingData[Json]]) extends AnyVal {
+    /**
+     * Groups the contexts by vendor, name, format and model and returns, per group,
+     * the shredded column name mapped to a JSON array of the group's payloads
+     */
+    def toShreddedJson: Map[String, Json] = {
+      val grouped = data.groupBy { entity =>
+        (entity.schema.vendor, entity.schema.name, entity.schema.format, entity.schema.version.model)
+      }
+      grouped.map {
+        case ((vendor, name, _, model), members) =>
+          val column = transformSchema(Data.Contexts(Data.CustomContexts), vendor, name, model)
+          (column, members.map(_.data).asJson)
+      }
+    }
+  }
+
+  /** Encodes an empty list as an empty JSON object, otherwise as a `contexts` self-describing JSON */
+  implicit final val contextsCirceEncoder: Encoder[Contexts] =
+    Encoder.instance { contexts: Contexts =>
+      contexts.data match {
+        case Nil =>
+          JsonObject.empty.asJson
+        case entities =>
+          JsonObject(
+            ("schema", Common.ContextsUri.toSchemaUri.asJson),
+            ("data", entities.asJson)
+          ).asJson
+      }
+    }
+
+  /** Encodes a missing event as JSON null, otherwise as an `unstruct_event` self-describing JSON */
+  implicit final val unstructCirceEncoder: Encoder[UnstructEvent] =
+    Encoder.instance { unstructEvent: UnstructEvent =>
+      unstructEvent.data match {
+        case None =>
+          Json.Null
+        case present =>
+          JsonObject(
+            ("schema", Common.UnstructEventUri.toSchemaUri.asJson),
+            ("data", present.asJson)
+          ).asJson
+      }
+    }
+
+  /**
+   * @param shredProperty Type of self-describing entity
+   * @param vendor Iglu schema vendor
+   * @param name Iglu schema name
+   * @param model Iglu schema model
+   * @return the schema, transformed into an Elasticsearch-compatible column name
+   */
+  def transformSchema(shredProperty: ShredProperty, vendor: String, name: String, model: Int): String = {
+    // Dots and dashes in the vendor become underscores
+    val vendorPart = vendor.replaceAll("""[\.\-]""", "_").toLowerCase
+
+    // Same substitution for the name, then an underscore is inserted before each
+    // capital that follows a non-capital, non-underscore character (PascalCase to snake_case)
+    val namePart = name
+      .replaceAll("""[\.\-]""", "_")
+      .replaceAll("([^A-Z_])([A-Z])", "$1_$2")
+      .toLowerCase
+
+    s"${shredProperty.prefix}${vendorPart}_${namePart}_$model"
+  }
+
+  /** Same as the four-argument overload, taking vendor, name and model from `schema` */
+  def transformSchema(shredProperty: ShredProperty, schema: SchemaKey): String =
+    transformSchema(shredProperty, schema.vendor, schema.name, schema.version.model)
+}
diff --git
a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/Parser.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/Parser.scala new file mode 100644 index 0000000..d34d3f4 --- /dev/null +++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/Parser.scala @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */
+package com.snowplowanalytics.snowplow.analytics.scalasdk.decode
+
+import shapeless._
+import shapeless.ops.record._
+import shapeless.ops.hlist._
+
+import Parser._
+
+/** TSV line parser producing an `A` from a tab-separated row */
+private[scalasdk] trait Parser[A] extends Serializable {
+  /** Heterogeneous TSV values */
+  type HTSV <: HList
+
+  /** List of field names defined on `A` */
+  def knownKeys: List[Key] // TODO: should be a part of `RowDecoder`
+
+  /** Evidence allowing to transform TSV line into `HList` */
+  protected def decoder: RowDecoder[HTSV]
+
+  /** Evidence that `A` is isomorphic to `HTSV` */
+  protected def generic: Generic.Aux[A, HTSV]
+
+  /**
+   * Splits `row` on tabs (keeping trailing empty columns), pairs every column with
+   * its expected key, padding the shorter side with a placeholder, and decodes the
+   * pairs into `A`
+   */
+  def parse(row: String): RowDecodeResult[A] = {
+    val columns = row.split("\t", -1)
+    val keyed = knownKeys.zipAll(columns, UnknownKeyPlaceholder, ValueIsMissingPlaceholder)
+    decoder(keyed).map(hlist => generic.from(hlist))
+  }
+}
+
+object Parser {
+  sealed trait DeriveParser[A] {
+    /**
+     * Get instance of parser after all evidences are given
+     * @tparam R full class representation with field names and types
+     * @tparam K evidence of field names
+     * @tparam L evidence of field types
+     */
+    def get[R <: HList, K <: HList, L <: HList](implicit lgen: LabelledGeneric.Aux[A, R],
+                                                keys: Keys.Aux[R, K],
+                                                gen: Generic.Aux[A, L],
+                                                toTraversableAux: ToTraversable.Aux[K, List, Symbol],
+                                                rowDecoder: RowDecoder[L]): Parser[A] =
+      new Parser[A] {
+        type HTSV = L
+        val knownKeys: List[Symbol] = keys().toList[Symbol]
+        val decoder: RowDecoder[L] = rowDecoder
+        val generic: Generic.Aux[A, L] = gen
+      }
+  }
+
+  /** Key name that will be used if TSV has more columns than a class */
+  val UnknownKeyPlaceholder = 'UnknownKey
+  /** Value that will be used if class has more fields than a TSV */
+  val ValueIsMissingPlaceholder = "VALUE IS MISSING"
+
+  /** Derive a TSV parser for `A` */
+  private[scalasdk] def deriveFor[A]: DeriveParser[A] =
+    new DeriveParser[A] {}
+}
diff --git
a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/RowDecoder.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/RowDecoder.scala new file mode 100644 index 0000000..038ffb8 --- /dev/null +++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/RowDecoder.scala @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */
+package com.snowplowanalytics.snowplow.analytics.scalasdk.decode
+
+import shapeless._
+import cats.syntax.validated._
+import cats.syntax.either._
+import cats.syntax.apply._
+
+/**
+ * Type class to decode List of keys-value pairs into HList
+ * Keys derived from original class of HList,
+ * Values are actual TSV columns
+ */
+private[scalasdk] trait RowDecoder[L <: HList] extends Serializable {
+  def apply(row: List[(Key, String)]): RowDecodeResult[L]
+}
+
+private[scalasdk] object RowDecoder {
+  import HList.ListCompat._
+
+  /** Summons the implicit decoder for `L` */
+  def apply[L <: HList](implicit fromRow: RowDecoder[L]): RowDecoder[L] = fromRow
+
+  /** Wraps a plain function as a `RowDecoder` */
+  def fromFunc[L <: HList](f: List[(Key, String)] => RowDecodeResult[L]): RowDecoder[L] =
+    new RowDecoder[L] {
+      def apply(row: List[(Key, String)]) = f(row)
+    }
+
+  /**
+   * Parse TSV row into HList: decode the first column with the `ValueDecoder` for `H`,
+   * the remaining columns with the `RowDecoder` for `T`, and combine both results
+   * so that errors from either side are accumulated
+   */
+  private def parse[H: ValueDecoder, T <: HList: RowDecoder](row: List[(Key, String)]): RowDecodeResult[H :: T] =
+    row match {
+      case Nil =>
+        "Not enough values, format is invalid".invalidNel
+      case column :: rest =>
+        // Only the message part of the (key, message) error pair is kept
+        val decodedHead: RowDecodeResult[H] =
+          ValueDecoder[H].parse(column).leftMap(_._2).toValidatedNel
+        val decodedTail: RowDecodeResult[T] = RowDecoder[T].apply(rest)
+        (decodedHead, decodedTail).mapN { (h, t) => h :: t }
+    }
+
+  /** Base case: succeeds only when no columns are left */
+  implicit val hnilFromRow: RowDecoder[HNil] = fromFunc {
+    case Nil => HNil.validNel
+    case extra => s"No more values expected, following provided: ${extra.map(_._2).mkString(", ")}".invalidNel
+  }
+
+  /** Inductive case: decoder for `H :: T` built from the decoders of `H` and `T` */
+  implicit def hconsFromRow[H: ValueDecoder, T <: HList: RowDecoder]: RowDecoder[H :: T] =
+    fromFunc { columns => parse(columns) }
+}
+
diff --git a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/ValueDecoder.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/ValueDecoder.scala
new file mode 100644
index 0000000..4e05a50
--- /dev/null
+++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/ValueDecoder.scala
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved.
+ *
+ * This program is licensed to you under the Apache License Version 2.0,
+ * and you may not use this file except in compliance with the Apache License Version 2.0.
+ * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the Apache License Version 2.0 is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+ */
+package com.snowplowanalytics.snowplow.analytics.scalasdk
+package decode
+
+// java
+import java.time.Instant
+import java.time.format.DateTimeParseException
+import java.util.UUID
+
+// cats
+import cats.syntax.either._
+import cats.syntax.option._
+import cats.syntax.show._
+
+// iglu
+import com.snowplowanalytics.iglu.core.SelfDescribingData
+import com.snowplowanalytics.iglu.core.circe.instances._
+
+// circe
+import io.circe.parser.{parse => parseJson}
+import io.circe.{Error, Json}
+
+// This library
+import com.snowplowanalytics.snowplow.analytics.scalasdk.Common.{ContextsCriterion, UnstructEventCriterion}
+import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent.{Contexts, UnstructEvent}
+
+/** Type class decoding a single (key, raw TSV value) pair into an `A` */
+private[decode] trait ValueDecoder[A] {
+  def parse(column: (Key, String)): DecodedValue[A]
+}
+
+private[decode] object ValueDecoder {
+  /** Summons the implicit decoder for `A` */
+  def apply[A](implicit readA: ValueDecoder[A]): ValueDecoder[A] = readA
+
+  /** Wraps a plain function as a `ValueDecoder` */
+  def fromFunc[A](f: ((Key, String)) => DecodedValue[A]): ValueDecoder[A] =
+    new ValueDecoder[A] {
+      def parse(field: (Key, String)): DecodedValue[A] = f(field)
+    }
+
+  /** Mandatory string: an empty value is an error */
+  implicit final val stringColumnDecoder: ValueDecoder[String] =
+    fromFunc[String] {
+      case (key, value) =>
+        Either.cond(value.nonEmpty, value, (key, s"Field $key cannot be empty"))
+    }
+
+  /** Optional string: an empty value becomes None, never fails */
+  implicit final val stringOptionColumnDecoder: ValueDecoder[Option[String]] =
+    fromFunc[Option[String]] {
+      case (_, value) =>
+        Right(if (value.isEmpty) None else Some(value))
+    }
+
+  /** Optional integer: empty becomes None, anything `toInt` rejects is an error */
+  implicit final val intColumnDecoder: ValueDecoder[Option[Int]] =
+    fromFunc[Option[Int]] {
+      case (key, value) =>
+        if (value.isEmpty) Right(None)
+        else
+          Either.catchOnly[NumberFormatException](value.toInt)
+            .leftMap(_ => (key, s"Cannot parse key $key with value $value into integer"))
+            .map(_.some)
+    }
+
+  /** Mandatory UUID: empty or malformed values are errors */
+  implicit final val uuidColumnDecoder: ValueDecoder[UUID] =
+    fromFunc[UUID] {
+      case (key, value) =>
+        if (value.isEmpty) Left((key, s"Field $key cannot be empty"))
+        else
+          Either.catchOnly[IllegalArgumentException](UUID.fromString(value))
+            .leftMap(_ => (key, s"Cannot parse key $key with value $value into UUID"))
+    }
+
+  /** Optional boolean encoded as "0" / "1"; empty becomes None, anything else is an error */
+  implicit final val boolColumnDecoder: ValueDecoder[Option[Boolean]] =
+    fromFunc[Option[Boolean]] {
+      case (_, "0") => Right(Some(false))
+      case (_, "1") => Right(Some(true))
+      case (_, "") => Right(None)
+      case (key, value) => Left((key, s"Cannot parse key $key with value $value into boolean"))
+    }
+
+  /** Optional double: empty becomes None, anything `toDouble` rejects is an error */
+  implicit final val doubleColumnDecoder: ValueDecoder[Option[Double]] =
+    fromFunc[Option[Double]] {
+      case (key, value) =>
+        if (value.isEmpty) Right(None)
+        else
+          Either.catchOnly[NumberFormatException](value.toDouble)
+            .leftMap(_ => (key, s"Cannot parse key $key with value $value into double"))
+            .map(_.some)
+    }
+
+  /** Mandatory timestamp: empty or unparseable values are errors */
+  implicit final val instantColumnDecoder: ValueDecoder[Instant] =
+    fromFunc[Instant] {
+      case (key, value) =>
+        if (value.isEmpty) Left((key, s"Field $key cannot be empty"))
+        else parseTstamp(key, value)
+    }
+
+  /** Optional timestamp: empty becomes None, unparseable values are errors */
+  implicit final val instantOptionColumnDecoder: ValueDecoder[Option[Instant]] =
+    fromFunc[Option[Instant]] {
+      case (key, value) =>
+        if (value.isEmpty) Right(None)
+        else parseTstamp(key, value).map(_.some)
+    }
+
+  /**
+   * Unstruct event column: empty becomes `UnstructEvent(None)`; otherwise the value must be
+   * a self-describing JSON matching `UnstructEventCriterion` whose data is itself
+   * a self-describing JSON
+   */
+  implicit final val unstructuredJson: ValueDecoder[UnstructEvent] =
+    fromFunc[UnstructEvent] {
+      case (key, value) =>
+        def failure(error: Error): (Key, String) = (key, error.show)
+        if (value.isEmpty)
+          UnstructEvent(None).asRight[(Key, String)]
+        else {
+          val envelope = parseJson(value)
+            .flatMap(_.as[SelfDescribingData[Json]])
+            .leftMap(failure)
+          envelope.flatMap {
+            case SelfDescribingData(schema, data) if UnstructEventCriterion.matches(schema) =>
+              data.as[SelfDescribingData[Json]].leftMap(failure).map(event => UnstructEvent(event.some))
+            case SelfDescribingData(schema, _) =>
+              (key, s"Unknown payload: ${schema.toSchemaUri}").asLeft[UnstructEvent]
+          }
+        }
+    }
+
+  /**
+   * Contexts column: empty becomes `Contexts(List())`; otherwise the value must be
+   * a self-describing JSON matching `ContextsCriterion` whose data is a list of
+   * self-describing JSONs
+   */
+  implicit final val contexts: ValueDecoder[Contexts] =
+    fromFunc[Contexts] {
+      case (key, value) =>
+        def failure(error: Error): (Key, String) = (key, error.show)
+        if (value.isEmpty)
+          Contexts(List()).asRight[(Key, String)]
+        else {
+          val envelope = parseJson(value)
+            .flatMap(_.as[SelfDescribingData[Json]])
+            .leftMap(failure)
+          envelope.flatMap {
+            case SelfDescribingData(schema, data) if ContextsCriterion.matches(schema) =>
+              data.as[List[SelfDescribingData[Json]]].leftMap(failure).map(entities => Contexts(entities))
+            case SelfDescribingData(schema, _) =>
+              (key, s"Unknown payload: ${schema.toSchemaUri}").asLeft[Contexts]
+          }
+        }
+    }
+
+  /** Parses a non-empty TSV timestamp, reporting failures against `key` */
+  private def parseTstamp(key: Key, value: String): DecodedValue[Instant] =
+    Either.catchOnly[DateTimeParseException](Instant.parse(reformatTstamp(value)))
+      .leftMap(_ => (key, s"Cannot parse key $key with value $value into datetime"))
+
+  /**
+   * Converts a timestamp to an ISO-8601 format usable by Instant.parse()
+   *
+   * @param tstamp Timestamp of the form YYYY-MM-DD hh:mm:ss
+   * @return ISO-8601 timestamp
+   */
+  private def reformatTstamp(tstamp: String): String = {
+    val withSeparator = tstamp.replaceAll(" ", "T")
+    withSeparator + "Z"
+  }
+}
+
diff --git
a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/package.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/package.scala
new file mode 100644
index 0000000..ad6faa5
--- /dev/null
+++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/package.scala
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved.
+ *
+ * This program is licensed to you under the Apache License Version 2.0,
+ * and you may not use this file except in compliance with the Apache License Version 2.0.
+ * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the Apache License Version 2.0 is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+ */
+package com.snowplowanalytics.snowplow.analytics.scalasdk
+
+import cats.data.ValidatedNel
+
+package object decode {
+  /** Expected name of the field; each raw TSV column is paired with one before decoding */
+  type Key = Symbol
+
+  /** Result of single-value parsing: the decoded value, or the field's key paired with an error message */
+  type DecodedValue[A] = Either[(Key, String), A]
+
+  /** Result of TSV line parsing, which is either an event or non empty list of parse errors (plain strings) */
+  type RowDecodeResult[A] = ValidatedNel[String, A]
+}
diff --git a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/Data.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/Data.scala
index 0155346..0ccd152 100644
--- a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/Data.scala
+++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/Data.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 Snowplow Analytics Ltd. All rights reserved.
+ * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -13,6 +13,7 @@ package com.snowplowanalytics.snowplow.analytics.scalasdk.json // Json4s +import com.snowplowanalytics.iglu.core.SchemaKey import org.json4s._ /** @@ -67,7 +68,9 @@ object Data { * @param shredProperty type of shred property * @param igluUri valid Iglu URI of shred property */ - case class InventoryItem(shredProperty: ShredProperty, igluUri: IgluUri) + case class InventoryItemOld(shredProperty: ShredProperty, igluUri: IgluUri) + + case class ShreddedType(shredProperty: ShredProperty, schemaKey: SchemaKey) /** * The event as a shredded stringified JSON along with it's inventory @@ -76,7 +79,7 @@ object Data { * @param event stringified event JSON * @param inventory set of JSON fields (contexts, unsturct event) */ - case class EventWithInventory(event: String, inventory: Set[InventoryItem]) + case class EventWithInventory(event: String, inventory: Set[InventoryItemOld]) /** @@ -127,12 +130,12 @@ object Data { /** * Get JSON object along with inventory */ - def jsonAndInventory: (Set[InventoryItem], JObject) = (getInventory, getJson) + def jsonAndInventory: (Set[InventoryItemOld], JObject) = (getInventory, getJson) /** * Get set of contained JSON Schema URIs */ - def getInventory: Set[InventoryItem] + def getInventory: Set[InventoryItemOld] /** * Get ready-to-output JSON @@ -148,7 +151,7 @@ object Data { */ private[scalasdk] case class ContextsOutput(contextsType: ContextsType, contextMap: Map[IgluUri, List[JValue]]) extends TsvConverterOutput { def getInventory = - contextMap.keySet.map { uri => InventoryItem(Contexts(contextsType), uri) } + contextMap.keySet.map { uri => InventoryItemOld(Contexts(contextsType), uri) } def getJson = JObject(contextMap.toList.map { case (key, list) => 
(fixSchema(Contexts(contextsType), key), JArray(list)) }) @@ -162,7 +165,7 @@ object Data { */ case class UnstructEventOutput(igluUri: IgluUri, value: JValue) extends TsvConverterOutput { def getInventory = - Set(InventoryItem(UnstructEvent, igluUri)) + Set(InventoryItemOld(UnstructEvent, igluUri)) def getJson = JObject(fixSchema(UnstructEvent, igluUri) -> value) @@ -177,7 +180,7 @@ object Data { */ private[scalasdk] case class PrimitiveOutput(key: String, value: JValue) extends TsvConverterOutput { def getInventory = - Set.empty[InventoryItem] + Set.empty[InventoryItemOld] def getJson = JObject(key -> value) diff --git a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/EventTransformer.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/EventTransformer.scala index d411e1f..057b185 100644 --- a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/EventTransformer.scala +++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/EventTransformer.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 Snowplow Analytics Ltd. + * Copyright (c) 2016-2019 Snowplow Analytics Ltd. * All rights reserved. 
* * This program is licensed to you under the Apache License Version 2.0, @@ -66,7 +66,7 @@ object EventTransformer { * @param event Array of values for the event * @return ValidatedRecord containing JSON for the event and the event_id (if it exists) */ - def jsonifyGoodEvent(event: Array[String]): Validated[(Set[InventoryItem], JObject)] = { + def jsonifyGoodEvent(event: Array[String]): Validated[(Set[InventoryItemOld], JObject)] = { getValidatedJsonEvent(event, true) } @@ -81,7 +81,7 @@ object EventTransformer { * @param flatten Whether to flatten the fields in "unstruct_event", "contexts" and "derived_contexts" * @return ValidatedRecord containing JSON for the event and the event_id (if it exists) */ - def getValidatedJsonEvent(event: Array[String], flatten: Boolean): Validated[(Set[InventoryItem], JObject)] = { + def getValidatedJsonEvent(event: Array[String], flatten: Boolean): Validated[(Set[InventoryItemOld], JObject)] = { if (isRightSize(event)) { Left(List(s"Expected ${Fields.size} fields, received ${event.length} fields. 
This may be caused by attempting to use this SDK version on an older (pre-R73) or newer version of Snowplow enriched events.")) } else { @@ -101,7 +101,7 @@ object EventTransformer { } } - private def getJObjectWithNestedStructures(event: Array[String]): Validated[(Set[InventoryItem], JObject)] = { + private def getJObjectWithNestedStructures(event: Array[String]): Validated[(Set[InventoryItemOld], JObject)] = { for { jObject <- convertTsvEventToNestedJObject(event) } yield (getInventory(jObject).toSet, getGeoLocationField(event) ~ jObject) @@ -121,11 +121,11 @@ object EventTransformer { val map = nonFlattenJson.obj.toMap - def getContexts(ctxType: ShredProperty): List[InventoryItem] = { + def getContexts(ctxType: ShredProperty): List[InventoryItemOld] = { (for { JObject(fields) <- map.get(ctxType.name) JArray(contexts) <- fields.toMap.get("data") - schemas = contexts.flatMap(ctx => getSchema(ctx).map(uri => InventoryItem(ctxType, uri))) + schemas = contexts.flatMap(ctx => getSchema(ctx).map(uri => InventoryItemOld(ctxType, uri))) } yield schemas).getOrElse(List.empty) } @@ -133,7 +133,7 @@ object EventTransformer { JObject(fields) <- map.get(UnstructEvent.name) event <- fields.toMap.get("data") schema <- getSchema(event) - } yield InventoryItem(UnstructEvent, schema) + } yield InventoryItemOld(UnstructEvent, schema) val customContexts = getContexts(Contexts(CustomContexts)) val derivedContexts = getContexts(Contexts(DerivedContexts)) @@ -348,7 +348,7 @@ object EventTransformer { * @param fieldInformation ((field name, field-to-JObject conversion function), field value) * @return JObject representing a single field in the JSON */ - private def converter(fieldInformation: ((String, TsvToJsonConverter), String)): Validated[(Set[InventoryItem], JObject)] = { + private def converter(fieldInformation: ((String, TsvToJsonConverter), String)): Validated[(Set[InventoryItemOld], JObject)] = { val ((fieldName, fieldConversionFunction), fieldValue) = fieldInformation if 
(fieldValue.isEmpty) { if (fieldName.startsWith("contexts") || fieldName.startsWith("unstruct_event") || fieldName.startsWith("derived_contexts")) { @@ -396,8 +396,8 @@ object EventTransformer { * @param initial initial (probably empty) JSON object * @return either aggregated list of converter errors or merged JSON Object */ - private[json] def convertEvent(eventTsv: List[String], initial: JObject): Validated[(Set[InventoryItem], JObject)] = { - val initialPair = (Set.empty[InventoryItem], initial) + private[json] def convertEvent(eventTsv: List[String], initial: JObject): Validated[(Set[InventoryItemOld], JObject)] = { + val initialPair = (Set.empty[InventoryItemOld], initial) val result = Fields.zip(eventTsv).map(x => converter(x)).traverseEitherL.map { kvPairsList => kvPairsList.fold(initialPair) { case ((accumInventory, accumObject), (inventory, kvPair)) => diff --git a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/JsonShredder.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/JsonShredder.scala index 5f16550..f8893c4 100644 --- a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/JsonShredder.scala +++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/JsonShredder.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 Snowplow Analytics Ltd. + * Copyright (c) 2016-2019 Snowplow Analytics Ltd. * All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, diff --git a/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventSpec.scala b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventSpec.scala new file mode 100644 index 0000000..8786fe3 --- /dev/null +++ b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventSpec.scala @@ -0,0 +1,1567 @@ +/* + * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. 
+ * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ + +package com.snowplowanalytics.snowplow.analytics.scalasdk + +// java +import java.time.Instant +import java.util.UUID + +// cats +import cats.data.Validated.{Invalid, Valid} +import cats.data.NonEmptyList + +// circe +import io.circe.{Json, JsonObject} +import io.circe.syntax._ +import io.circe.parser._ + +// Specs2 +import org.specs2.mutable.Specification + +// Iglu +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} + +// This library +import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent.{Contexts, UnstructEvent} +import com.snowplowanalytics.snowplow.analytics.scalasdk.json.EventTransformer +import com.snowplowanalytics.snowplow.analytics.scalasdk.json.Data + +/** + * Tests Event case class + */ +class EventSpec extends Specification { + + val unstructJson = + """{ + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data": { + "schema": "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + "data": { + "targetUrl": "http://www.example.com", + "elementClasses": ["foreground"], + "elementId": "exampleLink" + } + } + }""" + + val contextsJson = + """{ + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema": "iglu:org.schema/WebPage/jsonschema/1-0-0", + "data": { + "genre": "blog", + 
"inLanguage": "en-US", + "datePublished": "2014-11-06T00:00:00Z", + "author": "Fred Blundun", + "breadcrumb": [ + "blog", + "releases" + ], + "keywords": [ + "snowplow", + "javascript", + "tracker", + "event" + ] + } + }, + { + "schema": "iglu:org.w3/PerformanceTiming/jsonschema/1-0-0", + "data": { + "navigationStart": 1415358089861, + "unloadEventStart": 1415358090270, + "unloadEventEnd": 1415358090287, + "redirectStart": 0, + "redirectEnd": 0, + "fetchStart": 1415358089870, + "domainLookupStart": 1415358090102, + "domainLookupEnd": 1415358090102, + "connectStart": 1415358090103, + "connectEnd": 1415358090183, + "requestStart": 1415358090183, + "responseStart": 1415358090265, + "responseEnd": 1415358090265, + "domLoading": 1415358090270, + "domInteractive": 1415358090886, + "domContentLoadedEventStart": 1415358090968, + "domContentLoadedEventEnd": 1415358091309, + "domComplete": 0, + "loadEventStart": 0, + "loadEventEnd": 0 + } + } + ] + }""" + + val contextsWithDuplicate = """{ + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema": "iglu:org.schema/WebPage/jsonschema/1-0-0", + "data": { + "genre": "blog", + "inLanguage": "en-US", + "datePublished": "2014-11-06T00:00:00Z", + "author": "Fred Blundun", + "breadcrumb": [ + "blog", + "releases" + ], + "keywords": [ + "snowplow", + "javascript", + "tracker", + "event" + ] + } + }, + { + "schema": "iglu:org.acme/context_one/jsonschema/1-0-0", + "data": { + "item": 1 + } + }, + { + "schema": "iglu:org.acme/context_one/jsonschema/1-0-1", + "data": { + "item": 2 + } + } + ] + }""" + + val derivedContextsJson = + """{ + "schema": "iglu:com.snowplowanalytics.snowplow\/contexts\/jsonschema\/1-0-1", + "data": [ + { + "schema": "iglu:com.snowplowanalytics.snowplow\/ua_parser_context\/jsonschema\/1-0-0", + "data": { + "useragentFamily": "IE", + "useragentMajor": "7", + "useragentMinor": "0", + "useragentPatch": null, + "useragentVersion": "IE 7.0", + "osFamily": "Windows XP", + 
"osMajor": null, + "osMinor": null, + "osPatch": null, + "osPatchMinor": null, + "osVersion": "Windows XP", + "deviceFamily": "Other" + } + } + ] + }""" + + "The Event parser" should { + "successfully convert a tab-separated pageview event string to an Event instance and JSON" in { + + val input = List( + "app_id" -> "angry-birds", + "platform" -> "web", + "etl_tstamp" -> "2017-01-26 00:01:25.292", + "collector_tstamp" -> "2013-11-26 00:02:05", + "dvce_created_tstamp" -> "2013-11-26 00:03:57.885", + "event" -> "page_view", + "event_id" -> "c6ef3124-b53a-4b13-a233-0088f79dcbcb", + "txn_id" -> "41828", + "name_tracker" -> "cloudfront-1", + "v_tracker" -> "js-2.1.0", + "v_collector" -> "clj-tomcat-0.1.0", + "v_etl" -> "serde-0.5.2", + "user_id" -> "jon.doe@email.com", + "user_ipaddress" -> "92.231.54.234", + "user_fingerprint" -> "2161814971", + "domain_userid" -> "bc2e92ec6c204a14", + "domain_sessionidx" -> "3", + "network_userid" -> "ecdff4d0-9175-40ac-a8bb-325c49733607", + "geo_country" -> "US", + "geo_region" -> "TX", + "geo_city" -> "New York", + "geo_zipcode" -> "94109", + "geo_latitude" -> "37.443604", + "geo_longitude" -> "-122.4124", + "geo_region_name" -> "Florida", + "ip_isp" -> "FDN Communications", + "ip_organization" -> "Bouygues Telecom", + "ip_domain" -> "nuvox.net", + "ip_netspeed" -> "Cable/DSL", + "page_url" -> "http://www.snowplowanalytics.com", + "page_title" -> "On Analytics", + "page_referrer" -> "", + "page_urlscheme" -> "http", + "page_urlhost" -> "www.snowplowanalytics.com", + "page_urlport" -> "80", + "page_urlpath" -> "/product/index.html", + "page_urlquery" -> "id=GTM-DLRG", + "page_urlfragment" -> "4-conclusion", + "refr_urlscheme" -> "", + "refr_urlhost" -> "", + "refr_urlport" -> "", + "refr_urlpath" -> "", + "refr_urlquery" -> "", + "refr_urlfragment" -> "", + "refr_medium" -> "", + "refr_source" -> "", + "refr_term" -> "", + "mkt_medium" -> "", + "mkt_source" -> "", + "mkt_term" -> "", + "mkt_content" -> "", + "mkt_campaign" -> "", + 
"contexts" -> contextsJson, + "se_category" -> "", + "se_action" -> "", + "se_label" -> "", + "se_property" -> "", + "se_value" -> "", + "unstruct_event" -> unstructJson, + "tr_orderid" -> "", + "tr_affiliation" -> "", + "tr_total" -> "", + "tr_tax" -> "", + "tr_shipping" -> "", + "tr_city" -> "", + "tr_state" -> "", + "tr_country" -> "", + "ti_orderid" -> "", + "ti_sku" -> "", + "ti_name" -> "", + "ti_category" -> "", + "ti_price" -> "", + "ti_quantity" -> "", + "pp_xoffset_min" -> "", + "pp_xoffset_max" -> "", + "pp_yoffset_min" -> "", + "pp_yoffset_max" -> "", + "useragent" -> "", + "br_name" -> "", + "br_family" -> "", + "br_version" -> "", + "br_type" -> "", + "br_renderengine" -> "", + "br_lang" -> "", + "br_features_pdf" -> "1", + "br_features_flash" -> "0", + "br_features_java" -> "", + "br_features_director" -> "", + "br_features_quicktime" -> "", + "br_features_realplayer" -> "", + "br_features_windowsmedia" -> "", + "br_features_gears" -> "", + "br_features_silverlight" -> "", + "br_cookies" -> "", + "br_colordepth" -> "", + "br_viewwidth" -> "", + "br_viewheight" -> "", + "os_name" -> "", + "os_family" -> "", + "os_manufacturer" -> "", + "os_timezone" -> "", + "dvce_type" -> "", + "dvce_ismobile" -> "", + "dvce_screenwidth" -> "", + "dvce_screenheight" -> "", + "doc_charset" -> "", + "doc_width" -> "", + "doc_height" -> "", + "tr_currency" -> "", + "tr_total_base" -> "", + "tr_tax_base" -> "", + "tr_shipping_base" -> "", + "ti_currency" -> "", + "ti_price_base" -> "", + "base_currency" -> "", + "geo_timezone" -> "", + "mkt_clickid" -> "", + "mkt_network" -> "", + "etl_tags" -> "", + "dvce_sent_tstamp" -> "", + "refr_domain_userid" -> "", + "refr_device_tstamp" -> "", + "derived_contexts" -> derivedContextsJson, + "domain_sessionid" -> "2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1", + "derived_tstamp" -> "2013-11-26 00:03:57.886", + "event_vendor" -> "com.snowplowanalytics.snowplow", + "event_name" -> "link_click", + "event_format" -> "jsonschema", + 
"event_version" -> "1-0-0", + "event_fingerprint" -> "e3dbfa9cca0412c3d4052863cefb547f", + "true_tstamp" -> "2013-11-26 00:03:57.886" + ) + + val expected = Event( + app_id = Some("angry-birds"), + platform = Some("web"), + etl_tstamp = Some(Instant.parse("2017-01-26T00:01:25.292Z")), + collector_tstamp = Instant.parse("2013-11-26T00:02:05Z"), + dvce_created_tstamp = Some(Instant.parse("2013-11-26T00:03:57.885Z")), + event = Some("page_view"), + event_id = UUID.fromString("c6ef3124-b53a-4b13-a233-0088f79dcbcb"), + txn_id = Some(41828), + name_tracker = Some("cloudfront-1"), + v_tracker = Some("js-2.1.0"), + v_collector = "clj-tomcat-0.1.0", + v_etl = "serde-0.5.2", + user_id = Some("jon.doe@email.com"), + user_ipaddress = Some("92.231.54.234"), + user_fingerprint = Some("2161814971"), + domain_userid = Some("bc2e92ec6c204a14"), + domain_sessionidx = Some(3), + network_userid = Some("ecdff4d0-9175-40ac-a8bb-325c49733607"), + geo_country = Some("US"), + geo_region = Some("TX"), + geo_city = Some("New York"), + geo_zipcode = Some("94109"), + geo_latitude = Some(37.443604), + geo_longitude = Some(-122.4124), + geo_region_name = Some("Florida"), + ip_isp = Some("FDN Communications"), + ip_organization = Some("Bouygues Telecom"), + ip_domain = Some("nuvox.net"), + ip_netspeed = Some("Cable/DSL"), + page_url = Some("http://www.snowplowanalytics.com"), + page_title = Some("On Analytics"), + page_referrer = None, + page_urlscheme = Some("http"), + page_urlhost = Some("www.snowplowanalytics.com"), + page_urlport = Some(80), + page_urlpath = Some("/product/index.html"), + page_urlquery = Some("id=GTM-DLRG"), + page_urlfragment = Some("4-conclusion"), + refr_urlscheme = None, + refr_urlhost = None, + refr_urlport = None, + refr_urlpath = None, + refr_urlquery = None, + refr_urlfragment = None, + refr_medium = None, + refr_source = None, + refr_term = None, + mkt_medium = None, + mkt_source = None, + mkt_term = None, + mkt_content = None, + mkt_campaign = None, + contexts = 
Contexts( + List( + SelfDescribingData( + SchemaKey( + "org.schema", + "WebPage", + "jsonschema", + SchemaVer.Full(1, 0, 0) + ), + JsonObject( + ("genre", "blog".asJson), + ("inLanguage", "en-US".asJson), + ("datePublished", "2014-11-06T00:00:00Z".asJson), + ("author", "Fred Blundun".asJson), + ("breadcrumb", List("blog", "releases").asJson), + ("keywords", List("snowplow", "javascript", "tracker", "event").asJson) + ).asJson + ), + SelfDescribingData( + SchemaKey( + "org.w3", + "PerformanceTiming", + "jsonschema", + SchemaVer.Full(1, 0, 0) + ), + JsonObject( + ("navigationStart", 1415358089861L.asJson), + ("unloadEventStart", 1415358090270L.asJson), + ("unloadEventEnd", 1415358090287L.asJson), + ("redirectStart", 0.asJson), + ("redirectEnd", 0.asJson), + ("fetchStart", 1415358089870L.asJson), + ("domainLookupStart", 1415358090102L.asJson), + ("domainLookupEnd", 1415358090102L.asJson), + ("connectStart", 1415358090103L.asJson), + ("connectEnd", 1415358090183L.asJson), + ("requestStart", 1415358090183L.asJson), + ("responseStart", 1415358090265L.asJson), + ("responseEnd", 1415358090265L.asJson), + ("domLoading", 1415358090270L.asJson), + ("domInteractive", 1415358090886L.asJson), + ("domContentLoadedEventStart", 1415358090968L.asJson), + ("domContentLoadedEventEnd", 1415358091309L.asJson), + ("domComplete", 0.asJson), + ("loadEventStart", 0.asJson), + ("loadEventEnd", 0.asJson) + ).asJson + ) + ) + ), + se_category = None, + se_action = None, + se_label = None, + se_property = None, + se_value = None, + unstruct_event = UnstructEvent( + Some( + SelfDescribingData( + SchemaKey( + "com.snowplowanalytics.snowplow", + "link_click", + "jsonschema", + SchemaVer.Full(1, 0, 1) + ), + JsonObject( + ("targetUrl", "http://www.example.com".asJson), + ("elementClasses", List("foreground").asJson), + ("elementId", "exampleLink".asJson) + ).asJson + ) + ) + ), + tr_orderid = None, + tr_affiliation = None, + tr_total = None, + tr_tax = None, + tr_shipping = None, + tr_city = None, 
+ tr_state = None, + tr_country = None, + ti_orderid = None, + ti_sku = None, + ti_name = None, + ti_category = None, + ti_price = None, + ti_quantity = None, + pp_xoffset_min = None, + pp_xoffset_max = None, + pp_yoffset_min = None, + pp_yoffset_max = None, + useragent = None, + br_name = None, + br_family = None, + br_version = None, + br_type = None, + br_renderengine = None, + br_lang = None, + br_features_pdf = Some(true), + br_features_flash = Some(false), + br_features_java = None, + br_features_director = None, + br_features_quicktime = None, + br_features_realplayer = None, + br_features_windowsmedia = None, + br_features_gears = None, + br_features_silverlight = None, + br_cookies = None, + br_colordepth = None, + br_viewwidth = None, + br_viewheight = None, + os_name = None, + os_family = None, + os_manufacturer = None, + os_timezone = None, + dvce_type = None, + dvce_ismobile = None, + dvce_screenwidth = None, + dvce_screenheight = None, + doc_charset = None, + doc_width = None, + doc_height = None, + tr_currency = None, + tr_total_base = None, + tr_tax_base = None, + tr_shipping_base = None, + ti_currency = None, + ti_price_base = None, + base_currency = None, + geo_timezone = None, + mkt_clickid = None, + mkt_network = None, + etl_tags = None, + dvce_sent_tstamp = None, + refr_domain_userid = None, + refr_device_tstamp = None, + derived_contexts = Contexts( + List( + SelfDescribingData( + SchemaKey( + "com.snowplowanalytics.snowplow", + "ua_parser_context", + "jsonschema", + SchemaVer.Full(1, 0, 0) + ), + JsonObject( + ("useragentFamily", "IE".asJson), + ("useragentMajor", "7".asJson), + ("useragentMinor", "0".asJson), + ("useragentPatch", Json.Null), + ("useragentVersion", "IE 7.0".asJson), + ("osFamily", "Windows XP".asJson), + ("osMajor", Json.Null), + ("osMinor", Json.Null), + ("osPatch", Json.Null), + ("osPatchMinor", Json.Null), + ("osVersion", "Windows XP".asJson), + ("deviceFamily", "Other".asJson) + ).asJson + ) + ) + ), + domain_sessionid = 
Some("2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1"), + derived_tstamp = Some(Instant.parse("2013-11-26T00:03:57.886Z")), + event_vendor = Some("com.snowplowanalytics.snowplow"), + event_name = Some("link_click"), + event_format = Some("jsonschema"), + event_version = Some("1-0-0"), + event_fingerprint = Some("e3dbfa9cca0412c3d4052863cefb547f"), + true_tstamp = Some(Instant.parse("2013-11-26T00:03:57.886Z")) + ) + + val eventValues = input.unzip._2.mkString("\t") + val event = Event.parse(eventValues) + + // Case class must be processed as expected + event mustEqual Valid(expected) + + val eventJson = event.getOrElse(throw new RuntimeException("Failed to parse event")).toJson(true) + + val legacyJson = parse(EventTransformer + .transform(eventValues) + .right + .toOption + .getOrElse(throw new RuntimeException("Event failed transformation"))) + .right + .toOption + .getOrElse(throw new RuntimeException("Event failed transformation")) + + // JSON output must be equal to output from the old transformer. 
(NB: field ordering in new JSON will be randomized) + eventJson mustEqual legacyJson + } + + "successfully convert a tab-separated pageview event string to an Event instance and JSON, omitting unstruct_event and contexts nullary fields" in { + + val input = List( + "app_id" -> "angry-birds", + "platform" -> "web", + "etl_tstamp" -> "2017-01-26 00:01:25.292", + "collector_tstamp" -> "2013-11-26 00:02:05", + "dvce_created_tstamp" -> "2013-11-26 00:03:57.885", + "event" -> "page_view", + "event_id" -> "c6ef3124-b53a-4b13-a233-0088f79dcbcb", + "txn_id" -> "41828", + "name_tracker" -> "cloudfront-1", + "v_tracker" -> "js-2.1.0", + "v_collector" -> "clj-tomcat-0.1.0", + "v_etl" -> "serde-0.5.2", + "user_id" -> "jon.doe@email.com", + "user_ipaddress" -> "92.231.54.234", + "user_fingerprint" -> "2161814971", + "domain_userid" -> "bc2e92ec6c204a14", + "domain_sessionidx" -> "3", + "network_userid" -> "ecdff4d0-9175-40ac-a8bb-325c49733607", + "geo_country" -> "US", + "geo_region" -> "TX", + "geo_city" -> "New York", + "geo_zipcode" -> "94109", + "geo_latitude" -> "37.443604", + "geo_longitude" -> "-122.4124", + "geo_region_name" -> "Florida", + "ip_isp" -> "FDN Communications", + "ip_organization" -> "Bouygues Telecom", + "ip_domain" -> "nuvox.net", + "ip_netspeed" -> "Cable/DSL", + "page_url" -> "http://www.snowplowanalytics.com", + "page_title" -> "On Analytics", + "page_referrer" -> "", + "page_urlscheme" -> "http", + "page_urlhost" -> "www.snowplowanalytics.com", + "page_urlport" -> "80", + "page_urlpath" -> "/product/index.html", + "page_urlquery" -> "id=GTM-DLRG", + "page_urlfragment" -> "4-conclusion", + "refr_urlscheme" -> "", + "refr_urlhost" -> "", + "refr_urlport" -> "", + "refr_urlpath" -> "", + "refr_urlquery" -> "", + "refr_urlfragment" -> "", + "refr_medium" -> "", + "refr_source" -> "", + "refr_term" -> "", + "mkt_medium" -> "", + "mkt_source" -> "", + "mkt_term" -> "", + "mkt_content" -> "", + "mkt_campaign" -> "", + "contexts" -> "", + "se_category" -> "", 
+ "se_action" -> "", + "se_label" -> "", + "se_property" -> "", + "se_value" -> "", + "unstruct_event" -> "", + "tr_orderid" -> "", + "tr_affiliation" -> "", + "tr_total" -> "", + "tr_tax" -> "", + "tr_shipping" -> "", + "tr_city" -> "", + "tr_state" -> "", + "tr_country" -> "", + "ti_orderid" -> "", + "ti_sku" -> "", + "ti_name" -> "", + "ti_category" -> "", + "ti_price" -> "", + "ti_quantity" -> "", + "pp_xoffset_min" -> "", + "pp_xoffset_max" -> "", + "pp_yoffset_min" -> "", + "pp_yoffset_max" -> "", + "useragent" -> "", + "br_name" -> "", + "br_family" -> "", + "br_version" -> "", + "br_type" -> "", + "br_renderengine" -> "", + "br_lang" -> "", + "br_features_pdf" -> "1", + "br_features_flash" -> "0", + "br_features_java" -> "", + "br_features_director" -> "", + "br_features_quicktime" -> "", + "br_features_realplayer" -> "", + "br_features_windowsmedia" -> "", + "br_features_gears" -> "", + "br_features_silverlight" -> "", + "br_cookies" -> "", + "br_colordepth" -> "", + "br_viewwidth" -> "", + "br_viewheight" -> "", + "os_name" -> "", + "os_family" -> "", + "os_manufacturer" -> "", + "os_timezone" -> "", + "dvce_type" -> "", + "dvce_ismobile" -> "", + "dvce_screenwidth" -> "", + "dvce_screenheight" -> "", + "doc_charset" -> "", + "doc_width" -> "", + "doc_height" -> "", + "tr_currency" -> "", + "tr_total_base" -> "", + "tr_tax_base" -> "", + "tr_shipping_base" -> "", + "ti_currency" -> "", + "ti_price_base" -> "", + "base_currency" -> "", + "geo_timezone" -> "", + "mkt_clickid" -> "", + "mkt_network" -> "", + "etl_tags" -> "", + "dvce_sent_tstamp" -> "", + "refr_domain_userid" -> "", + "refr_device_tstamp" -> "", + "derived_contexts" -> "", + "domain_sessionid" -> "2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1", + "derived_tstamp" -> "2013-11-26 00:03:57.886", + "event_vendor" -> "com.snowplowanalytics.snowplow", + "event_name" -> "link_click", + "event_format" -> "jsonschema", + "event_version" -> "1-0-0", + "event_fingerprint" -> "e3dbfa9cca0412c3d4052863cefb547f", 
+ "true_tstamp" -> "2013-11-26 00:03:57.886" + ) + + val expected = Event( + app_id = Some("angry-birds"), + platform = Some("web"), + etl_tstamp = Some(Instant.parse("2017-01-26T00:01:25.292Z")), + collector_tstamp = Instant.parse("2013-11-26T00:02:05Z"), + dvce_created_tstamp = Some(Instant.parse("2013-11-26T00:03:57.885Z")), + event = Some("page_view"), + event_id = UUID.fromString("c6ef3124-b53a-4b13-a233-0088f79dcbcb"), + txn_id = Some(41828), + name_tracker = Some("cloudfront-1"), + v_tracker = Some("js-2.1.0"), + v_collector = "clj-tomcat-0.1.0", + v_etl = "serde-0.5.2", + user_id = Some("jon.doe@email.com"), + user_ipaddress = Some("92.231.54.234"), + user_fingerprint = Some("2161814971"), + domain_userid = Some("bc2e92ec6c204a14"), + domain_sessionidx = Some(3), + network_userid = Some("ecdff4d0-9175-40ac-a8bb-325c49733607"), + geo_country = Some("US"), + geo_region = Some("TX"), + geo_city = Some("New York"), + geo_zipcode = Some("94109"), + geo_latitude = Some(37.443604), + geo_longitude = Some(-122.4124), + geo_region_name = Some("Florida"), + ip_isp = Some("FDN Communications"), + ip_organization = Some("Bouygues Telecom"), + ip_domain = Some("nuvox.net"), + ip_netspeed = Some("Cable/DSL"), + page_url = Some("http://www.snowplowanalytics.com"), + page_title = Some("On Analytics"), + page_referrer = None, + page_urlscheme = Some("http"), + page_urlhost = Some("www.snowplowanalytics.com"), + page_urlport = Some(80), + page_urlpath = Some("/product/index.html"), + page_urlquery = Some("id=GTM-DLRG"), + page_urlfragment = Some("4-conclusion"), + refr_urlscheme = None, + refr_urlhost = None, + refr_urlport = None, + refr_urlpath = None, + refr_urlquery = None, + refr_urlfragment = None, + refr_medium = None, + refr_source = None, + refr_term = None, + mkt_medium = None, + mkt_source = None, + mkt_term = None, + mkt_content = None, + mkt_campaign = None, + contexts = Contexts(List()), + se_category = None, + se_action = None, + se_label = None, + se_property 
= None, + se_value = None, + unstruct_event = UnstructEvent(None), + tr_orderid = None, + tr_affiliation = None, + tr_total = None, + tr_tax = None, + tr_shipping = None, + tr_city = None, + tr_state = None, + tr_country = None, + ti_orderid = None, + ti_sku = None, + ti_name = None, + ti_category = None, + ti_price = None, + ti_quantity = None, + pp_xoffset_min = None, + pp_xoffset_max = None, + pp_yoffset_min = None, + pp_yoffset_max = None, + useragent = None, + br_name = None, + br_family = None, + br_version = None, + br_type = None, + br_renderengine = None, + br_lang = None, + br_features_pdf = Some(true), + br_features_flash = Some(false), + br_features_java = None, + br_features_director = None, + br_features_quicktime = None, + br_features_realplayer = None, + br_features_windowsmedia = None, + br_features_gears = None, + br_features_silverlight = None, + br_cookies = None, + br_colordepth = None, + br_viewwidth = None, + br_viewheight = None, + os_name = None, + os_family = None, + os_manufacturer = None, + os_timezone = None, + dvce_type = None, + dvce_ismobile = None, + dvce_screenwidth = None, + dvce_screenheight = None, + doc_charset = None, + doc_width = None, + doc_height = None, + tr_currency = None, + tr_total_base = None, + tr_tax_base = None, + tr_shipping_base = None, + ti_currency = None, + ti_price_base = None, + base_currency = None, + geo_timezone = None, + mkt_clickid = None, + mkt_network = None, + etl_tags = None, + dvce_sent_tstamp = None, + refr_domain_userid = None, + refr_device_tstamp = None, + derived_contexts = Contexts(List()), + domain_sessionid = Some("2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1"), + derived_tstamp = Some(Instant.parse("2013-11-26T00:03:57.886Z")), + event_vendor = Some("com.snowplowanalytics.snowplow"), + event_name = Some("link_click"), + event_format = Some("jsonschema"), + event_version = Some("1-0-0"), + event_fingerprint = Some("e3dbfa9cca0412c3d4052863cefb547f"), + true_tstamp = 
Some(Instant.parse("2013-11-26T00:03:57.886Z")) + ) + + val eventValues = input.unzip._2.mkString("\t") + val event = Event.parse(eventValues) + + // Case class must be processed as expected + event mustEqual Valid(expected) + + val eventJson = event.getOrElse(throw new RuntimeException("Failed to parse event")).toJson(true) + + val legacyJson = parse(EventTransformer + .transform(eventValues) + .right + .toOption + .getOrElse(throw new RuntimeException("Event failed transformation"))) + .right + .toOption + .getOrElse(throw new RuntimeException("Event failed transformation")) + + // JSON output must be equal to output from the old transformer. (NB: field ordering in new JSON will be randomized) + eventJson mustEqual legacyJson + } + + "successfully merge two matching contexts into 2-elements array" in { + + val input = List( + "app_id" -> "angry-birds", + "platform" -> "web", + "etl_tstamp" -> "2017-01-26 00:01:25.292", + "collector_tstamp" -> "2013-11-26 00:02:05", + "dvce_created_tstamp" -> "2013-11-26 00:03:57.885", + "event" -> "page_view", + "event_id" -> "c6ef3124-b53a-4b13-a233-0088f79dcbcb", + "txn_id" -> "41828", + "name_tracker" -> "cloudfront-1", + "v_tracker" -> "js-2.1.0", + "v_collector" -> "clj-tomcat-0.1.0", + "v_etl" -> "serde-0.5.2", + "user_id" -> "jon.doe@email.com", + "user_ipaddress" -> "92.231.54.234", + "user_fingerprint" -> "2161814971", + "domain_userid" -> "bc2e92ec6c204a14", + "domain_sessionidx" -> "3", + "network_userid" -> "ecdff4d0-9175-40ac-a8bb-325c49733607", + "geo_country" -> "US", + "geo_region" -> "TX", + "geo_city" -> "New York", + "geo_zipcode" -> "94109", + "geo_latitude" -> "37.443604", + "geo_longitude" -> "-122.4124", + "geo_region_name" -> "Florida", + "ip_isp" -> "FDN Communications", + "ip_organization" -> "Bouygues Telecom", + "ip_domain" -> "nuvox.net", + "ip_netspeed" -> "Cable/DSL", + "page_url" -> "http://www.snowplowanalytics.com", + "page_title" -> "On Analytics", + "page_referrer" -> "", + "page_urlscheme" -> 
"http", + "page_urlhost" -> "www.snowplowanalytics.com", + "page_urlport" -> "80", + "page_urlpath" -> "/product/index.html", + "page_urlquery" -> "id=GTM-DLRG", + "page_urlfragment" -> "4-conclusion", + "refr_urlscheme" -> "", + "refr_urlhost" -> "", + "refr_urlport" -> "", + "refr_urlpath" -> "", + "refr_urlquery" -> "", + "refr_urlfragment" -> "", + "refr_medium" -> "", + "refr_source" -> "", + "refr_term" -> "", + "mkt_medium" -> "", + "mkt_source" -> "", + "mkt_term" -> "", + "mkt_content" -> "", + "mkt_campaign" -> "", + "contexts" -> contextsWithDuplicate, + "se_category" -> "", + "se_action" -> "", + "se_label" -> "", + "se_property" -> "", + "se_value" -> "", + "unstruct_event" -> unstructJson, + "tr_orderid" -> "", + "tr_affiliation" -> "", + "tr_total" -> "", + "tr_tax" -> "", + "tr_shipping" -> "", + "tr_city" -> "", + "tr_state" -> "", + "tr_country" -> "", + "ti_orderid" -> "", + "ti_sku" -> "", + "ti_name" -> "", + "ti_category" -> "", + "ti_price" -> "", + "ti_quantity" -> "", + "pp_xoffset_min" -> "", + "pp_xoffset_max" -> "", + "pp_yoffset_min" -> "", + "pp_yoffset_max" -> "", + "useragent" -> "", + "br_name" -> "", + "br_family" -> "", + "br_version" -> "", + "br_type" -> "", + "br_renderengine" -> "", + "br_lang" -> "", + "br_features_pdf" -> "1", + "br_features_flash" -> "0", + "br_features_java" -> "", + "br_features_director" -> "", + "br_features_quicktime" -> "", + "br_features_realplayer" -> "", + "br_features_windowsmedia" -> "", + "br_features_gears" -> "", + "br_features_silverlight" -> "", + "br_cookies" -> "", + "br_colordepth" -> "", + "br_viewwidth" -> "", + "br_viewheight" -> "", + "os_name" -> "", + "os_family" -> "", + "os_manufacturer" -> "", + "os_timezone" -> "", + "dvce_type" -> "", + "dvce_ismobile" -> "", + "dvce_screenwidth" -> "", + "dvce_screenheight" -> "", + "doc_charset" -> "", + "doc_width" -> "", + "doc_height" -> "", + "tr_currency" -> "", + "tr_total_base" -> "", + "tr_tax_base" -> "", + "tr_shipping_base" -> "", 
+ "ti_currency" -> "", + "ti_price_base" -> "", + "base_currency" -> "", + "geo_timezone" -> "", + "mkt_clickid" -> "", + "mkt_network" -> "", + "etl_tags" -> "", + "dvce_sent_tstamp" -> "", + "refr_domain_userid" -> "", + "refr_device_tstamp" -> "", + "derived_contexts" -> derivedContextsJson, + "domain_sessionid" -> "2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1", + "derived_tstamp" -> "2013-11-26 00:03:57.886", + "event_vendor" -> "com.snowplowanalytics.snowplow", + "event_name" -> "link_click", + "event_format" -> "jsonschema", + "event_version" -> "1-0-0", + "event_fingerprint" -> "e3dbfa9cca0412c3d4052863cefb547f", + "true_tstamp" -> "2013-11-26 00:03:57.886" + ) + + val expected = Event( + app_id = Some("angry-birds"), + platform = Some("web"), + etl_tstamp = Some(Instant.parse("2017-01-26T00:01:25.292Z")), + collector_tstamp = Instant.parse("2013-11-26T00:02:05Z"), + dvce_created_tstamp = Some(Instant.parse("2013-11-26T00:03:57.885Z")), + event = Some("page_view"), + event_id = UUID.fromString("c6ef3124-b53a-4b13-a233-0088f79dcbcb"), + txn_id = Some(41828), + name_tracker = Some("cloudfront-1"), + v_tracker = Some("js-2.1.0"), + v_collector = "clj-tomcat-0.1.0", + v_etl = "serde-0.5.2", + user_id = Some("jon.doe@email.com"), + user_ipaddress = Some("92.231.54.234"), + user_fingerprint = Some("2161814971"), + domain_userid = Some("bc2e92ec6c204a14"), + domain_sessionidx = Some(3), + network_userid = Some("ecdff4d0-9175-40ac-a8bb-325c49733607"), + geo_country = Some("US"), + geo_region = Some("TX"), + geo_city = Some("New York"), + geo_zipcode = Some("94109"), + geo_latitude = Some(37.443604), + geo_longitude = Some(-122.4124), + geo_region_name = Some("Florida"), + ip_isp = Some("FDN Communications"), + ip_organization = Some("Bouygues Telecom"), + ip_domain = Some("nuvox.net"), + ip_netspeed = Some("Cable/DSL"), + page_url = Some("http://www.snowplowanalytics.com"), + page_title = Some("On Analytics"), + page_referrer = None, + page_urlscheme = Some("http"), + 
page_urlhost = Some("www.snowplowanalytics.com"), + page_urlport = Some(80), + page_urlpath = Some("/product/index.html"), + page_urlquery = Some("id=GTM-DLRG"), + page_urlfragment = Some("4-conclusion"), + refr_urlscheme = None, + refr_urlhost = None, + refr_urlport = None, + refr_urlpath = None, + refr_urlquery = None, + refr_urlfragment = None, + refr_medium = None, + refr_source = None, + refr_term = None, + mkt_medium = None, + mkt_source = None, + mkt_term = None, + mkt_content = None, + mkt_campaign = None, + contexts = Contexts( + List( + SelfDescribingData( + SchemaKey( + "org.schema", + "WebPage", + "jsonschema", + SchemaVer.Full(1, 0, 0) + ), + JsonObject( + ("genre", "blog".asJson), + ("inLanguage", "en-US".asJson), + ("datePublished", "2014-11-06T00:00:00Z".asJson), + ("author", "Fred Blundun".asJson), + ("breadcrumb", List("blog", "releases").asJson), + ("keywords", List("snowplow", "javascript", "tracker", "event").asJson) + ).asJson + ), + SelfDescribingData( + SchemaKey( + "org.acme", + "context_one", + "jsonschema", + SchemaVer.Full(1, 0, 0) + ), + JsonObject( + ("item", 1.asJson) + ).asJson + ), + SelfDescribingData( + SchemaKey( + "org.acme", + "context_one", + "jsonschema", + SchemaVer.Full(1, 0, 1) + ), + JsonObject( + ("item", 2.asJson) + ).asJson + ) + ) + ), + se_category = None, + se_action = None, + se_label = None, + se_property = None, + se_value = None, + unstruct_event = UnstructEvent( + Some( + SelfDescribingData( + SchemaKey( + "com.snowplowanalytics.snowplow", + "link_click", + "jsonschema", + SchemaVer.Full(1, 0, 1) + ), + JsonObject( + ("targetUrl", "http://www.example.com".asJson), + ("elementClasses", List("foreground").asJson), + ("elementId", "exampleLink".asJson) + ).asJson + ) + ) + ), + tr_orderid = None, + tr_affiliation = None, + tr_total = None, + tr_tax = None, + tr_shipping = None, + tr_city = None, + tr_state = None, + tr_country = None, + ti_orderid = None, + ti_sku = None, + ti_name = None, + ti_category = None, + 
ti_price = None, + ti_quantity = None, + pp_xoffset_min = None, + pp_xoffset_max = None, + pp_yoffset_min = None, + pp_yoffset_max = None, + useragent = None, + br_name = None, + br_family = None, + br_version = None, + br_type = None, + br_renderengine = None, + br_lang = None, + br_features_pdf = Some(true), + br_features_flash = Some(false), + br_features_java = None, + br_features_director = None, + br_features_quicktime = None, + br_features_realplayer = None, + br_features_windowsmedia = None, + br_features_gears = None, + br_features_silverlight = None, + br_cookies = None, + br_colordepth = None, + br_viewwidth = None, + br_viewheight = None, + os_name = None, + os_family = None, + os_manufacturer = None, + os_timezone = None, + dvce_type = None, + dvce_ismobile = None, + dvce_screenwidth = None, + dvce_screenheight = None, + doc_charset = None, + doc_width = None, + doc_height = None, + tr_currency = None, + tr_total_base = None, + tr_tax_base = None, + tr_shipping_base = None, + ti_currency = None, + ti_price_base = None, + base_currency = None, + geo_timezone = None, + mkt_clickid = None, + mkt_network = None, + etl_tags = None, + dvce_sent_tstamp = None, + refr_domain_userid = None, + refr_device_tstamp = None, + derived_contexts = Contexts( + List( + SelfDescribingData( + SchemaKey( + "com.snowplowanalytics.snowplow", + "ua_parser_context", + "jsonschema", + SchemaVer.Full(1, 0, 0) + ), + JsonObject( + ("useragentFamily", "IE".asJson), + ("useragentMajor", "7".asJson), + ("useragentMinor", "0".asJson), + ("useragentPatch", Json.Null), + ("useragentVersion", "IE 7.0".asJson), + ("osFamily", "Windows XP".asJson), + ("osMajor", Json.Null), + ("osMinor", Json.Null), + ("osPatch", Json.Null), + ("osPatchMinor", Json.Null), + ("osVersion", "Windows XP".asJson), + ("deviceFamily", "Other".asJson) + ).asJson + ) + ) + ), + domain_sessionid = Some("2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1"), + derived_tstamp = Some(Instant.parse("2013-11-26T00:03:57.886Z")), + 
event_vendor = Some("com.snowplowanalytics.snowplow"), + event_name = Some("link_click"), + event_format = Some("jsonschema"), + event_version = Some("1-0-0"), + event_fingerprint = Some("e3dbfa9cca0412c3d4052863cefb547f"), + true_tstamp = Some(Instant.parse("2013-11-26T00:03:57.886Z")) + ) + + val eventValues = input.unzip._2.mkString("\t") + val event = Event.parse(eventValues) + + // Case class must be processed as expected + event mustEqual Valid(expected) + + val eventJson = event.getOrElse(throw new RuntimeException("Failed to parse event")).toJson(true) + + val legacyJson = parse(EventTransformer + .transform(eventValues) + .right + .toOption + .getOrElse(throw new RuntimeException("Event failed transformation"))) + .right + .toOption + .getOrElse(throw new RuntimeException("Event failed transformation")) + + // JSON output must be equal to output from the old transformer. (NB: field ordering in new JSON will be randomized) + eventJson mustEqual legacyJson + } + + "return correct results from helper methods" in { + val input = List( + "app_id" -> "angry-birds", + "platform" -> "web", + "etl_tstamp" -> "2017-01-26 00:01:25.292", + "collector_tstamp" -> "2013-11-26 00:02:05", + "dvce_created_tstamp" -> "2013-11-26 00:03:57.885", + "event" -> "page_view", + "event_id" -> "c6ef3124-b53a-4b13-a233-0088f79dcbcb", + "txn_id" -> "41828", + "name_tracker" -> "cloudfront-1", + "v_tracker" -> "js-2.1.0", + "v_collector" -> "clj-tomcat-0.1.0", + "v_etl" -> "serde-0.5.2", + "user_id" -> "jon.doe@email.com", + "user_ipaddress" -> "92.231.54.234", + "user_fingerprint" -> "2161814971", + "domain_userid" -> "bc2e92ec6c204a14", + "domain_sessionidx" -> "3", + "network_userid" -> "ecdff4d0-9175-40ac-a8bb-325c49733607", + "geo_country" -> "US", + "geo_region" -> "TX", + "geo_city" -> "New York", + "geo_zipcode" -> "94109", + "geo_latitude" -> "37.443604", + "geo_longitude" -> "-122.4124", + "geo_region_name" -> "Florida", + "ip_isp" -> "FDN Communications", + "ip_organization" 
-> "Bouygues Telecom", + "ip_domain" -> "nuvox.net", + "ip_netspeed" -> "Cable/DSL", + "page_url" -> "http://www.snowplowanalytics.com", + "page_title" -> "On Analytics", + "page_referrer" -> "", + "page_urlscheme" -> "http", + "page_urlhost" -> "www.snowplowanalytics.com", + "page_urlport" -> "80", + "page_urlpath" -> "/product/index.html", + "page_urlquery" -> "id=GTM-DLRG", + "page_urlfragment" -> "4-conclusion", + "refr_urlscheme" -> "", + "refr_urlhost" -> "", + "refr_urlport" -> "", + "refr_urlpath" -> "", + "refr_urlquery" -> "", + "refr_urlfragment" -> "", + "refr_medium" -> "", + "refr_source" -> "", + "refr_term" -> "", + "mkt_medium" -> "", + "mkt_source" -> "", + "mkt_term" -> "", + "mkt_content" -> "", + "mkt_campaign" -> "", + "contexts" -> contextsWithDuplicate, + "se_category" -> "", + "se_action" -> "", + "se_label" -> "", + "se_property" -> "", + "se_value" -> "", + "unstruct_event" -> unstructJson, + "tr_orderid" -> "", + "tr_affiliation" -> "", + "tr_total" -> "", + "tr_tax" -> "", + "tr_shipping" -> "", + "tr_city" -> "", + "tr_state" -> "", + "tr_country" -> "", + "ti_orderid" -> "", + "ti_sku" -> "", + "ti_name" -> "", + "ti_category" -> "", + "ti_price" -> "", + "ti_quantity" -> "", + "pp_xoffset_min" -> "", + "pp_xoffset_max" -> "", + "pp_yoffset_min" -> "", + "pp_yoffset_max" -> "", + "useragent" -> "", + "br_name" -> "", + "br_family" -> "", + "br_version" -> "", + "br_type" -> "", + "br_renderengine" -> "", + "br_lang" -> "", + "br_features_pdf" -> "1", + "br_features_flash" -> "0", + "br_features_java" -> "", + "br_features_director" -> "", + "br_features_quicktime" -> "", + "br_features_realplayer" -> "", + "br_features_windowsmedia" -> "", + "br_features_gears" -> "", + "br_features_silverlight" -> "", + "br_cookies" -> "", + "br_colordepth" -> "", + "br_viewwidth" -> "", + "br_viewheight" -> "", + "os_name" -> "", + "os_family" -> "", + "os_manufacturer" -> "", + "os_timezone" -> "", + "dvce_type" -> "", + "dvce_ismobile" -> "", + 
"dvce_screenwidth" -> "", + "dvce_screenheight" -> "", + "doc_charset" -> "", + "doc_width" -> "", + "doc_height" -> "", + "tr_currency" -> "", + "tr_total_base" -> "", + "tr_tax_base" -> "", + "tr_shipping_base" -> "", + "ti_currency" -> "", + "ti_price_base" -> "", + "base_currency" -> "", + "geo_timezone" -> "", + "mkt_clickid" -> "", + "mkt_network" -> "", + "etl_tags" -> "", + "dvce_sent_tstamp" -> "", + "refr_domain_userid" -> "", + "refr_device_tstamp" -> "", + "derived_contexts" -> derivedContextsJson, + "domain_sessionid" -> "2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1", + "derived_tstamp" -> "2013-11-26 00:03:57.886", + "event_vendor" -> "com.snowplowanalytics.snowplow", + "event_name" -> "link_click", + "event_format" -> "jsonschema", + "event_version" -> "1-0-0", + "event_fingerprint" -> "e3dbfa9cca0412c3d4052863cefb547f", + "true_tstamp" -> "2013-11-26 00:03:57.886" + ) + + val eventValues = input.unzip._2.mkString("\t") + val event = Event.parse(eventValues).getOrElse(throw new RuntimeException("Failed to parse event")) + + event.geoLocation must beSome(("geo_location", "37.443604,-122.4124".asJson)) + event.contexts.toShreddedJson mustEqual Map( + "contexts_org_schema_web_page_1" -> + List( + JsonObject( + ("genre", "blog".asJson), + ("inLanguage", "en-US".asJson), + ("datePublished", "2014-11-06T00:00:00Z".asJson), + ("author", "Fred Blundun".asJson), + ("breadcrumb", List("blog", "releases").asJson), + ("keywords", List("snowplow", "javascript", "tracker", "event").asJson) + ).asJson + ).asJson, + "contexts_org_acme_context_one_1" -> + List( + JsonObject( + ("item", 1.asJson) + ).asJson, + JsonObject( + ("item", 2.asJson) + ).asJson + ).asJson + ) + event.derived_contexts.toShreddedJson mustEqual Map( + "contexts_com_snowplowanalytics_snowplow_ua_parser_context_1" -> + List( + JsonObject( + ("useragentFamily", "IE".asJson), + ("useragentMajor", "7".asJson), + ("useragentMinor", "0".asJson), + ("useragentPatch", Json.Null), + ("useragentVersion", "IE 
7.0".asJson), + ("osFamily", "Windows XP".asJson), + ("osMajor", Json.Null), + ("osMinor", Json.Null), + ("osPatch", Json.Null), + ("osPatchMinor", Json.Null), + ("osVersion", "Windows XP".asJson), + ("deviceFamily", "Other".asJson) + ).asJson + ).asJson + ) + event.unstruct_event.toShreddedJson must beSome( + "unstruct_event_com_snowplowanalytics_snowplow_link_click_1", + JsonObject( + ("targetUrl", "http://www.example.com".asJson), + ("elementClasses", List("foreground").asJson), + ("elementId", "exampleLink".asJson) + ).asJson + ) + } + + "fail if column values are invalid (and combine errors)" in { + + val input = List( + "app_id" -> "angry-birds", + "platform" -> "web", + "etl_tstamp" -> "not_an_instant", + "collector_tstamp" -> "", + "dvce_created_tstamp" -> "2013-11-26 00:03:57.885", + "event" -> "page_view", + "event_id" -> "not_a_uuid", + "txn_id" -> "not_an_integer", + "name_tracker" -> "cloudfront-1", + "v_tracker" -> "js-2.1.0", + "v_collector" -> "", + "v_etl" -> "serde-0.5.2", + "user_id" -> "jon.doe@email.com", + "user_ipaddress" -> "92.231.54.234", + "user_fingerprint" -> "2161814971", + "domain_userid" -> "bc2e92ec6c204a14", + "domain_sessionidx" -> "3", + "network_userid" -> "ecdff4d0-9175-40ac-a8bb-325c49733607", + "geo_country" -> "US", + "geo_region" -> "TX", + "geo_city" -> "New York", + "geo_zipcode" -> "94109", + "geo_latitude" -> "not_a_double", + "geo_longitude" -> "-122.4124", + "geo_region_name" -> "Florida", + "ip_isp" -> "FDN Communications", + "ip_organization" -> "Bouygues Telecom", + "ip_domain" -> "nuvox.net", + "ip_netspeed" -> "Cable/DSL", + "page_url" -> "http://www.snowplowanalytics.com", + "page_title" -> "On Analytics", + "page_referrer" -> "", + "page_urlscheme" -> "http", + "page_urlhost" -> "www.snowplowanalytics.com", + "page_urlport" -> "80", + "page_urlpath" -> "/product/index.html", + "page_urlquery" -> "id=GTM-DLRG", + "page_urlfragment" -> "4-conclusion", + "refr_urlscheme" -> "", + "refr_urlhost" -> "", + 
"refr_urlport" -> "", + "refr_urlpath" -> "", + "refr_urlquery" -> "", + "refr_urlfragment" -> "", + "refr_medium" -> "", + "refr_source" -> "", + "refr_term" -> "", + "mkt_medium" -> "", + "mkt_source" -> "", + "mkt_term" -> "", + "mkt_content" -> "", + "mkt_campaign" -> "", + "contexts" -> contextsJson, + "se_category" -> "", + "se_action" -> "", + "se_label" -> "", + "se_property" -> "", + "se_value" -> "", + "unstruct_event" -> unstructJson, + "tr_orderid" -> "", + "tr_affiliation" -> "", + "tr_total" -> "", + "tr_tax" -> "", + "tr_shipping" -> "", + "tr_city" -> "", + "tr_state" -> "", + "tr_country" -> "", + "ti_orderid" -> "", + "ti_sku" -> "", + "ti_name" -> "", + "ti_category" -> "", + "ti_price" -> "", + "ti_quantity" -> "", + "pp_xoffset_min" -> "", + "pp_xoffset_max" -> "", + "pp_yoffset_min" -> "", + "pp_yoffset_max" -> "", + "useragent" -> "", + "br_name" -> "", + "br_family" -> "", + "br_version" -> "", + "br_type" -> "", + "br_renderengine" -> "", + "br_lang" -> "", + "br_features_pdf" -> "not_a_boolean", + "br_features_flash" -> "0", + "br_features_java" -> "", + "br_features_director" -> "", + "br_features_quicktime" -> "", + "br_features_realplayer" -> "", + "br_features_windowsmedia" -> "", + "br_features_gears" -> "", + "br_features_silverlight" -> "", + "br_cookies" -> "", + "br_colordepth" -> "", + "br_viewwidth" -> "", + "br_viewheight" -> "", + "os_name" -> "", + "os_family" -> "", + "os_manufacturer" -> "", + "os_timezone" -> "", + "dvce_type" -> "", + "dvce_ismobile" -> "", + "dvce_screenwidth" -> "", + "dvce_screenheight" -> "", + "doc_charset" -> "", + "doc_width" -> "", + "doc_height" -> "", + "tr_currency" -> "", + "tr_total_base" -> "", + "tr_tax_base" -> "", + "tr_shipping_base" -> "", + "ti_currency" -> "", + "ti_price_base" -> "", + "base_currency" -> "", + "geo_timezone" -> "", + "mkt_clickid" -> "", + "mkt_network" -> "", + "etl_tags" -> "", + "dvce_sent_tstamp" -> "", + "refr_domain_userid" -> "", + "refr_device_tstamp" -> "", 
+ "derived_contexts" -> derivedContextsJson, + "domain_sessionid" -> "2b15e5c8-d3b1-11e4-b9d6-1681e6b88ec1", + "derived_tstamp" -> "2013-11-26 00:03:57.886", + "event_vendor" -> "com.snowplowanalytics.snowplow", + "event_name" -> "link_click", + "event_format" -> "jsonschema", + "event_version" -> "1-0-0", + "event_fingerprint" -> "e3dbfa9cca0412c3d4052863cefb547f" + ) + + val eventValues = input.unzip._2.mkString("\t") + val event = Event.parse(eventValues) + + // Case class must be correctly invalidated + event mustEqual Invalid(NonEmptyList.of( + "Cannot parse key 'etl_tstamp with value not_an_instant into datetime", + "Field 'collector_tstamp cannot be empty", + "Cannot parse key 'event_id with value not_a_uuid into UUID", + "Cannot parse key 'txn_id with value not_an_integer into integer", + "Field 'v_collector cannot be empty", + "Cannot parse key 'geo_latitude with value not_a_double into double", + "Cannot parse key 'br_features_pdf with value not_a_boolean into boolean", + "Cannot parse key 'true_tstamp with value VALUE IS MISSING into datetime" + )) + } + } + + "The transformSchema method" should { + "successfully convert schemas into snake_case" in { + SnowplowEvent.transformSchema(Data.Contexts(Data.CustomContexts), "org.w3", "PerformanceTiming", 1) mustEqual "contexts_org_w3_performance_timing_1" + SnowplowEvent.transformSchema(Data.Contexts(Data.CustomContexts), "org.w3", "PerformanceTiming", 1) mustEqual Data.fixSchema(Data.Contexts(Data.CustomContexts), "iglu:org.w3/PerformanceTiming/jsonschema/1-0-0") + SnowplowEvent.transformSchema(Data.Contexts(Data.CustomContexts), SchemaKey("org.w3", "PerformanceTiming", "jsonschema", SchemaVer.Full(1, 0, 0))) mustEqual "contexts_org_w3_performance_timing_1" + SnowplowEvent.transformSchema(Data.Contexts(Data.CustomContexts), "com.snowplowanalytics.snowplow", "ua_parser_context", 1) mustEqual "contexts_com_snowplowanalytics_snowplow_ua_parser_context_1" + SnowplowEvent.transformSchema(Data.UnstructEvent, 
"com.snowplowanalytics.self-desc", "schema", 1) mustEqual "unstruct_event_com_snowplowanalytics_self_desc_schema_1" + } + } +} diff --git a/src/test/scala/RunManifestsSpec.scala b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/RunManifestsSpec.scala similarity index 100% rename from src/test/scala/RunManifestsSpec.scala rename to src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/RunManifestsSpec.scala diff --git a/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/ValueDecoderSpec.scala b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/ValueDecoderSpec.scala new file mode 100644 index 0000000..d62377a --- /dev/null +++ b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/decode/ValueDecoderSpec.scala @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ + +package com.snowplowanalytics.snowplow.analytics.scalasdk.decode + +// java +import java.time.Instant +import java.util.UUID + +// circe +import io.circe.{Json, JsonObject} +import io.circe.syntax._ +import io.circe.parser._ + +// cats +import cats.syntax.either._ + +// Specs2 +import org.specs2.mutable.Specification + +// Iglu +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} + +// This library +import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent.{Contexts, UnstructEvent} + +/** + * Tests ValueDecoder class + */ +class ValueDecoderSpec extends Specification { + + "The ValueDecoder class" should { + "parse String and Option[String] values" in { + ValueDecoder[String].parse(Symbol("key"), "") mustEqual (Symbol("key"), "Field 'key cannot be empty").asLeft + ValueDecoder[String].parse(Symbol("key"), "value") mustEqual "value".asRight + ValueDecoder[Option[String]].parse(Symbol("key"), "") mustEqual None.asRight + ValueDecoder[Option[String]].parse(Symbol("key"), "value") mustEqual Some("value").asRight + } + + "parse Option[Int] values" in { + ValueDecoder[Option[Int]].parse(Symbol("key"), "") mustEqual None.asRight + ValueDecoder[Option[Int]].parse(Symbol("key"), "42") mustEqual Some(42).asRight + ValueDecoder[Option[Int]].parse(Symbol("key"), "value") mustEqual (Symbol("key"), "Cannot parse key 'key with value value into integer").asLeft + } + + "parse UUID values" in { + ValueDecoder[UUID].parse(Symbol("key"), "") mustEqual (Symbol("key"), "Field 'key cannot be empty").asLeft + ValueDecoder[UUID].parse(Symbol("key"), "d2161fd1-ffed-41df-ac3e-a729012105f5") mustEqual UUID.fromString("d2161fd1-ffed-41df-ac3e-a729012105f5").asRight + ValueDecoder[UUID].parse(Symbol("key"), "value") mustEqual (Symbol("key"), "Cannot parse key 'key with value value into UUID").asLeft + } + + "parse Option[Boolean] values" in { + ValueDecoder[Option[Boolean]].parse(Symbol("key"), "") mustEqual None.asRight + 
ValueDecoder[Option[Boolean]].parse(Symbol("key"), "0") mustEqual Some(false).asRight + ValueDecoder[Option[Boolean]].parse(Symbol("key"), "1") mustEqual Some(true).asRight + ValueDecoder[Option[Boolean]].parse(Symbol("key"), "value") mustEqual (Symbol("key"), "Cannot parse key 'key with value value into boolean").asLeft + } + + "parse Option[Double] values" in { + ValueDecoder[Option[Double]].parse(Symbol("key"), "") mustEqual None.asRight + ValueDecoder[Option[Double]].parse(Symbol("key"), "42.5") mustEqual Some(42.5).asRight + ValueDecoder[Option[Double]].parse(Symbol("key"), "value") mustEqual (Symbol("key"), "Cannot parse key 'key with value value into double").asLeft + } + + "parse Instant and Option[Instant] values" in { + ValueDecoder[Instant].parse(Symbol("key"), "") mustEqual (Symbol("key"), "Field 'key cannot be empty").asLeft + ValueDecoder[Instant].parse(Symbol("key"), "2013-11-26 00:03:57.885") mustEqual Instant.parse("2013-11-26T00:03:57.885Z").asRight + ValueDecoder[Instant].parse(Symbol("key"), "value") mustEqual (Symbol("key"), "Cannot parse key 'key with value value into datetime").asLeft + ValueDecoder[Option[Instant]].parse(Symbol("key"), "") mustEqual None.asRight + ValueDecoder[Option[Instant]].parse(Symbol("key"), "2013-11-26 00:03:57.885") mustEqual Some(Instant.parse("2013-11-26T00:03:57.885Z")).asRight + ValueDecoder[Option[Instant]].parse(Symbol("key"), "value") mustEqual (Symbol("key"), "Cannot parse key 'key with value value into datetime").asLeft + } + + "parse Contexts values" in { + val validContexts = + """{ + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema": "iglu:org.schema/WebPage/jsonschema/1-0-0", + "data": { + "genre": "blog", + "inLanguage": "en-US", + "datePublished": "2014-11-06T00:00:00Z", + "author": "Fred Blundun", + "breadcrumb": [ + "blog", + "releases" + ], + "keywords": [ + "snowplow", + "javascript", + "tracker", + "event" + ] + } + } + ] + }""" + val 
invalidPayloadContexts = + """{ + "schema": "iglu:invalid/schema/jsonschema/1-0-0", + "data": [ + { + "schema": "iglu:org.schema/WebPage/jsonschema/1-0-0", + "data": { + "genre": "blog", + "inLanguage": "en-US", + "datePublished": "2014-11-06T00:00:00Z", + "author": "Fred Blundun", + "breadcrumb": [ + "blog", + "releases" + ], + "keywords": [ + "snowplow", + "javascript", + "tracker", + "event" + ] + } + } + ] + }""" + ValueDecoder[Contexts].parse(Symbol("key"), "") mustEqual Contexts(List()).asRight + ValueDecoder[Contexts].parse(Symbol("key"), validContexts) mustEqual Contexts( + List( + SelfDescribingData( + SchemaKey( + "org.schema", + "WebPage", + "jsonschema", + SchemaVer.Full(1, 0, 0) + ), + JsonObject( + ("genre", "blog".asJson), + ("inLanguage", "en-US".asJson), + ("datePublished", "2014-11-06T00:00:00Z".asJson), + ("author", "Fred Blundun".asJson), + ("breadcrumb", List("blog", "releases").asJson), + ("keywords", List("snowplow", "javascript", "tracker", "event").asJson) + ).asJson + ) + ) + ).asRight + ValueDecoder[Contexts].parse(Symbol("key"), invalidPayloadContexts) mustEqual (Symbol("key"), "Unknown payload: iglu:invalid/schema/jsonschema/1-0-0").asLeft + } + + "parse UnstructEvent values" in { + val validUnstruct = + """{ + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data": { + "schema": "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + "data": { + "targetUrl": "http://www.example.com", + "elementClasses": ["foreground"], + "elementId": "exampleLink" + } + } + }""" + val invalidPayloadUnstruct = + """{ + "schema": "iglu:invalid/schema/jsonschema/1-0-0", + "data": { + "schema": "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + "data": { + "targetUrl": "http://www.example.com", + "elementClasses": ["foreground"], + "elementId": "exampleLink" + } + } + }""" + ValueDecoder[UnstructEvent].parse(Symbol("key"), "") mustEqual UnstructEvent(None).asRight + 
ValueDecoder[UnstructEvent].parse(Symbol("key"), validUnstruct) mustEqual UnstructEvent( + Some( + SelfDescribingData( + SchemaKey( + "com.snowplowanalytics.snowplow", + "link_click", + "jsonschema", + SchemaVer.Full(1, 0, 1) + ), + JsonObject( + ("targetUrl", "http://www.example.com".asJson), + ("elementClasses", List("foreground").asJson), + ("elementId", "exampleLink".asJson) + ).asJson + ) + ) + ).asRight + ValueDecoder[UnstructEvent].parse(Symbol("key"), invalidPayloadUnstruct) mustEqual (Symbol("key"), "Unknown payload: iglu:invalid/schema/jsonschema/1-0-0").asLeft + } + } +} diff --git a/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk.json/EventTransformerSpec.scala b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/EventTransformerSpec.scala similarity index 98% rename from src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk.json/EventTransformerSpec.scala rename to src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/EventTransformerSpec.scala index 6474d0c..2e17767 100644 --- a/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk.json/EventTransformerSpec.scala +++ b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/EventTransformerSpec.scala @@ -1115,10 +1115,10 @@ class EventTransformerSpec extends Specification { val resultJson = parse(eventWithInventory.event) val inventoryExpectation = eventWithInventory.inventory mustEqual(Set( - InventoryItem(Contexts(CustomContexts), "iglu:org.schema/WebPage/jsonschema/1-0-0"), - InventoryItem(Contexts(CustomContexts), "iglu:org.w3/PerformanceTiming/jsonschema/1-0-0"), - InventoryItem(UnstructEvent, "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1"), - InventoryItem(Contexts(DerivedContexts), "iglu:com.snowplowanalytics.snowplow/ua_parser_context/jsonschema/1-0-0") + InventoryItemOld(Contexts(CustomContexts), "iglu:org.schema/WebPage/jsonschema/1-0-0"), + InventoryItemOld(Contexts(CustomContexts), 
"iglu:org.w3/PerformanceTiming/jsonschema/1-0-0"), + InventoryItemOld(UnstructEvent, "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1"), + InventoryItemOld(Contexts(DerivedContexts), "iglu:com.snowplowanalytics.snowplow/ua_parser_context/jsonschema/1-0-0") )) val diffExpectation = (resultJson diff expected) mustEqual Diff(JNothing, JNothing, JNothing) @@ -1433,11 +1433,11 @@ class EventTransformerSpec extends Specification { val expectedContexts = parse(contextsWithDuplicate) inventory mustEqual Set( - InventoryItem(Contexts(CustomContexts), "iglu:org.schema/WebPage/jsonschema/1-0-0"), - InventoryItem(UnstructEvent, "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1"), - InventoryItem(Contexts(DerivedContexts), "iglu:com.snowplowanalytics.snowplow/ua_parser_context/jsonschema/1-0-0"), - InventoryItem(Contexts(CustomContexts),"iglu:org.acme/context_one/jsonschema/1-0-1"), - InventoryItem(Contexts(CustomContexts),"iglu:org.acme/context_one/jsonschema/1-0-0") + InventoryItemOld(Contexts(CustomContexts), "iglu:org.schema/WebPage/jsonschema/1-0-0"), + InventoryItemOld(UnstructEvent, "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1"), + InventoryItemOld(Contexts(DerivedContexts), "iglu:com.snowplowanalytics.snowplow/ua_parser_context/jsonschema/1-0-0"), + InventoryItemOld(Contexts(CustomContexts),"iglu:org.acme/context_one/jsonschema/1-0-1"), + InventoryItemOld(Contexts(CustomContexts),"iglu:org.acme/context_one/jsonschema/1-0-0") ) val contexts = json \ "contexts" diff --git a/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk.json/JsonShredderSpec.scala b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/JsonShredderSpec.scala similarity index 100% rename from src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk.json/JsonShredderSpec.scala rename to src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/JsonShredderSpec.scala diff --git 
a/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk.json/SyntaxSpec.scala b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/SyntaxSpec.scala similarity index 100% rename from src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk.json/SyntaxSpec.scala rename to src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/json/SyntaxSpec.scala