Skip to content

Commit

Permalink
Merge branch 'aws-poc' into feature/1416-aws-emr-poc
Browse files Browse the repository at this point in the history
% Conflicts:
%	pom.xml
  • Loading branch information
dk1844 committed Aug 24, 2020
2 parents 85570d7 + aa69593 commit 0bd704c
Show file tree
Hide file tree
Showing 28 changed files with 374 additions and 289 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,11 @@ The coverage reports are written in each module's `target` directory and aggrega
#### Menas requirements:
- [**Tomcat 8.5/9.0** installation](https://tomcat.apache.org/download-90.cgi)
- [**MongoDB 4.0** installation](https://docs.mongodb.com/manual/administration/install-community/)
- [**Spline UI deployment**](https://absaoss.github.io/spline/) - place the [spline.war](https://search.maven.org/remotecontent?filepath=za/co/absa/spline/spline-web/0.3.9/spline-web-0.3.9.war)
in your Tomcat webapps directory (rename after downloading to _spline.war_); NB! don't forget to set up the `spline.mongodb.url` configuration for the _war_
- [**Spline service deployment**](https://absaoss.github.io/spline/#get-spline)
- **HADOOP_CONF_DIR** environment variable, pointing to the location of your hadoop configuration (pointing to a hadoop installation)

The _Spline UI_ can be omitted; in such case the **Menas** `spline.urlTemplate` setting should be set to empty string.
The _Spline service_ can be omitted; in such case the **Standardization** and **Conformance** `spline.producer.url` setting
as well as **Menas** `menas.lineage.readApiUrl` and `menas.oozie.lineageWriteApiUrl` settings should be all set to empty string.

#### Deploying Menas
Simply copy the **menas.war** file produced when building the project into Tomcat's webapps directory.
Expand Down Expand Up @@ -106,7 +106,7 @@ password=changeme
--deploy-mode <client/cluster> \
--driver-cores <num> \
--driver-memory <num>G \
--conf "spark.driver.extraJavaOptions=-Dmenas.rest.uri=<menas_api_uri:port> -Dstandardized.hdfs.path=<path_for_standardized_output>-{0}-{1}-{2}-{3} -Dspline.mongodb.url=<mongo_url_for_spline> -Dspline.mongodb.name=<spline_database_name> -Dhdp.version=<hadoop_version>" \
--conf "spark.driver.extraJavaOptions=-Dmenas.rest.uri=<menas_api_uri:port> -Dstandardized.hdfs.path=<path_for_standardized_output>-{0}-{1}-{2}-{3} -Dspline.producer.url=<url_for_spline_consumer> -Dhdp.version=<hadoop_version>" \
--class za.co.absa.enceladus.standardization.StandardizationJob \
<spark-jobs_<build_version>.jar> \
--menas-auth-keytab <path_to_keytab_file> \
Expand All @@ -130,7 +130,7 @@ password=changeme
--driver-cores <num> \
--driver-memory <num>G \
--conf 'spark.ui.port=29000' \
--conf "spark.driver.extraJavaOptions=-Dmenas.rest.uri=<menas_api_uri:port> -Dstandardized.hdfs.path=<path_of_standardized_input>-{0}-{1}-{2}-{3} -Dconformance.mappingtable.pattern=reportDate={0}-{1}-{2} -Dspline.mongodb.url=<mongo_url_for_spline> -Dspline.mongodb.name=<spline_database_name>" -Dhdp.version=<hadoop_version> \
--conf "spark.driver.extraJavaOptions=-Dmenas.rest.uri=<menas_api_uri:port> -Dstandardized.hdfs.path=<path_of_standardized_input>-{0}-{1}-{2}-{3} -Dconformance.mappingtable.pattern=reportDate={0}-{1}-{2} -Dspline.producer.url=<url_for_spline_consumer> -Dhdp.version=<hadoop_version>" \
--packages za.co.absa:enceladus-parent:<version>,za.co.absa:enceladus-conformance:<version> \
--class za.co.absa.enceladus.conformance.DynamicConformanceJob \
<spark-jobs_<build_version>.jar> \
Expand Down
16 changes: 5 additions & 11 deletions menas/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.4.1</version>
<version>${httpclient.version}</version>
</dependency>
<!-- Spark -->
<dependency>
Expand Down Expand Up @@ -237,21 +237,15 @@
<version>${webjars.locator.version}</version>
</dependency>
<dependency>
<groupId>org.apache.oozie</groupId>
<artifactId>oozie-client</artifactId>
<version>${oozie.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.oozie</groupId>
<artifactId>oozie-hadoop-auth</artifactId>
</exclusion>
</exclusions>
<groupId>za.co.absa.spline</groupId>
<artifactId>client-ui</artifactId>
<version>${spline.ui.version}</version>
</dependency>
<!-- Plot -->
<dependency>
<groupId>org.webjars.bower</groupId>
<artifactId>chart.js</artifactId>
<version>2.7.3</version>
<version>${bower.chart.js.version}</version>
</dependency>
<!-- Data formats -->
<dependency>
Expand Down
4 changes: 2 additions & 2 deletions menas/src/main/resources/application.properties.template
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ menas.environment=Dev

menas.spark.master=local[1]

# The template for Lineage (Spline) display. If empty Lineage button is not shown.
# The api gateway for Lineage (Spline) display. If empty Lineage button is not shown.
# Avoiding the http(s) protocol will cause that the same protocol will be used as for Menas itself, avoiding issues
spline.urlTemplate=//localhost:8080/spline/dataset/lineage/_search?path=%s&application_id=%s
menas.lineage.readApiUrl=//localhost:8080/spline-gw/consumer

#system-wide time zone
timezone=UTC
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.enceladus.menas

import za.co.absa.enceladus.utils.config.ConfigReader

object LineageConfig {
final val config = new ConfigReader()
final val apiUrl: Option[String] = config.readStringConfigIfExist("menas.lineage.readApiUrl").filter(_.trim.nonEmpty)
final val jarName = "spline"
final val subSpace = "lineage"

final val mappingPathForController = "/" + subSpace + "/app/**"
final val resourceHandler = "/" + subSpace + "/**"
final val resourceLocation = "/webjars/" + jarName + "/"
final val executionIdApiTemplate: Option[String] = apiUrl.map(_ + "/execution-events?dataSourceUri=%s&applicationId=%s")

def baseUrl(baseUrlPrefix: String): String = {
val delimiter = if (baseUrlPrefix.takeRight(1) == "/") "" else "/"
s"$baseUrlPrefix$delimiter$subSpace/"
}
}
19 changes: 15 additions & 4 deletions menas/src/main/scala/za/co/absa/enceladus/menas/MvcConfig.scala
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,24 @@

package za.co.absa.enceladus.menas

import org.springframework.context.annotation.Configuration
import org.springframework.web.servlet.config.annotation.ViewControllerRegistry
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer
import org.springframework.context.annotation.{Bean, Configuration}
import org.springframework.web.servlet.config.annotation.{ResourceHandlerRegistry, ViewControllerRegistry, WebMvcConfigurer}
import org.webjars.WebJarAssetLocator

@Configuration
class MvcConfig extends WebMvcConfigurer {
def addViewControllers(registry: ViewControllerRegistry) {
@Bean
def webJarAssetLocator: WebJarAssetLocator = new WebJarAssetLocator()

override def addResourceHandlers(registry: ResourceHandlerRegistry) {
registry
.addResourceHandler(LineageConfig.resourceHandler)
.addResourceLocations(LineageConfig.resourceLocation)
.resourceChain(true)
.addResolver(new WebJarsResourceFuzzyResolver(webJarAssetLocator))
}

override def addViewControllers(registry: ViewControllerRegistry) {
registry.addViewController("/login").setViewName("login")
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.enceladus.menas

import org.springframework.web.servlet.resource.WebJarsResourceResolver
import org.webjars.WebJarAssetLocator

class WebJarsResourceFuzzyResolver(webJarAssetLocator: WebJarAssetLocator)
extends WebJarsResourceResolver(webJarAssetLocator) {

private val webjarPartialPathExtractor = """^.*/([^/]+/[^/]+)/?$""".r

override def findWebJarResourcePath(path: String): String = {
Option(super.findWebJarResourcePath(path))
.orElse(findFullPath(path))
.flatMap(webjarPartialPathExtractor.findFirstMatchIn(_).map(_.group(1)))
.orNull
}

private def findFullPath(path: String): Option[String] =
try {
Option(webJarAssetLocator.getFullPath(path))
} catch {
case _: IllegalArgumentException => None
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ class WebSecurityConfig @Autowired()(beanFactory: BeanFactory,
.and()
.addFilterBefore(kerberosFilter, classOf[UsernamePasswordAuthenticationFilter])
.addFilterAfter(jwtAuthFilter, classOf[SpnegoAuthenticationProcessingFilter])
.headers()
.frameOptions()
.sameOrigin()
}

@Bean
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,21 @@ package za.co.absa.enceladus.menas.controllers

import org.springframework.beans.factory.annotation.Value
import org.springframework.web.bind.annotation.{GetMapping, RequestMapping, RestController}
import za.co.absa.enceladus.menas.LineageConfig

@RestController
@RequestMapping(Array("/api/configuration"))
class ConfigurationController extends BaseController {
@Value("${menas.environment}")
val menasEnvironment: String = ""
private val menasEnvironment: String = ""

@GetMapping(path = Array("/environment"))
def getEnvironment(): String = {
menasEnvironment
}

@GetMapping(path = Array("/lineageExecutionIdApiTemplate"))
def getLineageExecutionIdApiTemplate(): String = {
LineageConfig.executionIdApiTemplate.getOrElse("")
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.enceladus.menas.controllers

import javax.servlet.http.HttpServletRequest
import org.apache.commons.io.IOUtils
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.core.io.ClassPathResource
import org.springframework.http.MediaType.TEXT_HTML_VALUE
import org.springframework.stereotype.Controller
import org.springframework.web.bind.annotation.{RequestMapping, ResponseBody}
import org.webjars.WebJarAssetLocator
import za.co.absa.enceladus.menas.LineageConfig

@Controller
class LineageController @Autowired()(webJarAssetLocator: WebJarAssetLocator) {

@RequestMapping(path = Array(LineageConfig.mappingPathForController), produces = Array(TEXT_HTML_VALUE))
@ResponseBody
def index(httpRequest: HttpServletRequest): String = {
val resourceName = webJarAssetLocator.getFullPath(LineageConfig.jarName, "index.html")

val resource = new ClassPathResource(resourceName)

val baseUrl = LineageConfig.baseUrl(httpRequest.getContextPath)
// TODO: Don't use string replaces #1116
IOUtils.toString(resource.getInputStream, "UTF-8")
.replaceAllLiterally(
"/*[[${embeddedMode}]]*/",
"true; //")
.replaceAllLiterally(
"/*[[${apiUrl}]]*/",
s"'${LineageConfig.apiUrl.getOrElse("")}'; //")
.replaceAllLiterally(
"""<base href="/" th:href="@{/}">""",
s"""<base href="$baseUrl">""")
.replaceAllLiterally("/*[+", "")
.replaceAllLiterally("+]*/", "")
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -100,20 +100,6 @@ class RunController @Autowired()(runService: RunService) extends BaseController
runService.getLatestRun(datasetName, datasetVersion).map(ControlUtils.asJson)
}

@GetMapping(path = Array("/splineUrl/{datasetName}/{datasetVersion}/{runId}"), produces = Array("text/plain"))
@ResponseStatus(HttpStatus.OK)
def getSplineUrl(@PathVariable datasetName: String,
@PathVariable datasetVersion: Int,
@PathVariable runId: Int): CompletableFuture[String] = {
runService.getSplineUrl(datasetName, datasetVersion, runId)
}

@GetMapping(path = Array("/splineUrlTemplate"), produces = Array("text/plain"))
@ResponseStatus(HttpStatus.OK)
def getSplineUrlTemplate(): CompletableFuture[String] = {
runService.getSplineUrlTemplate()
}

@PostMapping()
@ResponseStatus(HttpStatus.CREATED)
def create(@RequestBody run: Run,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,6 @@ class RunService @Autowired()(runMongoRepository: RunMongoRepository)
import scala.concurrent.ExecutionContext.Implicits.global
import za.co.absa.enceladus.menas.models.Validation._

@Value("${spline.urlTemplate}")
val splineUrlTemplate: String = ""

def getAllLatest(): Future[Seq[Run]] = {
runMongoRepository.getAllLatest()
}
Expand Down Expand Up @@ -107,17 +104,6 @@ class RunService @Autowired()(runMongoRepository: RunMongoRepository)
}
}

def getSplineUrl(datasetName: String, datasetVersion: Int, runId: Int): Future[String] = {
getRun(datasetName, datasetVersion, runId).map { run =>
val splineRef = run.splineRef
String.format(splineUrlTemplate, splineRef.outputPath, splineRef.sparkApplicationId)
}
}

def getSplineUrlTemplate(): Future[String] = {
Future.successful(splineUrlTemplate)
}

def create(newRun: Run, username: String, retriesLeft: Int = 3): Future[Run] = {
for {
latestOpt <- runMongoRepository.getLatestRun(newRun.dataset, newRun.datasetVersion)
Expand Down
3 changes: 2 additions & 1 deletion menas/src/test/resources/application.properties
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ menas.environment=localhost

menas.spark.master=local[1]

spline.urlTemplate=http://localhost:8080/spline/dataset/lineage/_search?path=%s&application_id=%s
# Avoiding the http(s) protocol will cause that the same protocol will be used as for Menas itself, avoiding issues
menas.lineage.readApiUrl=//localhost:8080/spline-gw/consumer

#system-wide time zone
timezone=UTC
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -694,66 +694,6 @@ class RunApiIntegrationSuite extends BaseRestApiTest {
}
}

s"GET $apiUrl/splineUrl/{datasetName}/{datasetVersion}/{runId}" can {
val endpointBase = s"$apiUrl/splineUrl"

"return 200" when {
"there is a Run of the specified Dataset with the specified runId" should {
"return the Spline URL for the Run" in {
val dataset1run1 = RunFactory.getDummyRun(dataset = "dataset", datasetVersion = 1, runId = 1)
val dataset1run2 = RunFactory.getDummyRun(dataset = "dataset", datasetVersion = 1, runId = 2)
val dataset2run2 = RunFactory.getDummyRun(dataset = "dataset", datasetVersion = 2, runId = 2)
runFixture.add(dataset1run1, dataset1run2, dataset2run2)

val response = sendGet[String](s"$endpointBase/dataset/1/2")

assertOk(response)

val body = response.getBody
assert(body == "http://localhost:8080/spline/dataset/lineage/_search?path=dummyOutputPath&application_id=dummySparkApplicationId")
}
}
}

"return 404" when {
"there is no Run with the specified datasetName" in {
setUpSimpleRun()

val response = sendGet[String](s"$endpointBase/DATASET/1/1")

assertNotFound(response)
}
"there is no Run with the specified datasetVersion" in {
setUpSimpleRun()

val response = sendGet[String](s"$endpointBase/dataset/2/1")

assertNotFound(response)
}
"there is no Run with the specified runId" in {
setUpSimpleRun()

val response = sendGet[String](s"$endpointBase/dataset/1/2")

assertNotFound(response)
}
"the datasetVersion is not a valid numeric type" in {
setUpSimpleRun()

val response = sendGet[String](s"$endpointBase/datasetVersion/1")

assertNotFound(response)
}
"the runId is not a valid numeric type" in {
setUpSimpleRun()

val response = sendGet[String](s"$endpointBase/1/runId")

assertNotFound(response)
}
}
}

s"POST $apiUrl" can {
val endpointBase = s"$apiUrl"

Expand Down
Loading

0 comments on commit 0bd704c

Please sign in to comment.