Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Endpoint to transcribe videos #554

Open
wants to merge 32 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
94d8ea4
Endpoint to extract audio from brightcove files
ekrojo77 Dec 5, 2024
f46fd63
Add get endpoint
ekrojo77 Dec 6, 2024
8730288
Update NotFound Error handling in audio-api
ekrojo77 Dec 12, 2024
37edf26
Add transcription client
ekrojo77 Dec 12, 2024
87f3560
Add Transcription endpoints
ekrojo77 Dec 12, 2024
68690a5
Add maxSpeakers and subtitles
ekrojo77 Dec 12, 2024
1178838
Fix test coverage for transcription service
ekrojo77 Dec 12, 2024
461940c
remove unused import
ekrojo77 Dec 12, 2024
e7dedf9
Merge branch 'master' into transcribe-audio
ekrojo77 Dec 12, 2024
2337237
Add 400 if job already exist
ekrojo77 Dec 13, 2024
5cdb331
Clean up folder names and logs
ekrojo77 Dec 13, 2024
d6e64b2
Update descriptions of transcription endpoints
ekrojo77 Dec 13, 2024
47834f4
Update function to send subtitles as response if Get returns 200
ekrojo77 Dec 16, 2024
493e7f1
Remove unused imports
ekrojo77 Dec 16, 2024
0342b7f
Add json return for transcription results
ekrojo77 Dec 16, 2024
7d7e95e
Fix syntax error
ekrojo77 Dec 16, 2024
e0109cd
Add mock for getting transcription results
ekrojo77 Dec 16, 2024
850dbd9
add a temp audio endpoint
ekrojo77 Dec 17, 2024
9fb082a
fix audioprop
ekrojo77 Dec 17, 2024
60efcd7
fix audio endpoints
ekrojo77 Dec 18, 2024
8a24b8d
Fix licensing and unused audioName in get call
ekrojo77 Dec 18, 2024
e54886d
Make creation of subtitle file optional
ekrojo77 Dec 18, 2024
31b918e
fix test
ekrojo77 Dec 18, 2024
5163c80
Update permissions and descriptions, refactor to existing s3 putobject
ekrojo77 Dec 20, 2024
5759c2d
Put urls into props
ekrojo77 Jan 2, 2025
46d4e79
Fix naming and test
ekrojo77 Jan 2, 2025
a2a17f5
Fix naming of props
ekrojo77 Jan 6, 2025
0bd5339
Fix error handling in api
ekrojo77 Jan 6, 2025
628be06
Improve error handling
ekrojo77 Jan 6, 2025
6c355db
Fix unused import and try handling of brightcove client
ekrojo77 Jan 6, 2025
ea32cc9
Remove unused import
ekrojo77 Jan 6, 2025
574bc37
update getVideo after brightcove changes
ekrojo77 Jan 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package no.ndla.audioapi

import com.typesafe.scalalogging.StrictLogging
import no.ndla.common.Environment.prop
import no.ndla.common.configuration.{BaseProps, HasBaseProps}
import no.ndla.database.{DatabaseProps, HasDatabaseProps}
import no.ndla.network.{AuthUser, Domains}
Expand Down Expand Up @@ -36,6 +37,13 @@ class AudioApiProperties extends BaseProps with DatabaseProps with StrictLogging
val StorageName: String = propOrElse("AUDIO_FILE_S3_BUCKET", s"$Environment.audio.ndla")
val StorageRegion: Option[String] = propOrNone("AUDIO_FILE_S3_BUCKET_REGION")

val TranscribeStorageName: String = propOrElse("TRANSCRIBE_FILE_S3_BUCKET_NAME", s"$Environment.transcribe.ndla")
ekrojo77 marked this conversation as resolved.
Show resolved Hide resolved
val TranscribeStorageRegion: Option[String] = propOrNone("TRANSCRIBE_FILE_S3_BUCKET_REGION")

val BrightcoveClientId: String = prop("BRIGHTCOVE_API_CLIENT_ID")
val BrightcoveClientSecret: String = prop("BRIGHTCOVE_API_CLIENT_SECRET")
val BrightcoveAccountId: String = prop("NDLA_BRIGHTCOVE_ACCOUNT_ID")
ekrojo77 marked this conversation as resolved.
Show resolved Hide resolved

val SearchServer: String = propOrElse("SEARCH_SERVER", "http://search-audio-api.ndla-local")
val RunWithSignedSearchRequests: Boolean = propOrElse("RUN_WITH_SIGNED_SEARCH_REQUESTS", "true").toBoolean
val SearchIndex: String = propOrElse("SEARCH_INDEX_NAME", "audios")
Expand Down
35 changes: 23 additions & 12 deletions audio-api/src/main/scala/no/ndla/audioapi/ComponentRegistry.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ import no.ndla.audioapi.repository.{AudioRepository, SeriesRepository}
import no.ndla.audioapi.service.*
import no.ndla.audioapi.service.search.*
import no.ndla.common.Clock
import no.ndla.common.aws.NdlaS3Client
import no.ndla.common.aws.{NdlaAWSTranscribeClient, NdlaS3Client}
import no.ndla.common.brightcove.NdlaBrightcoveClient
import no.ndla.common.configuration.BaseComponentRegistry
import no.ndla.database.{DBMigrator, DataSource}
import no.ndla.network.NdlaClient
Expand All @@ -38,6 +39,7 @@ class ComponentRegistry(properties: AudioApiProperties)
with HealthController
with AudioController
with SeriesController
with TranscriptionController
with SearchService
with AudioSearchService
with SeriesSearchService
Expand All @@ -54,7 +56,10 @@ class ComponentRegistry(properties: AudioApiProperties)
with DBMigrator
with ErrorHandling
with SwaggerDocControllerConfig
with NdlaS3Client {
with NdlaS3Client
with TranscriptionService
with NdlaAWSTranscribeClient
with NdlaBrightcoveClient {
override val props: AudioApiProperties = properties
override val migrator: DBMigrator = DBMigrator(
new V5__AddAgreementToAudio,
Expand All @@ -63,23 +68,28 @@ class ComponentRegistry(properties: AudioApiProperties)
override val dataSource: HikariDataSource = DataSource.getHikariDataSource
DataSource.connectToDatabase()

lazy val s3Client = new NdlaS3Client(props.StorageName, props.StorageRegion)
lazy val s3Client = new NdlaS3Client(props.StorageName, props.StorageRegion)
lazy val s3TranscribeClient = new NdlaS3Client(props.TranscribeStorageName, props.TranscribeStorageRegion)
lazy val brightcoveClient = new NdlaBrightcoveClient()
lazy val transcribeClient = new NdlaAWSTranscribeClient(props.TranscribeStorageRegion)

lazy val audioRepository = new AudioRepository
lazy val seriesRepository = new SeriesRepository

lazy val ndlaClient = new NdlaClient
lazy val myndlaApiClient: MyNDLAApiClient = new MyNDLAApiClient

lazy val readService = new ReadService
lazy val writeService = new WriteService
lazy val validationService = new ValidationService
lazy val converterService = new ConverterService
lazy val readService = new ReadService
lazy val writeService = new WriteService
lazy val validationService = new ValidationService
lazy val converterService = new ConverterService
lazy val transcriptionService = new TranscriptionService

lazy val internController = new InternController
lazy val audioApiController = new AudioController
lazy val seriesController = new SeriesController
lazy val healthController = new HealthController
lazy val internController = new InternController
lazy val audioApiController = new AudioController
lazy val seriesController = new SeriesController
lazy val healthController = new HealthController
lazy val transcriptionController = new TranscriptionController

var e4sClient: NdlaE4sClient = Elastic4sClientFactory.getClient(props.SearchServer)
lazy val searchConverterService = new SearchConverterService
Expand All @@ -97,7 +107,8 @@ class ComponentRegistry(properties: AudioApiProperties)
audioApiController,
seriesController,
internController,
healthController
healthController,
transcriptionController
),
SwaggerDocControllerConfig.swaggerInfo
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ import cats.implicits.*
import io.circe.generic.auto.*
import no.ndla.audioapi.Props
import no.ndla.audioapi.model.api
import no.ndla.audioapi.model.api.{AudioMetaDomainDumpDTO, ErrorHandling, NotFoundException}
import no.ndla.audioapi.model.api.{AudioMetaDomainDumpDTO, ErrorHandling}
import no.ndla.audioapi.model.domain.AudioMetaInformation
import no.ndla.audioapi.repository.AudioRepository
import no.ndla.audioapi.service.search.{AudioIndexService, SeriesIndexService, TagIndexService}
import no.ndla.audioapi.service.{ConverterService, ReadService}
import no.ndla.common.errors.NotFoundException
import no.ndla.network.tapir.NoNullJsonPrinter.jsonBody
import no.ndla.network.tapir.TapirController
import no.ndla.network.tapir.TapirUtil.errorOutputsFor
Expand Down Expand Up @@ -116,7 +117,7 @@ trait InternController {
.serverLogicPure { id =>
audioRepository.withId(id) match {
case Some(image) => image.asRight
case None => returnLeftError(new NotFoundException(s"Could not find audio with id: '$id'"))
case None => returnLeftError(NotFoundException(s"Could not find audio with id: '$id'"))
}
},
endpoint.post
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
/*
* Part of NDLA audio-api
* Copyright (C) 2024 NDLA
*
* See LICENSE
*
*/

package no.ndla.audioapi.controller
ekrojo77 marked this conversation as resolved.
Show resolved Hide resolved

import no.ndla.audioapi.Props
import no.ndla.audioapi.model.api.{JobAlreadyFoundException, TranscriptionResultDTO}
import no.ndla.audioapi.service.{ReadService, TranscriptionService}
import no.ndla.network.tapir.NoNullJsonPrinter.jsonBody
import no.ndla.network.tapir.TapirController
import no.ndla.network.tapir.TapirUtil.errorOutputsFor
import no.ndla.network.tapir.auth.Permission.DRAFT_API_WRITE
import sttp.tapir.server.ServerEndpoint
import sttp.tapir.{EndpointInput, endpoint, path}
import sttp.tapir.*
import sttp.tapir.generic.auto.schemaForCaseClass

import scala.util.{Failure, Success}
trait TranscriptionController {
this: Props & TapirController & ReadService & TranscriptionService =>
val transcriptionController: TranscriptionController
class TranscriptionController() extends TapirController {

override val serviceName: String = "transcription"
override val prefix: EndpointInput[Unit] = "audio-api" / "v1" / serviceName

private val videoId = path[String]("videoId").description("The video id to transcribe")
private val audioName = path[String]("audioName").description("The audio name to transcribe")
private val audioId = path[Long]("audioId").description("The audio id to transcribe")
private val language = path[String]("language").description("The language to transcribe the video to")
gunnarvelle marked this conversation as resolved.
Show resolved Hide resolved
private val maxSpeaker =
query[Int]("maxSpeaker").description("The maximum number of speakers in the video").default(2)
private val format = query[String]("format").description("The format of the audio file").default("mp3")

/** POST `{videoId}/{language}/extract-audio` (below the controller prefix).
  *
  * Delegates to `transcriptionService.extractAudioFromVideo` and answers with an empty body when the call
  * succeeds; a failed call is turned into an error response by `returnLeftError`. Requires the
  * `DRAFT_API_WRITE` permission.
  *
  * Note from the pull-request author (translated): the extract-audio endpoints are kept for now, until it is
  * known which endpoints are actually needed; the plain transcribe endpoint is expected to be the most used.
  */
def postExtractAudio: ServerEndpoint[Any, Eff] = endpoint.post
  .summary("Extract audio from video")
  .description("Extracts audio from a Brightcove video and uploads it to S3.")
  .in(videoId)
  .in(language)
  .in("extract-audio")
  .errorOut(errorOutputsFor(400, 500))
  .requirePermission(DRAFT_API_WRITE)
  .serverLogicPure { _ =>
    { case (video, lang) =>
      transcriptionService
        .extractAudioFromVideo(video, lang)
        .fold(error => returnLeftError(error), _ => Right(()))
    }
  }

/** GET `{videoId}/{language}/extract-audio` (below the controller prefix).
  *
  * Asks `transcriptionService.getAudioExtractionStatus` about the extraction and answers with an empty body
  * when that call succeeds; a failed call is turned into an error response by `returnLeftError`. Requires the
  * `DRAFT_API_WRITE` permission.
  */
def getAudioExtraction: ServerEndpoint[Any, Eff] = endpoint.get
  .summary("Get audio extraction status")
  .description("Get the status of the audio extraction from a Brightcove video.")
  .in(videoId)
  .in(language)
  .in("extract-audio")
  .errorOut(errorOutputsFor(400, 500))
  .requirePermission(DRAFT_API_WRITE)
  .serverLogicPure { _ =>
    { case (video, lang) =>
      transcriptionService
        .getAudioExtractionStatus(video, lang)
        .fold(error => returnLeftError(error), _ => Right(()))
    }
  }

/** POST `{videoId}/{language}?maxSpeaker=N` (below the controller prefix).
  *
  * Hands the request to `transcriptionService.transcribeVideo` and answers with an empty body on success.
  * Every failure, including `JobAlreadyFoundException`, goes through `returnLeftError`. Requires the
  * `DRAFT_API_WRITE` permission.
  */
def postTranscription: ServerEndpoint[Any, Eff] = endpoint.post
  .summary("Transcribe video")
  .description("Transcribes a video and uploads the transcription to S3.")
  .in(videoId)
  .in(language)
  .in(maxSpeaker)
  .errorOut(errorOutputsFor(400, 500))
  .requirePermission(DRAFT_API_WRITE)
  .serverLogicPure { _ =>
    { case (video, lang, speakers) =>
      transcriptionService
        .transcribeVideo(video, lang, speakers)
        .fold(error => returnLeftError(error), _ => Right(()))
    }
  }

/** GET `{videoId}/{language}` (below the controller prefix).
  *
  * Looks the transcription up through `transcriptionService.getVideoTranscription`. A successful lookup is
  * rendered as a [[TranscriptionResultDTO]]: status `"COMPLETED"` together with the text when the service
  * returns a `Right`, otherwise the job status rendered with `toString` and no text. A failed lookup goes
  * through `returnLeftError`. Requires the `DRAFT_API_WRITE` permission.
  */
def getTranscription: ServerEndpoint[Any, Eff] = endpoint.get
  .summary("Get the transcription status of a video")
  .description("Get the transcription of a video.")
  .in(videoId)
  .in(language)
  .errorOut(errorOutputsFor(400, 404, 405, 500))
  .out(jsonBody[TranscriptionResultDTO])
  .requirePermission(DRAFT_API_WRITE)
  .serverLogicPure { _ =>
    { case (video, lang) =>
      transcriptionService.getVideoTranscription(video, lang) match {
        case Failure(error) => returnLeftError(error)
        case Success(outcome) =>
          val body = outcome match {
            case Left(status) => TranscriptionResultDTO(status.toString, None)
            case Right(text)  => TranscriptionResultDTO("COMPLETED", Some(text))
          }
          Right(body)
      }
    }
  }

/** POST `audio/{audioName}/{audioId}/{language}?maxSpeaker=N&format=F` (below the controller prefix).
  *
  * Hands the request to `transcriptionService.transcribeAudio` and answers with an empty body on success.
  * Every failure, including `JobAlreadyFoundException`, goes through `returnLeftError`. Requires the
  * `DRAFT_API_WRITE` permission.
  */
def postAudioTranscription: ServerEndpoint[Any, Eff] = endpoint.post
  .summary("Transcribe audio")
  // The published description must talk about audio: this endpoint transcribes audio files, not videos.
  .description("Transcribes an audio file and uploads the transcription to S3.")
  .in("audio")
  .in(audioName)
  .in(audioId)
  .in(language)
  .in(maxSpeaker)
  .in(format)
  .errorOut(errorOutputsFor(400, 500))
  .requirePermission(DRAFT_API_WRITE)
  .serverLogicPure { _ =>
    { case (name, id, lang, speakers, fileFormat) =>
      transcriptionService
        .transcribeAudio(name, id, lang, speakers, fileFormat)
        .fold(error => returnLeftError(error), _ => Right(()))
    }
  }

/** GET `audio/{audioId}/{language}` (below the controller prefix).
  *
  * Looks the transcription up through `transcriptionService.getAudioTranscription`. A successful lookup is
  * rendered as a [[TranscriptionResultDTO]]: status `"COMPLETED"` together with the text when the service
  * returns a `Right`, otherwise the job status rendered with `toString` and no text. A failed lookup goes
  * through `returnLeftError`. Requires the `DRAFT_API_WRITE` permission.
  */
def getAudioTranscription: ServerEndpoint[Any, Eff] = endpoint.get
  // Summary and description must talk about audio: this endpoint serves audio transcriptions, not videos.
  .summary("Get the transcription status of an audio file")
  .description("Get the transcription of an audio file.")
  .in("audio")
  .in(audioId)
  .in(language)
  .errorOut(errorOutputsFor(400, 404, 405, 500))
  .out(jsonBody[TranscriptionResultDTO])
  .requirePermission(DRAFT_API_WRITE)
  .serverLogicPure { _ =>
    { case (id, lang) =>
      transcriptionService.getAudioTranscription(id, lang) match {
        case Failure(error) => returnLeftError(error)
        case Success(outcome) =>
          val body = outcome match {
            case Left(status) => TranscriptionResultDTO(status.toString, None)
            case Right(text)  => TranscriptionResultDTO("COMPLETED", Some(text))
          }
          Right(body)
      }
    }
  }

/** All endpoints exposed by this controller. */
override val endpoints: List[ServerEndpoint[Any, Eff]] =
  postExtractAudio ::
    getAudioExtraction ::
    postTranscription ::
    getTranscription ::
    postAudioTranscription ::
    getAudioTranscription ::
    Nil
}

}
15 changes: 8 additions & 7 deletions audio-api/src/main/scala/no/ndla/audioapi/model/api/Error.scala
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ package no.ndla.audioapi.model.api

import no.ndla.audioapi.Props
import no.ndla.common.Clock
import no.ndla.common.errors.{AccessDeniedException, FileTooBigException, ValidationException}
import no.ndla.common.errors.{AccessDeniedException, FileTooBigException, NotFoundException, ValidationException}
import no.ndla.database.DataSource
import no.ndla.network.model.HttpRequestException
import no.ndla.network.tapir.{AllErrors, ErrorBody, TapirErrorHandling, ValidationErrorBody}
Expand Down Expand Up @@ -55,13 +55,14 @@ trait ErrorHandling extends TapirErrorHandling {
if rf.error.rootCause
.exists(x => x.`type` == "search_context_missing_exception" || x.reason == "Cannot parse scroll id") =>
invalidSearchContext
case jafe: JobAlreadyFoundException => ErrorBody(JOB_ALREADY_FOUND, jafe.getMessage, clock.now(), 400)
}

}

class NotFoundException(message: String = "The audio was not found") extends RuntimeException(message)
case class MissingIdException(message: String) extends RuntimeException(message)
case class CouldNotFindLanguageException(message: String) extends RuntimeException(message)
class AudioStorageException(message: String) extends RuntimeException(message)
class LanguageMappingException(message: String) extends RuntimeException(message)
class ImportException(message: String) extends RuntimeException(message)
case class MissingIdException(message: String) extends RuntimeException(message)
case class CouldNotFindLanguageException(message: String) extends RuntimeException(message)
class AudioStorageException(message: String) extends RuntimeException(message)
class LanguageMappingException(message: String) extends RuntimeException(message)
class ImportException(message: String) extends RuntimeException(message)
case class JobAlreadyFoundException(message: String) extends RuntimeException(message)
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Part of NDLA audio-api
* Copyright (C) 2024 NDLA
*
* See LICENSE
*
*/

package no.ndla.audioapi.model.api
ekrojo77 marked this conversation as resolved.
Show resolved Hide resolved

import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
import io.circe.{Decoder, Encoder}
import sttp.tapir.Schema.annotations.description

/** API payload for a transcription job: the job status plus an optional transcription text. */
@description("The result of a transcription job")
case class TranscriptionResultDTO(
    @description("The status of the transcription job")
    status: String,
    @description("The transcription of the audio")
    transcription: Option[String]
)

/** Circe codecs for [[TranscriptionResultDTO]], derived with `io.circe.generic.semiauto`. */
object TranscriptionResultDTO {
  implicit val encoder: Encoder[TranscriptionResultDTO] = deriveEncoder[TranscriptionResultDTO]
  implicit val decoder: Decoder[TranscriptionResultDTO] = deriveDecoder[TranscriptionResultDTO]
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@

package no.ndla.audioapi.service

import cats.implicits._
import cats.implicits.*
import no.ndla.audioapi.model.api
import no.ndla.audioapi.model.api.NotFoundException
import no.ndla.audioapi.repository.{AudioRepository, SeriesRepository}
import no.ndla.audioapi.service.search.{SearchConverterService, TagSearchService}
import no.ndla.common.errors.ValidationException
import no.ndla.common.errors.{NotFoundException, ValidationException}

import scala.util.{Failure, Success, Try}

Expand Down
Loading
Loading