cloudyr · s-spavound · Mar 13, 2021 · Mar 13, 2021 · Mar 13, 2021 · Mar 13, 2021
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -1,14 +1,14 @@
-.travis.yml
-appveyor.yml
-CONTRIBUTING.md
-README.Rmd
-Makefile
-drat.sh
-knitreadme.sh
-^revdep$
-^man-roxygen$
-^.*\.Rproj$
-^\.Rproj\.user$
-^\.github.?
-^cran-comments\.md$
-^CRAN-RELEASE$
+.travis.yml
+appveyor.yml
+CONTRIBUTING.md
+README.Rmd
+Makefile
+drat.sh
+knitreadme.sh
+^revdep$
+^man-roxygen$
+^.*\.Rproj$
+^\.Rproj\.user$
+^\.github.?
+^cran-comments\.md$
+^CRAN-RELEASE$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,24 +1,26 @@
-Package: aws.transcribe
-Type: Package
-Title: Client for 'AWS Transcribe'
-Version: 0.1.3.9000
-Date: 2020-03-10
-Authors@R: c(person("Thomas J.", "Leeper",
-                    role = c("aut"), 
-                    email = "thosjleeper@gmail.com",
-                    comment = c(ORCID = "0000-0003-4097-6326")),
-             person("Antoine", "Sachet", role = c("cre"),
-                    email = "antoine.sac@gmail.com"))
-Description: Client for 'AWS Transcribe' <https://aws.amazon.com/documentation/transcribe>, a cloud transcription service that can convert an audio media file in English and other languages into a text transcript.
-License: GPL (>= 2)
-URL: https://github.com/cloudyr/aws.transcribe
-BugReports: https://github.com/cloudyr/aws.transcribe/issues
-Imports:
-    tools,
-    httr,
-    jsonlite,
-    aws.signature (>= 0.3.4)
-Suggests:
-    testthat
-Encoding: UTF-8
-RoxygenNote: 7.0.2
+Package: aws.transcribe
+Type: Package
+Title: Client for 'AWS Transcribe'
+Version: 0.1.4.0000
+Date: 2021-03-13
+Authors@R: c(person("Thomas J.", "Leeper",
+                    role = c("aut"), 
+                    email = "thosjleeper@gmail.com",
+                    comment = c(ORCID = "0000-0003-4097-6326")),
+             person("Antoine", "Sachet", role = c("cre"),
+                    email = "antoine.sac@gmail.com"),
+             person("Simon", "Spavound", role = c("ctb"), 
+                    email = "simon.spavound@googlemail.com"))
+Description: Client for 'AWS Transcribe' <https://aws.amazon.com/documentation/transcribe>, a cloud transcription service that can convert an audio media file in English and other languages into a text transcript.
+License: GPL (>= 2)
+URL: https://github.com/cloudyr/aws.transcribe
+BugReports: https://github.com/cloudyr/aws.transcribe/issues
+Imports:
+    tools,
+    httr,
+    jsonlite,
+    aws.signature (>= 0.3.4)
+Suggests:
+    testthat
+Encoding: UTF-8
+RoxygenNote: 7.1.1
diff --git a/NEWS.md b/NEWS.md
@@ -1,14 +1,18 @@
-# aws.transcribe (development version)
-
-# aws.transcribe 0.1.3
-
-* Released on CRAN 2020-03-11
-* New maintainer @antoine-sachet
-
-# aws.transcribe 0.1.2
-
-* Finish minimum working example.
-
-# aws.transcribe 0.1.1
-
-* Initial release.
+# aws.transcribe (development version)
+
+# aws.transcribe 0.1.4
+
+* Added optional arguments to `start_transcription()`
+
+# aws.transcribe 0.1.3
+
+* Released on CRAN 2020-03-11
+* New maintainer @antoine-sachet
+
+# aws.transcribe 0.1.2
+
+* Finish minimum working example.
+
+# aws.transcribe 0.1.1
+
+* Initial release.
diff --git a/R/start_transcription.R b/R/start_transcription.R
@@ -1,40 +1,73 @@
-#' @title Start AWS Transcribe Job
-#' @description Start an AWS Transcribe job
-#' @param name A character string specifying a unique name for the transcription job.
-#' @param url A character string specifying a URL for the media file to be transcribed.
-#' @param format A character string specifying the file format. One of: \dQuote{mp3}, \dQuote{mp4}, \dQuote{wav}, \dQuote{flac}.
-#' @param language A character string specifying a language code. Currently defaults to \dQuote{en-US}.
-#' @param hertz Optionally, a numeric value specifying sample rate in Hertz.
-#' @param \dots Additional arguments passed to \code{\link{transcribeHTTP}}.
-#' @return A list containing details of the job. The transcript can be retrieved with \code{\link{get_transcription}}.
-#' @examples
-#' \dontrun{
-#' # start a transcription
-#' ## upload a file to S3
-#' library("aws.s3")
-#' put_object(file = "recording.mp3", bucket = "my-bucket", object = "recording.mp3")
-#' 
-#' ## start trancription
-#' start_transcription("first-example", "https://my-bucket.us-east-1.amazonaws.com/recording.mp3")
-#' }
-#' @seealso \code{\link{get_transcription}}
-#' @importFrom tools file_ext
-#' @export
-start_transcription <-
-function(
-    name,
-    url,
-    format = tools::file_ext(url),
-    language = "en-US",
-    hertz = NULL,
-    ...
-) {
-    bod <- list(Media = list(MediaFileUri = url))
-    bod$MediaFormat <- format
-    bod$LanguageCode <- language
-    if (!is.null(hertz)) {
-        bod$MediaSampleRateHertz <- hertz
-    }
-    bod$TranscriptionJobName <- name
-    transcribeHTTP(action = "StartTranscriptionJob", body = bod, ...)
-}
+#' @title Start AWS Transcribe Job
+#' @description Start an AWS Transcribe job
+#'
+#' @param name A character string specifying a unique name for the transcription job.
+#' @param url A character string specifying a URL for the media file to be transcribed.
+#' @param format A character string specifying the file format. One of: \dQuote{mp3}, \dQuote{mp4}, \dQuote{wav}, \dQuote{flac}.
+#' @param language A character string specifying a language code. Currently defaults to \dQuote{en-US}.
+#' @param hertz Optionally, a numeric value specifying sample rate in Hertz.
+#' @param output_bucket Optionally, a character string specifying the output bucket to place the results of the Amazon Transcribe job in.
+#' @param channel_identification Optionally, a boolean which instructs Amazon Transcribe to process each audio channel separately and then merge the transcription output of each channel into a single transcription. Amazon Transcribe also produces a transcription of each item detected on an audio channel, including the start time and end time of the item and alternative transcriptions of the item including the confidence that Amazon Transcribe has in the transcription. You can't set both \code{show_speaker_labels} and \code{channel_identification} in the same request.
+#' @param show_speaker_labels Optionally, a boolean specifying whether the transcription job uses speaker recognition to identify different speakers in the input audio. Speaker recognition labels individual speakers in the audio file. If you set \code{show_speaker_labels} to \code{TRUE}, you must also set \code{max_speaker_labels}.
+#' @param max_speaker_labels Optionally, an integer specifying the maximum number of speakers to identify in the input audio. If there are more speakers in the audio than this number, multiple speakers are identified as a single speaker. If you specify the \code{max_speaker_labels} field, you must set the \code{show_speaker_labels} field to \code{TRUE}. Valid Range: Minimum value of 2. Maximum value of 10.
+#' @param vocabulary_name Optionally, a character string specifying the name of a vocabulary to use when processing the transcription job.
+#' @param more_settings Optionally, a list of additional settings, sent in the \code{Settings} element of the request. A non-\code{NULL} value of \code{channel_identification}, \code{show_speaker_labels}, \code{max_speaker_labels} or \code{vocabulary_name} overrides a same-named entry of this list (\code{ChannelIdentification}, \code{ShowSpeakerLabels}, \code{MaxSpeakerLabels}, \code{VocabularyName}); arguments left as \code{NULL} leave the list untouched.
+#' @param \dots Additional arguments passed to \code{\link{transcribeHTTP}}.
+#'
+#' @return A list containing details of the job. The transcript can be retrieved with \code{\link{get_transcription}}.
+#' @examples
+#' \dontrun{
+#' # start a transcription
+#' ## upload a file to S3
+#' library("aws.s3")
+#' put_object(file = "recording.mp3", bucket = "my-bucket", object = "recording.mp3")
+#'
+#' ## start transcription
+#' start_transcription("first-example", "https://my-bucket.us-east-1.amazonaws.com/recording.mp3")
+#' }
+#' @seealso \code{\link{get_transcription}}
+#' @importFrom tools file_ext
+#' @export
+start_transcription <-
+function(
+    name,
+    url,
+    format = tools::file_ext(url),
+    language = "en-US",
+    hertz = NULL,
+    output_bucket = NULL,
+    channel_identification = NULL,
+    show_speaker_labels = NULL,
+    max_speaker_labels = NULL,
+    vocabulary_name = NULL,
+    more_settings = list(),
+    ...
+) {
+    bod <- list(Media = list(MediaFileUri = url))
+    bod$MediaFormat <- format
+    bod$LanguageCode <- language
+    # Assigning NULL to an absent list element is a no-op, so optional
+    # arguments left as NULL are simply omitted from the request body.
+    bod$MediaSampleRateHertz <- hertz
+    bod$TranscriptionJobName <- name
+    bod$OutputBucketName <- output_bucket
+
+    stopifnot(is.list(more_settings))
+    # Drop unset (NULL) arguments before merging: assigning NULL into the list
+    # would delete a same-named entry supplied through `more_settings`.
+    explicit <- Filter(Negate(is.null), list(
+        ChannelIdentification = channel_identification,
+        ShowSpeakerLabels = show_speaker_labels,
+        MaxSpeakerLabels = max_speaker_labels,
+        VocabularyName = vocabulary_name
+    ))
+    settings <- more_settings
+    settings[names(explicit)] <- explicit
+
+    # Only send a Settings element when there is something to put in it.
+    if (length(settings) > 0L) {
+        bod$Settings <- settings
+    }
+
+    transcribeHTTP(action = "StartTranscriptionJob", body = bod, ...)
+}
diff --git a/README.Rmd b/README.Rmd
@@ -1,65 +1,65 @@
-# Package Template for the cloudyr project
-
-[![CRAN](https://www.r-pkg.org/badges/version/aws.transcribe)](https://cran.r-project.org/package=aws.transcribe)
-![Downloads](https://cranlogs.r-pkg.org/badges/aws.transcribe)
-[![Travis Build Status](https://travis-ci.org/cloudyr/aws.transcribe.png?branch=master)](https://travis-ci.org/cloudyr/aws.transcribe)
-[![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/PROJECTNUMBER?svg=true)](https://ci.appveyor.com/project/cloudyr/aws.transcribe)
-[![codecov.io](https://codecov.io/github/cloudyr/aws.transcribe/coverage.svg?branch=master)](https://codecov.io/github/cloudyr/aws.transcribe?branch=master)
-
-**aws.transcribe** is a package for the [AWS Transcribe](https://aws.amazon.com/transcribe/) API.
-
-## Code Examples
-
-To start a transcription, use `start_transcription()` with a "job name" and the URL for the file to be transcribed:
-
-```R
-library("aws.transcribe")
-t1 <- start_transcription("aws-transcribe-example", "https://s3.amazonaws.com/randhunt-transcribe-demo-us-east-1/out.mp3")
-```
-
-Then, wait for the transcription to complete and retrieve it by name using `get_transcription()`:
-
-```{r}
-library("aws.transcribe")
-t1 <- get_transcription("aws-transcribe-example")
-cat(strwrap(t1$Transcriptions[1L], 60), sep = "\n")
-```
-
-That's it!
-
-## Setting up credentials
-
-To use the package, you will need an AWS account and to enter your credentials into R. Your keypair can be generated on the [IAM Management Console](https://aws.amazon.com/) under the heading *Access Keys*. Note that you only have access to your secret key once. After it is generated, you need to save it in a secure location. New keypairs can be generated at any time if yours has been lost, stolen, or forgotten. The [**aws.iam** package](https://github.com/cloudyr/aws.iam) profiles tools for working with IAM, including creating roles, users, groups, and credentials programmatically; it is not needed to *use* IAM credentials.
-
-A detailed description of how credentials can be specified is provided at: https://github.com/cloudyr/aws.signature/. The easiest way is to simply set environment variables on the command line prior to starting R or via an `Renviron.site` or `.Renviron` file, which are used to set environment variables in R during startup (see `? Startup`). They can be also set within R:
-
-```R
-Sys.setenv("AWS_ACCESS_KEY_ID" = "mykey",
-           "AWS_SECRET_ACCESS_KEY" = "mysecretkey",
-           "AWS_DEFAULT_REGION" = "us-east-1",
-           "AWS_SESSION_TOKEN" = "mytoken")
-```
-
-
-## Installation
-
-You can install this package from CRAN or, to install the latest development version, from the cloudyr drat repository:
-
-```R
-# Install from CRAN
-install.packages("aws.transcribe")
-
-# Latest version passing CI tests, from drat repo
-install.packages("aws.transcribe", repos = c(getOption("repos"), "http://cloudyr.github.io/drat"))
-```
-
-You can also pull a potentially unstable version directly from GitHub, using the `remotes` package:
-
-```R
-remotes::install_github("cloudyr/aws.transcribe")
-```
-
-
-
----
-[![cloudyr project logo](https://i.imgur.com/JHS98Y7.png)](https://github.com/cloudyr)
+# R Client for the AWS Transcribe API
+
+[![CRAN](https://www.r-pkg.org/badges/version/aws.transcribe)](https://cran.r-project.org/package=aws.transcribe)
+![Downloads](https://cranlogs.r-pkg.org/badges/aws.transcribe)
+[![Travis Build Status](https://travis-ci.org/cloudyr/aws.transcribe.png?branch=master)](https://travis-ci.org/cloudyr/aws.transcribe)
+[![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/PROJECTNUMBER?svg=true)](https://ci.appveyor.com/project/cloudyr/aws.transcribe)
+[![codecov.io](https://codecov.io/github/cloudyr/aws.transcribe/coverage.svg?branch=master)](https://codecov.io/github/cloudyr/aws.transcribe?branch=master)
+
+**aws.transcribe** is a package for the [AWS Transcribe](https://aws.amazon.com/transcribe/) API.
+
+## Code Examples
+
+To start a transcription, use `start_transcription()` with a "job name" and the URL for the file to be transcribed:
+
+```R
+library("aws.transcribe")
+t1 <- start_transcription("aws-transcribe-example", "https://s3.amazonaws.com/randhunt-transcribe-demo-us-east-1/out.mp3")
+```
+
+Then, wait for the transcription to complete and retrieve it by name using `get_transcription()`:
+
+```R
+library("aws.transcribe")
+t1 <- get_transcription("aws-transcribe-example")
+cat(strwrap(t1$Transcriptions[1L], 60), sep = "\n")
+```
+
+That's it!
+
+## Setting up credentials
+
+To use the package, you will need an AWS account and to enter your credentials into R. Your keypair can be generated on the [IAM Management Console](https://aws.amazon.com/) under the heading *Access Keys*. Note that you only have access to your secret key once. After it is generated, you need to save it in a secure location. New keypairs can be generated at any time if yours has been lost, stolen, or forgotten. The [**aws.iam** package](https://github.com/cloudyr/aws.iam) provides tools for working with IAM, including creating roles, users, groups, and credentials programmatically; it is not needed to *use* IAM credentials.
+
+A detailed description of how credentials can be specified is provided at: https://github.com/cloudyr/aws.signature/. The easiest way is to simply set environment variables on the command line prior to starting R or via an `Renviron.site` or `.Renviron` file, which are used to set environment variables in R during startup (see `? Startup`). They can also be set within R:
+
+```R
+Sys.setenv("AWS_ACCESS_KEY_ID" = "mykey",
+           "AWS_SECRET_ACCESS_KEY" = "mysecretkey",
+           "AWS_DEFAULT_REGION" = "us-east-1",
+           "AWS_SESSION_TOKEN" = "mytoken")
+```
+
+
+## Installation
+
+You can install this package from CRAN or, to install the latest development version, from the cloudyr drat repository:
+
+```R
+# Install from CRAN
+install.packages("aws.transcribe")
+
+# Latest version passing CI tests, from drat repo
+install.packages("aws.transcribe", repos = c(getOption("repos"), "http://cloudyr.github.io/drat"))
+```
+
+You can also pull a potentially unstable version directly from GitHub, using the `remotes` package:
+
+```R
+remotes::install_github("cloudyr/aws.transcribe")
+```
+
+
+
+---
+[![cloudyr project logo](https://i.imgur.com/JHS98Y7.png)](https://github.com/cloudyr)