Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Highlight images during text-to-speech #382

Merged
merged 1 commit into from
Aug 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ public class TtsNavigator<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
?: ttsEngineProvider.createEmptyPreferences()

val contentIterator =
- TtsContentIterator(publication, tokenizerFactory, initialLocator)
+ TtsUtteranceIterator(publication, tokenizerFactory, initialLocator)

val ttsEngine =
ttsEngineProvider.createEngine(publication, actualInitialPreferences)
Expand Down Expand Up @@ -279,28 +279,24 @@ public class TtsNavigator<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
private fun TtsPlayer.Utterance.toPosition(): Location {
val currentLink = publication.readingOrder[position.resourceIndex]

- val utteranceHighlight = publication
+ val utteranceLocator = publication
.locatorFromLink(currentLink)!!
.copy(
locations = position.locations,
- text = Locator.Text(
- highlight = text,
- before = position.textBefore,
- after = position.textAfter
- )
+ text = position.text
)

- val tokenHighlight = range
- ?.let { utteranceHighlight.copy(text = utteranceHighlight.text.substring(it)) }
+ val tokenLocator = range
+ ?.let { utteranceLocator.copy(text = utteranceLocator.text.substring(it)) }

return Location(
href = Href(currentLink.href),
- textBefore = position.textBefore,
- textAfter = position.textAfter,
+ textBefore = position.text.before,
+ textAfter = position.text.after,
utterance = text,
range = range,
- utteranceLocator = utteranceHighlight,
- tokenLocator = tokenHighlight
+ utteranceLocator = utteranceLocator,
+ tokenLocator = tokenLocator
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ import org.readium.r2.shared.extensions.tryOrNull
import org.readium.r2.shared.publication.Locator

/**
- * Plays the content from a [TtsContentIterator] with a [TtsEngine].
+ * Plays the content from a [TtsUtteranceIterator] with a [TtsEngine].
*/
@ExperimentalReadiumApi
internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
E : TtsEngine.Error, V : TtsEngine.Voice> private constructor(
private val engineFacade: TtsEngineFacade<S, P, E, V>,
- private val contentIterator: TtsContentIterator,
+ private val contentIterator: TtsUtteranceIterator,
initialWindow: UtteranceWindow,
initialPreferences: P
) : Configurable<S, P> {
Expand All @@ -40,7 +40,7 @@ internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
suspend operator fun <S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
E : TtsEngine.Error, V : TtsEngine.Voice> invoke(
engine: TtsEngine<S, P, E, V>,
- contentIterator: TtsContentIterator,
+ contentIterator: TtsUtteranceIterator,
initialPreferences: P
): TtsPlayer<S, P, E, V>? {
val initialContext = tryOrNull { contentIterator.startContext() }
Expand All @@ -59,23 +59,23 @@ internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
)
}

- private suspend fun TtsContentIterator.startContext(): UtteranceWindow? {
- val previousUtterance = previousUtterance()
- val currentUtterance = nextUtterance()
+ private suspend fun TtsUtteranceIterator.startContext(): UtteranceWindow? {
+ val previousUtterance = previous()
+ val currentUtterance = next()

val startWindow = if (currentUtterance != null) {
UtteranceWindow(
previousUtterance = previousUtterance,
currentUtterance = currentUtterance,
- nextUtterance = nextUtterance(),
+ nextUtterance = next(),
ended = false
)
} else {
val actualCurrentUtterance = previousUtterance ?: return null
- val actualPreviousUtterance = previousUtterance()
+ val actualPreviousUtterance = previous()

// Go back to the end of the iterator.
- nextUtterance()
+ next()

UtteranceWindow(
previousUtterance = actualPreviousUtterance,
Expand Down Expand Up @@ -129,15 +129,14 @@ internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
data class Position(
val resourceIndex: Int,
val locations: Locator.Locations,
- val textBefore: String?,
- val textAfter: String?
+ val text: Locator.Text
)
}

private data class UtteranceWindow(
- val previousUtterance: TtsContentIterator.Utterance?,
- val currentUtterance: TtsContentIterator.Utterance,
- val nextUtterance: TtsContentIterator.Utterance?,
+ val previousUtterance: TtsUtteranceIterator.Utterance?,
+ val currentUtterance: TtsUtteranceIterator.Utterance,
+ val nextUtterance: TtsUtteranceIterator.Utterance?,
val ended: Boolean = false
)

Expand Down Expand Up @@ -411,19 +410,19 @@ internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
val previousUtterance =
try {
// Get previously currentUtterance once more
- contentIterator.previousUtterance()
+ contentIterator.previous()

// Get previously previousUtterance once more
- contentIterator.previousUtterance()
+ contentIterator.previous()

// Get new previous utterance
- val previousUtterance = contentIterator.previousUtterance()
+ val previousUtterance = contentIterator.previous()

// Go to currentUtterance position
- contentIterator.nextUtterance()
+ contentIterator.next()

// Go to nextUtterance position
- contentIterator.nextUtterance()
+ contentIterator.next()

previousUtterance
} catch (e: Exception) {
Expand All @@ -448,7 +447,7 @@ internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
}

val nextUtterance = try {
- contentIterator.nextUtterance()
+ contentIterator.next()
} catch (e: Exception) {
onContentError(e)
return
Expand Down Expand Up @@ -498,8 +497,8 @@ internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
playContinuous()
}

- private suspend fun speakUtterance(utterance: TtsContentIterator.Utterance): E? =
- engineFacade.speak(utterance.text, utterance.language, ::onRangeChanged)
+ private suspend fun speakUtterance(utterance: TtsUtteranceIterator.Utterance): E? =
+ engineFacade.speak(utterance.utterance, utterance.language, ::onRangeChanged)

private fun onEngineError(error: E) {
playbackMutable.value = playbackMutable.value.copy(
Expand Down Expand Up @@ -535,15 +534,14 @@ internal class TtsPlayer<S : TtsEngine.Settings, P : TtsEngine.Preferences<P>,
private fun isPlaying() =
playbackMutable.value.playWhenReady && playback.value.state == State.Ready

- private fun TtsContentIterator.Utterance.ttsPlayerUtterance(): Utterance =
+ private fun TtsUtteranceIterator.Utterance.ttsPlayerUtterance(): Utterance =
Utterance(
- text = text,
+ text = utterance,
range = null,
position = Utterance.Position(
resourceIndex = resourceIndex,
locations = locations,
- textAfter = textAfter,
- textBefore = textBefore
+ text = text
)
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,16 @@ import org.readium.r2.shared.util.tokenizer.TextTokenizer
*
* Not thread-safe.
*/
- internal class TtsContentIterator(
+ internal class TtsUtteranceIterator(
private val publication: Publication,
private val tokenizerFactory: (language: Language?) -> TextTokenizer,
initialLocator: Locator?
) {
data class Utterance(
+ val utterance: String,
val resourceIndex: Int,
val locations: Locator.Locations,
- val text: String,
- val textBefore: String?,
- val textAfter: String?,
+ val text: Locator.Text,
val language: Language?
)

Expand Down Expand Up @@ -109,14 +108,14 @@ internal class TtsContentIterator(
/**
* Advances to the previous item and returns it, or null if we reached the beginning.
*/
- suspend fun previousUtterance(): Utterance? =
- nextUtterance(Direction.Backward)
+ suspend fun previous(): Utterance? =
+ next(Direction.Backward)

/**
* Advances to the next item and returns it, or null if we reached the end.
*/
- suspend fun nextUtterance(): Utterance? =
- nextUtterance(Direction.Forward)
+ suspend fun next(): Utterance? =
+ next(Direction.Forward)

private enum class Direction {
Forward, Backward;
Expand All @@ -126,10 +125,10 @@ internal class TtsContentIterator(
* Gets the next utterance in the given [direction], or null when reaching the beginning or the
* end.
*/
- private suspend fun nextUtterance(direction: Direction): Utterance? {
+ private suspend fun next(direction: Direction): Utterance? {
val utterance = utterances.nextIn(direction)
if (utterance == null && loadNextUtterances(direction)) {
- return nextUtterance(direction)
+ return next(direction)
}
return utterance
}
Expand Down Expand Up @@ -167,7 +166,7 @@ internal class TtsContentIterator(
*/
private fun Content.Element.tokenize(): List<Content.Element> {
val contentTokenizer = TextContentTokenizer(
- language = this@TtsContentIterator.language,
+ language = this@TtsUtteranceIterator.language,
textTokenizerFactory = tokenizerFactory,
overrideContentLanguage = overrideContentLanguage
)
Expand All @@ -187,11 +186,10 @@ internal class TtsContentIterator(
?: throw IllegalStateException("Content Element cannot be found in readingOrder.")

return Utterance(
+ utterance = text,
resourceIndex = resourceIndex,
locations = locator.locations,
- text = text,
- textBefore = locator.text.before,
- textAfter = locator.text.after,
+ text = locator.text,
language = language
)
}
Expand Down