Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SpeechToText #808 #1127

Merged
merged 29 commits into from
May 18, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
33d2689
SpeechToText #808
VladislavAntonyuk Apr 4, 2023
7558213
Fix Tizen build
VladislavAntonyuk Apr 4, 2023
cc76975
Update Namespace + Method Naming
brminnick Apr 11, 2023
63b4224
Combine Shared Code for iOS + MacCatalyst
brminnick Apr 11, 2023
b6d40ec
Add `NSSpeechRecognitionUsageDescription` to `Info.plist`
brminnick Apr 11, 2023
44cdc17
Update Sample App
brminnick Apr 12, 2023
4c3d247
Format code
brminnick Apr 12, 2023
6f666d7
`dotnet format`
brminnick Apr 12, 2023
0d38952
Update Windows Error Handling
brminnick Apr 12, 2023
d0ce5f7
Update EssentialsGalleryViewModel.cs
brminnick Apr 12, 2023
7dec3d0
Add Tizen Implementation
JoonghyunCho Apr 12, 2023
7017ff5
Merge branch 'main' into SpeechToText
brminnick Apr 12, 2023
a6ea0cb
Revert `SpeechToTextImplementation` to `public`
brminnick Apr 12, 2023
e5d7b3c
SpeechRecognitionResult
VladislavAntonyuk Apr 22, 2023
309b701
Merge branch 'main' into SpeechToText
VladislavAntonyuk Apr 22, 2023
a3de127
Fix tests
VladislavAntonyuk Apr 24, 2023
ab4d57e
Fix Tizen
VladislavAntonyuk Apr 25, 2023
7823e12
Merge branch 'main' into SpeechToText
VladislavAntonyuk May 5, 2023
54314a8
Remove warnings
VladislavAntonyuk May 5, 2023
0674b65
Merge branch 'SpeechToText' of https://github.com/CommunityToolkit/Ma…
VladislavAntonyuk May 5, 2023
44a0448
Merge branch 'main' into SpeechToText
VladislavAntonyuk May 9, 2023
3bd5653
Merge branch 'main' into SpeechToText
VladislavAntonyuk May 11, 2023
1a312e0
Update Formatting
brminnick May 11, 2023
e7ec25d
Update SpeechToTextImplementation.tizen.cs
brminnick May 11, 2023
24ab5bb
Add microphone permission
VladislavAntonyuk May 13, 2023
4245770
Add support for `MultichannelContent`
brminnick May 16, 2023
5822195
Update the Initialization step for Tizen
JoonghyunCho May 17, 2023
50b9b27
Fix indent
JoonghyunCho May 17, 2023
7052e35
Merge branch 'main' into SpeechToText
brminnick May 18, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions samples/CommunityToolkit.Maui.Sample/AppShell.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ public partial class AppShell : Shell
// Add Essentials View Models
CreateViewModelMapping<FileSaverPage, FileSaverViewModel, EssentialsGalleryPage, EssentialsGalleryViewModel>(),
CreateViewModelMapping<FolderPickerPage, FolderPickerViewModel, EssentialsGalleryPage, EssentialsGalleryViewModel>(),
CreateViewModelMapping<SpeechToTextPage, SpeechToTextViewModel, EssentialsGalleryPage, EssentialsGalleryViewModel>(),

// Add Extensions View Models
CreateViewModelMapping<ColorAnimationExtensionsPage, ColorAnimationExtensionsViewModel, ExtensionsGalleryPage, ExtensionsGalleryViewModel>(),
Expand Down
4 changes: 4 additions & 0 deletions samples/CommunityToolkit.Maui.Sample/MauiProgram.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using CommunityToolkit.Maui.Markup;
using CommunityToolkit.Maui.Media;
using CommunityToolkit.Maui.Sample.Models;
using CommunityToolkit.Maui.Sample.Pages;
using CommunityToolkit.Maui.Sample.Pages.Alerts;
Expand Down Expand Up @@ -153,6 +154,7 @@ static void RegisterViewsAndViewModels(in IServiceCollection services)
// Add Essentials Pages + ViewModels
services.AddTransientWithShellRoute<FileSaverPage, FileSaverViewModel>();
services.AddTransientWithShellRoute<FolderPickerPage, FolderPickerViewModel>();
services.AddTransientWithShellRoute<SpeechToTextPage, SpeechToTextViewModel>();

// Add Extensions Pages + ViewModels
services.AddTransientWithShellRoute<ColorAnimationExtensionsPage, ColorAnimationExtensionsViewModel>();
Expand Down Expand Up @@ -187,6 +189,8 @@ static void RegisterEssentials(in IServiceCollection services)
services.AddSingleton<IDeviceDisplay>(DeviceDisplay.Current);
services.AddSingleton<IFileSaver>(FileSaver.Default);
services.AddSingleton<IFolderPicker>(FolderPicker.Default);
services.AddSingleton<ITextToSpeech>(TextToSpeech.Default);
services.AddSingleton<ISpeechToText>(SpeechToText.Default);
}

static IServiceCollection AddTransientWithShellRoute<TPage, TViewModel>(this IServiceCollection services) where TPage : BasePage<TViewModel>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
<?xml version="1.0" encoding="utf-8" ?>
<pages:BasePage xmlns="http://schemas.microsoft.com/dotnet/2021/maui"
xmlns:x="http://schemas.microsoft.com/winfx/2009/xaml"
xmlns:pages="clr-namespace:CommunityToolkit.Maui.Sample.Pages"
x:Class="CommunityToolkit.Maui.Sample.Pages.Essentials.SpeechToTextPage"
xmlns:vm="clr-namespace:CommunityToolkit.Maui.Sample.ViewModels.Essentials"
xmlns:essentials="clr-namespace:CommunityToolkit.Maui.Sample.Pages.Essentials"
x:TypeArguments="vm:SpeechToTextViewModel"
x:DataType="vm:SpeechToTextViewModel"
Title="SpeechToText">

<!-- Sample page for SpeechToText. All bindings below resolve against SpeechToTextViewModel (see x:DataType). -->

<ContentPage.Resources>
<!-- Converter used by the locale Picker to turn each item into its display string. -->
<essentials:PickerLocaleDisplayConverter x:Key="PickerLocaleDisplayConverter" />
</ContentPage.Resources>

<ScrollView>
<!-- Eight rows: description, "Locale" caption, locale picker, "Language Output" caption, output text (auto height), and three button rows. -->
<Grid
RowDefinitions="64, 24, 64, 24, auto, 60, 60, 60"
Padding="30,0">

<!-- Row 0: short description of the feature. -->
<Label
Grid.Row="0"
Text="SpeechToText allows the user to convert speech to text in real time"/>

<!-- Row 1: caption for the locale picker. -->
<Label
Grid.Row="1"
Text="Locale"
FontAttributes="Bold"/>

<!-- Row 2: items come from Locales; the selection is bound to Locale. -->
<Picker
Grid.Row="2"
ItemsSource="{Binding Locales}"
SelectedItem="{Binding Locale}"
ItemDisplayBinding="{Binding ., Converter={StaticResource PickerLocaleDisplayConverter}}"
Margin="0,0,0,20">
</Picker>

<!-- Row 3: caption for the output text. -->
<Label
Grid.Row="3"
Text="Language Output"
FontAttributes="Bold"/>

<!-- Row 4: displays the RecognitionText property. -->
<Label
Grid.Row="4"
Text="{Binding RecognitionText}"
FontSize="18"
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
MinimumHeightRequest="100"
Margin="0,0,0,20" />

<!-- Row 5: invokes PlayCommand. -->
<Button
Grid.Row="5"
Text="Play"
Command="{Binding PlayCommand}"
HorizontalOptions="Center"
Margin="0,0,0,20"/>

<!-- Row 6: invokes ListenCommand. -->
<Button
Grid.Row="6"
Text="Listen"
Command="{Binding ListenCommand}"
HorizontalOptions="Center"
Margin="0,0,0,20"/>

<!-- Row 7: invokes ListenCancelCommand. -->
<Button
Grid.Row="7"
Text="Stop Listening"
Command="{Binding ListenCancelCommand}"
HorizontalOptions="Center"/>

</Grid>
</ScrollView>

</pages:BasePage>
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
using System.Globalization;
using CommunityToolkit.Maui.Converters;
using CommunityToolkit.Maui.Sample.ViewModels.Essentials;

namespace CommunityToolkit.Maui.Sample.Pages.Essentials;

/// <summary>
/// Code-behind for the SpeechToText sample page.
/// </summary>
public partial class SpeechToTextPage : BasePage<SpeechToTextViewModel>
{
    /// <summary>
    /// Creates the page for the supplied view model and loads its XAML.
    /// </summary>
    /// <param name="viewModel">View model passed on to the base page.</param>
    public SpeechToTextPage(SpeechToTextViewModel viewModel) : base(viewModel) => InitializeComponent();

    /// <summary>
    /// Runs the view model's <c>SetLocalesCommand</c> each time the page appears.
    /// </summary>
    protected override async void OnAppearing()
    {
        base.OnAppearing();

        var setLocalesCommand = BindingContext.SetLocalesCommand;
        await setLocalesCommand.ExecuteAsync(null);
    }
}

/// <summary>
/// One-way converter that renders a <see cref="Locale"/> as its language followed by its name.
/// </summary>
class PickerLocaleDisplayConverter : BaseConverterOneWay<Locale, string>
{
    /// <summary>
    /// Default return value for the converter: an empty string.
    /// </summary>
    public override string DefaultConvertReturnValue { get; set; } = string.Empty;

    /// <summary>
    /// Builds the display text for a locale.
    /// </summary>
    /// <param name="value">Locale to display.</param>
    /// <param name="culture">Not used.</param>
    /// <returns>The locale's language and name, separated by a single space.</returns>
    public override string ConvertFrom(Locale value, CultureInfo? culture)
        => value.Language + " " + value.Name;
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,4 @@ public override async ValueTask LoadViewAsync()

SetHasLazyViewLoaded(true);
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,4 @@ public override async ValueTask LoadViewAsync()
{
await base.LoadViewAsync();
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@
<uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />

<uses-permission android:name="android.permission.RECORD_AUDIO" />
</manifest>
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,7 @@
</array>
<key>XSAppIconAssets</key>
<string>Assets.xcassets/appicon.appiconset</string>
<key>NSSpeechRecognitionUsageDescription</key>
<string>ISpeechToText Uses Speech Recognition</string>
brminnick marked this conversation as resolved.
Show resolved Hide resolved
</dict>
</plist>
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
<privilege>http://tizen.org/privilege/internet</privilege>
<privilege>http://tizen.org/privilege/mediastorage</privilege>
<privilege>http://tizen.org/privilege/externalstorage</privilege>
<privilege>http://tizen.org/privilege/recorder</privilege>
</privileges>
<dependencies />
<provides-appdefined-privileges />
<feature name="http://tizen.org/feature/microphone">true</feature>
brminnick marked this conversation as resolved.
Show resolved Hide resolved
</manifest>
6 changes: 4 additions & 2 deletions samples/CommunityToolkit.Maui.Sample/Platforms/iOS/Info.plist
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
</array>
<key>XSAppIconAssets</key>
<string>Assets.xcassets/appicon.appiconset</string>
<key>UIViewControllerBasedStatusBarAppearance</key>
<false/>
<key>UIViewControllerBasedStatusBarAppearance</key>
<false/>
<key>NSSpeechRecognitionUsageDescription</key>
<string>ISpeechToText Uses Speech Recognition</string>
brminnick marked this conversation as resolved.
Show resolved Hide resolved
</dict>
</plist>
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ public EssentialsGalleryViewModel()
: base(new[]
{
SectionModel.Create<FileSaverViewModel>("FileSaver", "Allows the user to save files to the filesystem"),
SectionModel.Create<FolderPickerViewModel>("FolderPicker", "Allows picking folders from the file system")
SectionModel.Create<FolderPickerViewModel>("FolderPicker", "Allows picking folders from the file system"),
SectionModel.Create<SpeechToTextViewModel>("SpeechToText", "Converts speech to text"),
})
{
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
using System.Collections.ObjectModel;
using System.Collections.Specialized;
using System.Globalization;
using CommunityToolkit.Maui.Alerts;
using CommunityToolkit.Maui.Media;
using CommunityToolkit.Mvvm.ComponentModel;
using CommunityToolkit.Mvvm.Input;

namespace CommunityToolkit.Maui.Sample.ViewModels.Essentials;

/// <summary>
/// View model for the SpeechToText sample page. Loads the available locales,
/// speaks the current text, and listens for speech to turn into text.
/// </summary>
public partial class SpeechToTextViewModel : BaseViewModel
{
    // Language selected by default, and the fallback culture used when no locale is selected.
    const string defaultLanguage = "en-US";

    readonly ITextToSpeech textToSpeech;
    readonly ISpeechToText speechToText;

    // Currently selected locale; null until SetLocales has picked one.
    [ObservableProperty]
    Locale? locale;

    // Text shown on the page; replaced by recognition results while listening.
    [ObservableProperty]
    string recognitionText = "Welcome to .NET MAUI Community Toolkit!";

    /// <summary>
    /// Initializes the view model with the speech services it depends on.
    /// </summary>
    /// <param name="textToSpeech">Service used to list locales and speak text.</param>
    /// <param name="speechToText">Service used to recognize speech.</param>
    public SpeechToTextViewModel(ITextToSpeech textToSpeech, ISpeechToText speechToText)
    {
        this.textToSpeech = textToSpeech;
        this.speechToText = speechToText;

        Locales.CollectionChanged += HandleLocalesCollectionChanged;
    }

    /// <summary>
    /// Locales offered for selection, filled by <see cref="SetLocales"/>.
    /// </summary>
    public ObservableCollection<Locale> Locales { get; } = new();

    /// <summary>
    /// Reloads <see cref="Locales"/> sorted by language then name, and selects the
    /// default language if present, otherwise the first locale.
    /// </summary>
    [RelayCommand]
    async Task SetLocales()
    {
        Locales.Clear();

        var locales = await textToSpeech.GetLocalesAsync();
        foreach (var locale in locales.OrderBy(x => x.Language).ThenBy(x => x.Name))
        {
            Locales.Add(locale);
        }

        Locale = Locales.FirstOrDefault(x => x.Language is defaultLanguage) ?? Locales.FirstOrDefault();
    }

    /// <summary>
    /// Speaks the current <c>RecognitionText</c> using the selected locale.
    /// </summary>
    /// <param name="cancellationToken">Token passed through to the speak call.</param>
    [RelayCommand]
    async Task Play(CancellationToken cancellationToken)
    {
        await textToSpeech.SpeakAsync(RecognitionText, new()
        {
            Locale = Locale,
            Pitch = 2,
            Volume = 1
        }, cancellationToken);
    }

    /// <summary>
    /// Listens for speech in the selected locale (or the default language), appending
    /// partial results to <c>RecognitionText</c> and finally replacing it with the final result.
    /// </summary>
    /// <param name="cancellationToken">Token signalled by the generated cancel command.</param>
    [RelayCommand(IncludeCancelCommand = true)]
    async Task Listen(CancellationToken cancellationToken)
    {
        const string beginSpeakingPrompt = "Begin speaking...";

        RecognitionText = beginSpeakingPrompt;

        try
        {
            RecognitionText = await speechToText.ListenAsync(CultureInfo.GetCultureInfo(Locale?.Language ?? defaultLanguage), new Progress<string>(partialText =>
            {
                // The first partial result replaces the prompt instead of being appended to it.
                if (RecognitionText is beginSpeakingPrompt)
                {
                    RecognitionText = string.Empty;
                }

                RecognitionText += partialText + " ";
            }), cancellationToken);
        }
        catch (OperationCanceledException)
        {
            // Catch the base cancellation type: TaskCanceledException derives from it, and
            // token-based cancellation may surface as a plain OperationCanceledException.
            await Toast.Make("Listening Stopped by User").Show(CancellationToken.None);
        }
        catch (Exception e)
        {
            await Toast.Make(e.Message).Show(CancellationToken.None);
        }
        finally
        {
            // Never leave the prompt on screen once listening has ended.
            if (RecognitionText is beginSpeakingPrompt)
            {
                RecognitionText = string.Empty;
            }
        }
    }

    // Raises a change notification for Locale whenever the Locales collection changes.
    void HandleLocalesCollectionChanged(object? sender, NotifyCollectionChangedEventArgs e)
    {
        OnPropertyChanged(nameof(Locale));
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,7 @@
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>

<PackageReference Include="System.Speech" Version="7.0.0" Condition="'$(TargetFramework)' == 'net7.0-windows10.0.19041.0'" />
</ItemGroup>
</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
using System.Globalization;

namespace CommunityToolkit.Maui.Media;

/// <summary>
/// Allows the user to convert speech to text in real time.
/// </summary>
public interface ISpeechToText : IAsyncDisposable
{
/// <summary>
/// Converts speech to text in real time.
/// </summary>
/// <param name="culture">The spoken language to recognize.</param>
/// <param name="recognitionResult">Receives intermediate conversion results; may be <see langword="null"/>.</param>
/// <param name="cancellationToken"><see cref="CancellationToken"/></param>
/// <returns>The final conversion result.</returns>
Task<string> ListenAsync(CultureInfo culture, IProgress<string>? recognitionResult, CancellationToken cancellationToken);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
using AVFoundation;
using Speech;

namespace CommunityToolkit.Maui.Media;

/// <summary>
/// Shared part of <see cref="SpeechToTextImplementation"/> for iOS + MacCatalyst
/// </summary>
public sealed partial class SpeechToTextImplementation
{
    AVAudioEngine? audioEngine;
    SFSpeechRecognizer? speechRecognizer;
    SFSpeechRecognitionTask? recognitionTask;
    SFSpeechAudioBufferRecognitionRequest? liveSpeechRequest;

    /// <inheritdoc />
    public ValueTask DisposeAsync()
    {
        audioEngine?.Dispose();
        speechRecognizer?.Dispose();
        liveSpeechRequest?.Dispose();
        recognitionTask?.Dispose();

        // Drop the references so a later StopRecording (or a second DisposeAsync)
        // is a no-op instead of touching already-disposed native objects.
        audioEngine = null;
        speechRecognizer = null;
        liveSpeechRequest = null;
        recognitionTask = null;

        return ValueTask.CompletedTask;
    }

    /// <summary>
    /// Requests speech recognition authorization.
    /// </summary>
    /// <returns><see langword="true"/> when the reported status is Authorized; otherwise <see langword="false"/>.</returns>
    static Task<bool> IsSpeechPermissionAuthorized()
    {
        // Run continuations asynchronously so awaiting code does not execute inline
        // inside the authorization callback.
        var taskResult = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
        SFSpeechRecognizer.RequestAuthorization(status =>
        {
            // TrySetResult cannot throw if the callback is ever invoked more than once.
            taskResult.TrySetResult(status is SFSpeechRecognizerAuthorizationStatus.Authorized);
        });

        return taskResult.Task;
    }

    /// <summary>
    /// Removes the input tap on bus 0, stops the audio engine, ends the audio of the
    /// live request, and cancels the recognition task. Null fields are skipped.
    /// </summary>
    void StopRecording()
    {
        audioEngine?.InputNode.RemoveTapOnBus(new nuint(0));
        audioEngine?.Stop();
        liveSpeechRequest?.EndAudio();
        recognitionTask?.Cancel();
    }
}
Loading