Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Misc Changes #7264

Merged
merged 7 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NuGet.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
<add key="mlnet-assets" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/machinelearning-assets/nuget/v3/index.json" />
<add key="dotnet-libraries-transport" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-libraries-transport/nuget/v3/index.json" />
<add key="dotnet8" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet8/nuget/v3/index.json" />
<add key="dotnet9" value="https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet9/nuget/v3/index.json" />
</packageSources>
<packageSourceMapping>
<packageSource key="dotnet-public">
Expand Down Expand Up @@ -47,6 +48,9 @@
<packageSource key="dotnet8">
<package pattern="*" />
</packageSource>
<packageSource key="dotnet9">
<package pattern="*" />
</packageSource>
</packageSourceMapping>
<disabledPackageSources>
<clear />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

<!-- Remove once we have resolved the TorchSharp issue. -->
<ResolveAssemblyWarnOrErrorOnTargetArchitectureMismatch>None</ResolveAssemblyWarnOrErrorOnTargetArchitectureMismatch>
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
tarekgh marked this conversation as resolved.
Show resolved Hide resolved
</PropertyGroup>

<ItemGroup>
Expand Down
1 change: 1 addition & 0 deletions eng/Versions.props
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
<GoogleProtobufVersion>3.27.1</GoogleProtobufVersion>
<LightGBMVersion>3.3.5</LightGBMVersion>
<MicrosoftBclHashCodeVersion>1.1.1</MicrosoftBclHashCodeVersion>
<MicrosoftBclMemoryVersion>9.0.0-rc.1.24431.7</MicrosoftBclMemoryVersion>
tarekgh marked this conversation as resolved.
Show resolved Hide resolved
<MicrosoftCodeAnalysisAnalyzersVersion>3.3.4</MicrosoftCodeAnalysisAnalyzersVersion>
<MicrosoftCodeAnalysisCSharpVersion>4.9.2</MicrosoftCodeAnalysisCSharpVersion>
<MicrosoftDotNetInteractiveVersion>1.0.0-beta.24375.2</MicrosoftDotNetInteractiveVersion>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
<TargetFramework>net6.0</TargetFramework>
<IsPackable>false</IsPackable>
<NoWarn>$(NoWarn)</NoWarn>

<!-- Remove once we have resolved the TorchSharp issue. -->
<ResolveAssemblyWarnOrErrorOnTargetArchitectureMismatch>None</ResolveAssemblyWarnOrErrorOnTargetArchitectureMismatch>
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
</PropertyGroup>

<ItemGroup>
Expand Down
4 changes: 4 additions & 0 deletions src/Microsoft.ML.GenAI.Core/Microsoft.ML.GenAI.Core.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
<LangVersion>preview</LangVersion>
</PropertyGroup>

<PropertyGroup Condition="'$(TargetFramework)' == 'net6.0'">
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="AutoGen.Core" Version="$(AutoGenVersion)" />
<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="$(SemanticKernelVersion)" />
Expand Down
6 changes: 3 additions & 3 deletions src/Microsoft.ML.GenAI.Core/Pipeline/CausalLMPipeline.cs
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ public virtual IEnumerable<string> GenerateStreaming(

return tokens
// Skip the first _ token automatically added by tokenizer
.Where(t => t.Offset != (0, 0))
.Where(t => !t.Offset.Equals(new Range(0, 0)))
.Select(t => t.Id)
.ToArray();
}));
Expand All @@ -268,13 +268,13 @@ public virtual IEnumerable<string> GenerateStreaming(
var tokenIds = token[0].to_type(ScalarType.Int32).data<int>().ToArray();
var duplicateTokenString = this.Tokenizer switch
{
SentencePieceBpeTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds.Concat(tokenIds), considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"),
SentencePieceTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds.Concat(tokenIds), considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"),
_ => this.Tokenizer.Decode(tokenIds.Concat(tokenIds)) ?? throw new InvalidOperationException("Failed to decode token ids"),
};

var tokenString = this.Tokenizer switch
{
SentencePieceBpeTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds, considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"),
SentencePieceTokenizer bpeTokenizer => bpeTokenizer.Decode(tokenIds, considerSpecialTokens: true) ?? throw new InvalidOperationException("Failed to decode token ids"),
_ => this.Tokenizer.Decode(tokenIds) ?? throw new InvalidOperationException("Failed to decode token ids"),
};

Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.GenAI.LLaMA/LlamaTokenizerHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public static TiktokenTokenizer FromPretrained(
string modelFile = "tokenizer.model")
{
var modelFilePath = Path.Join(modelWeightFolder, modelFile);
var preTokenizer = new TiktokenPreTokenizer(new Regex(_re), _specialTokens);
var preTokenizer = new RegexPreTokenizer(new Regex(_re), _specialTokens);
return TiktokenTokenizer.Create(File.OpenRead(modelFilePath), preTokenizer, normalizer: null, specialTokens: _specialTokens);
}
}
4 changes: 4 additions & 0 deletions src/Microsoft.ML.GenAI.LLaMA/Microsoft.ML.GenAI.LLaMA.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
<IsPackable>true</IsPackable>
</PropertyGroup>

<PropertyGroup Condition="'$(TargetFramework)' == 'net6.0'">
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="TorchSharp.PyBridge" Version="$(TorchSharpPyBridgeVersion)" />
<PackageReference Include="TorchSharp" Version="$(TorchSharpVersion)" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
<IsPackable>true</IsPackable>
</PropertyGroup>

<PropertyGroup Condition="'$(TargetFramework)' == 'net6.0'">
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="TorchSharp.PyBridge" Version="$(TorchSharpPyBridgeVersion)" />
<PackageReference Include="TorchSharp" Version="$(TorchSharpVersion)" />
Expand Down
6 changes: 5 additions & 1 deletion src/Microsoft.ML.GenAI.Phi/Microsoft.ML.GenAI.Phi.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
<IsPackable>true</IsPackable>
</PropertyGroup>

<PropertyGroup Condition="'$(TargetFramework)' == 'net6.0'">
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="TorchSharp.PyBridge" Version="$(TorchSharpPyBridgeVersion)" />
<PackageReference Include="TorchSharp" Version="$(TorchSharpVersion)" />
Expand All @@ -23,5 +27,5 @@
<ItemGroup>
<EmbeddedResource Include="Resource\Config\*.json" />
</ItemGroup>

</Project>
6 changes: 4 additions & 2 deletions src/Microsoft.ML.Tokenizers/EncodedToken.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;

namespace Microsoft.ML.Tokenizers
{
/// <summary>
Expand All @@ -23,15 +25,15 @@ public readonly struct EncodedToken
/// <summary>
/// Gets the offset mapping to the original string.
/// </summary>
public (int Index, int Length) Offset { get; }
public Range Offset { get; }

/// <summary>
/// Construct a new Token object using the token value, Id, and the offset mapping to the original string.
/// </summary>
/// <param name="id">The Id value associated to the token.</param>
/// <param name="value">The token string value.</param>
/// <param name="offset">The offset mapping to the original string.</param>
public EncodedToken(int id, string value, (int, int) offset)
public EncodedToken(int id, string value, Range offset)
{
Id = id;
Offset = offset;
Expand Down
1 change: 1 addition & 0 deletions src/Microsoft.ML.Tokenizers/Microsoft.ML.Tokenizers.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

<ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0'">
<PackageReference Include="Microsoft.Bcl.HashCode" Version="$(MicrosoftBclHashCodeVersion)" />
<PackageReference Include="Microsoft.Bcl.Memory" Version="$(MicrosoftBclMemoryVersion)" />
</ItemGroup>

</Project>
Loading
Loading