Skip to content

Commit

Permalink
Gzip embedded resources
Browse files Browse the repository at this point in the history
Reduce assembly size from 200MB -> 83MB
  • Loading branch information
russcam committed May 1, 2024
1 parent 1430942 commit f5aebea
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 6 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,7 @@ riderModule.iml
/_ReSharper.Caches/

.idea
*.sln.DotSettings.User
*.sln.DotSettings.User

# Don't check in gzipped files
/src/Lingua/LanguageModels/**/*.json.gz
14 changes: 10 additions & 4 deletions src/Lingua/Api/LanguageDetector.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Collections.Concurrent;
using System.IO.Compression;
using System.Text.RegularExpressions;
using Lingua.Internal;
using static Lingua.Api.Language;
Expand Down Expand Up @@ -399,11 +400,16 @@ private static Dictionary<string, float> LoadLanguageModels(Dictionary<Language,

// Loads the n-gram model of the given length for one language from the
// assembly's embedded resources and returns it as a string -> float dictionary.
// NOTE(review): this span is a diff rendering whose +/- markers were stripped,
// so the removed and the added version of the body are interleaved. The
// "10 additions & 4 deletions" header matches the old/new marking noted below.
private static Dictionary<string,float> LoadLanguageModel(Language language, int ngramLength)
{
// Old (removed) line: resource name built inline, pointing at the plain .json resource.
var file = $"Lingua.LanguageModels.{language.IsoCode6391().ToString().ToLowerInvariant()}.{Ngram.GetNgramNameByLength(ngramLength)}s.json";
// New (added) lines: same name components pulled into locals; the resource name now ends in ".json.gz".
var isoCode = language.IsoCode6391().ToString().ToLowerInvariant();
var nGramName = Ngram.GetNgramNameByLength(ngramLength);
var file = $"Lingua.LanguageModels.{isoCode}.{nGramName}s.json.gz";
// Unchanged line: look the resource up in the assembly that declares LanguageDetector.
using var stream = typeof(LanguageDetector).Assembly.GetManifestResourceStream(file);
// Old (removed) lines: empty dictionary when the stream is null, otherwise parse the raw stream.
return stream == null
? new Dictionary<string, float>()
: TrainingDataLanguageModel.FromJson(stream);

// New (added) lines: a null stream still yields an empty dictionary rather than an error.
if (stream is null)
return new Dictionary<string, float>();

// New (added) lines: wrap the resource in a decompressing GZipStream before handing it to FromJson.
using var gzipStream = new GZipStream(stream, CompressionMode.Decompress);
return TrainingDataLanguageModel.FromJson(gzipStream);
}

internal HashSet<Language> FilterLanguagesByRules(List<string> words)
Expand Down
64 changes: 63 additions & 1 deletion src/Lingua/Lingua.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,67 @@
<ItemGroup>
<InternalsVisibleTo Include="$(AssemblyName).Tests" Key="$(PublicKey)" />
</ItemGroup>


<!-- Gzip embedded resources to reduce size -->
<PropertyGroup>
<Deterministic>true</Deterministic>
</PropertyGroup>
<Import Project="Sdk.targets" Sdk="Microsoft.NET.Sdk" />
<!--
  Inline MSBuild task "GZip".
  Input : Files  - the items to compress (each item's FullPath metadata is read).
  Output: Result - one item per input, whose ItemSpec is the input ItemSpec + ".gz".
  Side effect: writes "<FullPath>.gz" next to every input file, replacing any
  previous copy.
-->
<UsingTask TaskName="GZip"
           TaskFactory="RoslynCodeTaskFactory"
           AssemblyFile="$(MSBuildToolsPath)\Microsoft.Build.Tasks.Core.dll">
  <ParameterGroup>
    <Files ParameterType="Microsoft.Build.Framework.ITaskItem[]"
           Required="true" />
    <Result ParameterType="Microsoft.Build.Framework.ITaskItem[]"
            Output="true" />
  </ParameterGroup>
  <Task>
    <Using Namespace="System.IO" />
    <Using Namespace="System.IO.Compression" />
    <Code Type="Fragment" Language="cs">
<![CDATA[
      // Always assign the output so it is never left null, even for an empty input list.
      Result = new TaskItem[Files.Length];

      const string destinationSuffix = ".gz";
      for (int i = 0; i < Files.Length; i++)
      {
          ITaskItem item = Files[i];
          string sourcePath = item.GetMetadata("FullPath");
          string destinationPath = sourcePath + destinationSuffix;

          // File.Create truncates an existing file. File.OpenWrite does not, so a
          // shorter archive written over a longer stale one would keep the old
          // trailing bytes and produce a corrupt .gz on incremental builds.
          using (var sourceStream = File.OpenRead(sourcePath))
          using (var destinationStream = File.Create(destinationPath))
          using (var destinationGZip = new GZipStream(destinationStream, CompressionLevel.Optimal))
          {
              sourceStream.CopyTo(destinationGZip);
          }

          // The output item mirrors the input ItemSpec (not the full path) plus ".gz".
          Result[i] = new TaskItem(item.ItemSpec + destinationSuffix);
      }
]]>
    </Code>
  </Task>
</UsingTask>
<!--
  Swaps every EmbeddedResource item for a gzipped copy before the build runs.
  NOTE(review): the target is named BeforeBuild and sits after the explicit
  Sdk.targets import, presumably so this definition replaces the SDK's own
  BeforeBuild target; confirm against the MSBuild documentation.
-->
<Target Name="BeforeBuild">
<!-- Compress all current EmbeddedResource items; the ".gz" items are collected in GZipEmbeddedResource. -->
<GZip Files="@(EmbeddedResource)">
<Output ItemName="GZipEmbeddedResource" TaskParameter="Result" />
</GZip>
<!-- Log both item lists at high importance so they show up in the build output. -->
<Message Text="Source EmbeddedResources: @(EmbeddedResource)" Importance="High" />
<Message Text="GZipped EmbeddedResources: @(GZipEmbeddedResource)" Importance="High" />
<ItemGroup>
<!-- Drop every original item, then add the gzipped ones, so only the .gz items remain as EmbeddedResource. -->
<EmbeddedResource Remove="@(EmbeddedResource)" />
<EmbeddedResource Include="@(GZipEmbeddedResource)" />
</ItemGroup>
</Target>

</Project>

0 comments on commit f5aebea

Please sign in to comment.