Skip to content

Commit

Permalink
Merge pull request #86 from sdcb/feature/2.6
Browse files Browse the repository at this point in the history
Feature/2.6, added PaddleNLP.Lac
  • Loading branch information
sdcb authored Apr 7, 2024
2 parents 18343fe + b19141b commit 9449415
Show file tree
Hide file tree
Showing 39 changed files with 65,954 additions and 229 deletions.
26 changes: 24 additions & 2 deletions PaddleSharp.sln
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "docs", "docs", "{026E4A25-9
docs\detection.md = docs\detection.md
docs\ocr.md = docs\ocr.md
docs\paddle2onnx.md = docs\paddle2onnx.md
docs\paddlenlp-lac.md = docs\paddlenlp-lac.md
docs\rotation-detection.md = docs\rotation-detection.md
EndProjectSection
EndProject
Expand All @@ -80,11 +81,17 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.Paddle2Onnx", "src\Sdc
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.Paddle2Onnx.Tests", "tests\Sdcb.Paddle2Onnx.Tests\Sdcb.Paddle2Onnx.Tests.csproj", "{0432D4F5-1F7E-4A6E-A6DC-4A04C0F8E497}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Sdcb.PaddleOCR.Models.LocalV4", "src\Sdcb.PaddleOCR.Models.LocalV4\Sdcb.PaddleOCR.Models.LocalV4.csproj", "{604827F0-00CB-48DC-AD4E-06AE386CD96A}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.PaddleOCR.Models.LocalV4", "src\Sdcb.PaddleOCR.Models.LocalV4\Sdcb.PaddleOCR.Models.LocalV4.csproj", "{604827F0-00CB-48DC-AD4E-06AE386CD96A}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.PaddleOCR.Models.Shared", "src\Sdcb.PaddleOCR.Models.Shared\Sdcb.PaddleOCR.Models.Shared.csproj", "{EC79D45E-85D8-40E2-9F95-78AB40977770}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Sdcb.PaddleOCR.Models.Local", "src\Sdcb.PaddleOCR.Models.Local\Sdcb.PaddleOCR.Models.Local.csproj", "{0172BD09-B617-4FF6-8221-883029AD4877}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.PaddleOCR.Models.Local", "src\Sdcb.PaddleOCR.Models.Local\Sdcb.PaddleOCR.Models.Local.csproj", "{0172BD09-B617-4FF6-8221-883029AD4877}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.PaddleNLP.Lac", "src\Sdcb.PaddleNLP.Lac\Sdcb.PaddleNLP.Lac.csproj", "{5756186D-613D-4656-B21D-822AB4DD9F8F}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.PaddleNLP.Lac.Tests", "tests\Sdcb.PaddleNLP.Lac.Tests\Sdcb.PaddleNLP.Lac.Tests.csproj", "{19222033-C5E1-4326-A597-75CF2DE4007F}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sdcb.PaddleNLP.Lac.Model", "src\Sdcb.PaddleNLP.Lac.Model\Sdcb.PaddleNLP.Lac.Model.csproj", "{640420BD-CE2D-4108-B82D-8B4544FC2FB0}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down Expand Up @@ -148,6 +155,18 @@ Global
{0172BD09-B617-4FF6-8221-883029AD4877}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0172BD09-B617-4FF6-8221-883029AD4877}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0172BD09-B617-4FF6-8221-883029AD4877}.Release|Any CPU.Build.0 = Release|Any CPU
{5756186D-613D-4656-B21D-822AB4DD9F8F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{5756186D-613D-4656-B21D-822AB4DD9F8F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5756186D-613D-4656-B21D-822AB4DD9F8F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{5756186D-613D-4656-B21D-822AB4DD9F8F}.Release|Any CPU.Build.0 = Release|Any CPU
{19222033-C5E1-4326-A597-75CF2DE4007F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{19222033-C5E1-4326-A597-75CF2DE4007F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{19222033-C5E1-4326-A597-75CF2DE4007F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{19222033-C5E1-4326-A597-75CF2DE4007F}.Release|Any CPU.Build.0 = Release|Any CPU
{640420BD-CE2D-4108-B82D-8B4544FC2FB0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{640420BD-CE2D-4108-B82D-8B4544FC2FB0}.Debug|Any CPU.Build.0 = Debug|Any CPU
{640420BD-CE2D-4108-B82D-8B4544FC2FB0}.Release|Any CPU.ActiveCfg = Release|Any CPU
{640420BD-CE2D-4108-B82D-8B4544FC2FB0}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand All @@ -172,6 +191,9 @@ Global
{604827F0-00CB-48DC-AD4E-06AE386CD96A} = {B3A59318-2F90-40D4-B995-7D56EB8C50F0}
{EC79D45E-85D8-40E2-9F95-78AB40977770} = {B3A59318-2F90-40D4-B995-7D56EB8C50F0}
{0172BD09-B617-4FF6-8221-883029AD4877} = {B3A59318-2F90-40D4-B995-7D56EB8C50F0}
{5756186D-613D-4656-B21D-822AB4DD9F8F} = {B3A59318-2F90-40D4-B995-7D56EB8C50F0}
{19222033-C5E1-4326-A597-75CF2DE4007F} = {CA2A775C-763B-4B69-AC5B-4F90DD668E4A}
{640420BD-CE2D-4108-B82D-8B4544FC2FB0} = {B3A59318-2F90-40D4-B995-7D56EB8C50F0}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {083C9A35-8781-4D12-8146-B08E4A61DA8E}
Expand Down
8 changes: 5 additions & 3 deletions build/00-common.linq
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,17 @@ static ProjectVersion[] Projects = new[]
new ProjectVersion("Sdcb.Onnx", "1.11.22.423"), // 1.11.22.423
new ProjectVersion("Sdcb.Mkldnn", "0.19"), // 0.19
new ProjectVersion("Sdcb.Paddle2Onnx", "1.0.0.2"), // 1.0.0-rc.2
new ProjectVersion("Sdcb.PaddleInference", "2.5.1"),
new ProjectVersion("Sdcb.PaddleInference", "2.6.0-preview.2"),
new ProjectVersion("Sdcb.PaddleOCR", "2.7.0.1"),
new ProjectVersion("Sdcb.PaddleOCR.Models.Online", "2.7.0.1"),
new ProjectVersion("Sdcb.PaddleOCR.Models.Shared", "2.7.0.1"),
new ProjectVersion("Sdcb.PaddleOCR.Models.Local", "2.7.0"),
new ProjectVersion("Sdcb.PaddleOCR.Models.LocalV3", "2.7.0.1"),
new ProjectVersion("Sdcb.PaddleOCR.Models.LocalV4", "2.7.0.1"),
new ProjectVersion("Sdcb.PaddleDetection", "2.3.3"),
new ProjectVersion("Sdcb.RotationDetector", "1.0.3"),
new ProjectVersion("Sdcb.PaddleDetection", "2.3.3"),
new ProjectVersion("Sdcb.RotationDetector", "1.0.3"),
new ProjectVersion("Sdcb.PaddleNLP.Lac", "1.0.0-preview.6"),
new ProjectVersion("Sdcb.PaddleNLP.Lac.Model", "1.0.0"),
};

static async Task DownloadFile(Uri uri, string localFile, CancellationToken cancellationToken = default)
Expand Down
10 changes: 5 additions & 5 deletions build/01-build-native.linq
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,17 @@ async Task Main()
await SetupAsync(QueryCancelToken);
//await new LinuxNuGetSource().Process(QueryCancelToken);

string mklDnnUrl = "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/mkldnn.zip";
string mklDnnUrl = "https://paddle-inference-lib.bj.bcebos.com/2.6.0/cxx_c/Windows/CPU/x86-64_avx-mkl-vs2019/paddle_inference_c.zip";

await MakeWin64Onnx(mklDnnUrl, QueryCancelToken);
await MakeWin64Mkldnn(mklDnnUrl, QueryCancelToken);
await MakeWin64Paddle2Onnx(mklDnnUrl, QueryCancelToken);

await MakeWin64PaddleMkl("mkl", mklDnnUrl, QueryCancelToken);
await MakeWin64PaddleOpenblas("openblas", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/openblas.zip", QueryCancelToken);
await MakeWin64PaddleOpenblas("openblas-noavx", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/openblas-noavx.zip", QueryCancelToken);
await MakeWin64PaddleMkl("cuda102_cudnn76_tr72_sm61_75", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/cu102.zip", QueryCancelToken);
await MakeWin64PaddleMkl("cuda118_cudnn86_tr85_sm86_89", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/cu118.zip", QueryCancelToken);
//await MakeWin64PaddleOpenblas("openblas", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/openblas.zip", QueryCancelToken);
//await MakeWin64PaddleOpenblas("openblas-noavx", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/openblas-noavx.zip", QueryCancelToken);
//await MakeWin64PaddleMkl("cuda102_cudnn76_tr72_sm61_75", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/cu102.zip", QueryCancelToken);
//await MakeWin64PaddleMkl("cuda118_cudnn86_tr85_sm86_89", "https://io.starworks.cc:88/paddlesharp/native-libs/2.5.1/cu118.zip", QueryCancelToken);
}

static Task MakeWin64PaddleOpenblas(string ridSuffix, string url, CancellationToken cancellationToken = default)
Expand Down
56 changes: 56 additions & 0 deletions docs/paddlenlp-lac.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Sdcb.PaddleNLP.Lac分词模型

## PaddleNLP Lac模型NuGet包

| 包名 💼 | 版本号 📌 | 描述 📚 |
| ------------------ | ---------------------------------------------------------------------------------------------------------------- | --------- |
| Sdcb.PaddleNLP.Lac | [![NuGet](https://img.shields.io/nuget/v/Sdcb.PaddleNLP.Lac.svg)](https://nuget.org/packages/Sdcb.PaddleNLP.Lac) | 模型自包含 |

# 使用方法及示例

## 需要安装的NuGet包
* Sdcb.PaddleNLP.Lac
* Sdcb.PaddleInference
* Sdcb.PaddleInference.runtime.win64.mkl

## 示例
## 1. 最简单的分词:
```csharp
string input = "我是中国人,我爱我的祖国。";
using ChineseSegmenter segmenter = new();
string[] result = segmenter.Segment(input);
Console.WriteLine(string.Join(",", result)); // 我,是,中国,人,,,我,爱,我的祖国,。
```

## 2. 词性标注:
```csharp
string input = "我爱北京天安门";
using ChineseSegmenter segmenter = new();
WordAndTag[] result = segmenter.Tagging(input);
string labels = string.Join(",", result.Select(x => x.Label));
string words = string.Join(",", result.Select(x => x.Word));
string tags = string.Join(",", result.Select(x => x.Tag));
Console.WriteLine(words); // 我,爱,北京,天安门
Console.WriteLine(labels); // r,v,LOC,LOC
Console.WriteLine(tags); // Pronoun,Verb,LocationName,LocationName
```

## 3. 自定义词库

```csharp
string input = "我爱北京天安门";
using ChineseSegmenter segmenter = new(new ()
{
CustomDictionary = new()
{
{ "北京天安门", WordTag.LocationName },
}
});
WordAndTag[] result = segmenter.Tagging(input);
string labels = string.Join(",", result.Select(x => x.Label));
string words = string.Join(",", result.Select(x => x.Word));
string tags = string.Join(",", result.Select(x => x.Tag));
Console.WriteLine(words); // 我,爱,北京天安门
Console.WriteLine(labels); // r,v,LOC
Console.WriteLine(tags); // Pronoun,Verb,LocationName
```
13 changes: 0 additions & 13 deletions src/Sdcb.PaddleInference/CompilerServices.cs

This file was deleted.

114 changes: 0 additions & 114 deletions src/Sdcb.PaddleInference/Native/PaddleNative.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,108 +16,6 @@ static PaddleNative()
#endif
}

/// <summary>
/// Raw layout of a one-dimensional string array: an element count followed by a pointer to the
/// per-element byte pointers.
/// </summary>
private unsafe struct PdStringArray
{
    // CS0649 ("field is never assigned to") is suppressed for the two layout fields below.
#pragma warning disable CS0649
    public nint Size;
    public byte** Data;
#pragma warning restore CS0649

    /// <summary>
    /// Copies every element into a newly allocated managed string array.
    /// </summary>
    /// <returns>An array holding <see cref="Size"/> strings, one per element pointer in <see cref="Data"/>.</returns>
    public readonly string[] ToArray()
    {
        string[] items = new string[Size];
        int index = 0;
        while (index < Size)
        {
            // Each element is a raw byte pointer converted by the UTF8PtrToString helper.
            IntPtr element = (IntPtr)Data[index];
            items[index] = element.UTF8PtrToString()!;
            index++;
        }

        return items;
    }
}

/// <summary>
/// Holds a pointer to a string array structure and offers helpers to copy it out and to destroy it.
/// </summary>
public unsafe ref struct PdStringArrayWrapper
{
    /// <summary>
    /// Address of the wrapped array structure; reset to <see cref="IntPtr.Zero"/> by <see cref="Dispose"/>.
    /// </summary>
    public IntPtr ptr;

    /// <summary>
    /// Copies the wrapped array into a new array of strings.
    /// </summary>
    /// <returns>The strings read from the wrapped array.</returns>
    public readonly string[] ToArray()
    {
        // Reinterpret the raw address as the internal array layout before reading it.
        PdStringArray* array = (PdStringArray*)ptr;
        return array->ToArray();
    }

    /// <summary>
    /// Hands the wrapped pointer to <c>PD_OneDimArrayCstrDestroy</c> and then clears it.
    /// </summary>
    public void Dispose()
    {
        PD_OneDimArrayCstrDestroy(ptr);
        ptr = default;
    }
}

/// <summary>
/// Raw layout of a one-dimensional 32-bit integer array: an element count followed by a pointer
/// to the elements.
/// </summary>
private unsafe struct PdIntArray
{
    public nint Size;
    public int* Data;

    /// <summary>
    /// Copies every element into a newly allocated managed array.
    /// </summary>
    /// <returns>An array holding <see cref="Size"/> integers read from <see cref="Data"/>.</returns>
    public readonly int[] ToArray()
    {
        int[] copy = new int[Size];
        int* source = Data;
        for (int index = 0; index < copy.Length; index++)
        {
            copy[index] = source[index];
        }

        return copy;
    }

    /// <summary>
    /// Passes the address of this instance to <c>PD_OneDimArrayInt32Destroy</c>.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the address handed over is that of the current instance, so this presumably
    /// only makes sense when the instance is accessed in place through the original pointer and
    /// not on a by-value copy — confirm against callers.
    /// </remarks>
    public void Dispose()
    {
        // 'this' is a movable reference inside a struct method, so it must be pinned to take its address.
        fixed (PdIntArray* self = &this)
        {
            PD_OneDimArrayInt32Destroy((IntPtr)self);
        }
    }
}

/// <summary>
/// Holds a pointer to an integer array structure and offers helpers to copy it out and to destroy it.
/// </summary>
public ref struct PdIntArrayWrapper
{
    /// <summary>
    /// Address of the wrapped array structure; reset to <see cref="IntPtr.Zero"/> by <see cref="Dispose"/>.
    /// </summary>
    public IntPtr ptr;

    /// <summary>
    /// Copies the wrapped array into a new array of integers.
    /// </summary>
    /// <returns>The integers read from the wrapped array.</returns>
    public readonly unsafe int[] ToArray()
    {
        // Reinterpret the raw address as the internal array layout before reading it.
        PdIntArray* array = (PdIntArray*)ptr;
        return array->ToArray();
    }

    /// <summary>
    /// Hands the wrapped pointer to <c>PD_OneDimArrayInt32Destroy</c> and then clears it.
    /// </summary>
    public void Dispose()
    {
        PD_OneDimArrayInt32Destroy(ptr);
        ptr = default;
    }
}

/// <summary>
/// Path of the Paddle Inference C library.
/// </summary>
Expand All @@ -127,16 +25,4 @@ public void Dispose()
#elif NETSTANDARD2_0_OR_GREATER || NET6_0_OR_GREATER || LINQPAD
@"paddle_inference_c";
#endif

/// <summary>
/// Sequentially laid out pair of a length and a data pointer describing a C string.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
internal struct PdCStr
{
    public uint Length;
    public IntPtr Data;

    /// <summary>
    /// Converts the pointed-to data to a string via the <c>ANSIToString</c> helper.
    /// </summary>
    /// <returns>The converted string, or <c>null</c> if the helper returns none.</returns>
    // NOTE(review): one is subtracted from Length before conversion, presumably because Length
    // counts a trailing NUL terminator — confirm; a Length of 0 would pass -1 to the helper.
    public override readonly string? ToString() => Data.ANSIToString((int)Length - 1);
}
}
Loading

0 comments on commit 9449415

Please sign in to comment.