diff --git a/eng/Versions.props b/eng/Versions.props
index b8f9ec5b74..08e9fc1fe5 100644
--- a/eng/Versions.props
+++ b/eng/Versions.props
@@ -73,8 +73,8 @@
1.4.1
0.1.0
1.15.0
-    <TorchSharpVersion>0.102.7</TorchSharpVersion>
-    <LibTorchVersion>2.2.1.1</LibTorchVersion>
+    <TorchSharpVersion>0.105.0</TorchSharpVersion>
+    <LibTorchVersion>2.5.1</LibTorchVersion>
1.12.4
6.0.2
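
TorchSharp 0.105.0 bundles libtorch 2.5.1 and ships stricter nullable annotations: `Module.get_buffer` now returns `Tensor?`, which is why the call sites below add the null-forgiving `!` operator. A minimal alternative, sketched with a helper name of our own invention, would fail fast instead of suppressing the warning:

```csharp
using System;
using TorchSharp;
using static TorchSharp.torch;

// Hypothetical helper (not part of this PR): resolve a required buffer or
// throw a descriptive error, instead of null-forgiving each call site.
static Tensor GetRequiredBuffer(nn.Module module, string name) =>
    module.get_buffer(name)
    ?? throw new InvalidOperationException($"Buffer '{name}' is not registered.");
```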
diff --git a/src/Microsoft.ML.GenAI.Core/Module/QuantizedLinear.cs b/src/Microsoft.ML.GenAI.Core/Module/QuantizedLinear.cs
index f399efe324..669625e7b8 100644
--- a/src/Microsoft.ML.GenAI.Core/Module/QuantizedLinear.cs
+++ b/src/Microsoft.ML.GenAI.Core/Module/QuantizedLinear.cs
@@ -87,9 +87,9 @@ public override Tensor forward(Tensor input)
{
// 8bit quantization
using var dispose = torch.NewDisposeScope();
- var weight = this.get_buffer("8bit_weight").to(ScalarType.Float32);
- var zeroPoint = this.get_buffer("zeroPoint").to(ScalarType.Float32);
- var scale = this.get_buffer("scale").to(ScalarType.Float32);
+ var weight = this.get_buffer("8bit_weight")!.to(ScalarType.Float32);
+ var zeroPoint = this.get_buffer("zeroPoint")!.to(ScalarType.Float32);
+ var scale = this.get_buffer("scale")!.to(ScalarType.Float32);
var restoreWeight = (weight - zeroPoint.view(-1, 1)) / scale.view(-1, 1);
// use float32
var result = torch.matmul(input.to(ScalarType.Float32), restoreWeight.T);
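
For context, the 8-bit path keeps one zero point and one scale per output row and reconstructs float weights with the affine map `(q - zeroPoint) / scale`. A self-contained sketch of that dequantization, with hypothetical tensor names:

```csharp
using TorchSharp;
using static TorchSharp.torch;

// Illustrative sketch (not from the PR): per-row dequantization as the
// 8-bit branch above performs it. zeroPoint and scale hold one entry per
// output row, hence the view(-1, 1) reshape for row-wise broadcasting.
static Tensor Dequantize8Bit(Tensor qWeight, Tensor zeroPoint, Tensor scale)
{
    var w = qWeight.to(ScalarType.Float32);
    return (w - zeroPoint.view(-1, 1)) / scale.view(-1, 1);
}
```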
@@ -106,17 +106,17 @@ public override Tensor forward(Tensor input)
{
using var dispose = torch.NewDisposeScope();
var weight = this.get_buffer("4bit_weight");
- var weightLower = weight % 16;
- var weightUpper = weight / 16;
+ var weightLower = weight! % 16;
+ var weightUpper = weight! / 16;
weight = torch.cat([weightUpper, weightLower], 0).to(ScalarType.Float32);
weight = weight.view(this._outFeatures, this._inFeatures);
weight -= 8;
var zeroPoint = this.get_buffer("zeroPoint");
- var zeroPointLower = zeroPoint % 16;
- var zeroPointUpper = zeroPoint / 16;
+ var zeroPointLower = zeroPoint! % 16;
+ var zeroPointUpper = zeroPoint! / 16;
zeroPoint = torch.cat([zeroPointUpper, zeroPointLower], 0).to(ScalarType.Float32);
zeroPoint -= 8;
- var scale = this.get_buffer("scale").to(ScalarType.Float32);
+ var scale = this.get_buffer("scale")!.to(ScalarType.Float32);
var restoreWeight = (weight - zeroPoint.view(-1, 1)) / scale.view(-1, 1);
// use float32
var result = torch.matmul(input.to(ScalarType.Float32), restoreWeight.T);
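
The 4-bit branch packs two values into each byte: `% 16` extracts the low nibble, integer division by 16 the high nibble, and the stored values carry a +8 offset. A self-contained sketch of the unpacking, with assumed parameter names:

```csharp
using TorchSharp;
using static TorchSharp.torch;

// Illustrative sketch (not from the PR): unpack two 4-bit values per byte,
// mirroring the nibble arithmetic in the 4-bit branch above.
static Tensor Unpack4Bit(Tensor packed, long outFeatures, long inFeatures)
{
    var lower = packed % 16;  // low nibble
    var upper = packed / 16;  // high nibble (integer division on byte data)
    var w = torch.cat(new[] { upper, lower }, 0).to(ScalarType.Float32);
    return w.view(outFeatures, inFeatures) - 8;  // undo the +8 storage offset
}
```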
diff --git a/src/Microsoft.ML.GenAI.Core/Module/RotaryEmbedding.cs b/src/Microsoft.ML.GenAI.Core/Module/RotaryEmbedding.cs
index 8e06c838d5..fa86164a6a 100644
--- a/src/Microsoft.ML.GenAI.Core/Module/RotaryEmbedding.cs
+++ b/src/Microsoft.ML.GenAI.Core/Module/RotaryEmbedding.cs
@@ -109,7 +109,7 @@ public override RotaryEmbeddingOutput forward(RotaryEmbeddingInput input)
var seqLen = input.SeqLen;
// TODO
// can be calculated once and cached
- var invFreq = this.get_buffer("inv_freq").to(x.device);
+ var invFreq = this.get_buffer("inv_freq")!.to(x.device);
var invFreqExpanded = invFreq.unsqueeze(0).unsqueeze(-1);
invFreqExpanded = invFreqExpanded.expand(new long[] { positionIds.shape[0], -1, 1 });
var positionIdsExpanded = positionIds.unsqueeze(1).to(torch.float32);
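
The `inv_freq` buffer read here is the standard rotary table, `1 / theta^(2i/d)`; the TODO above notes that the device move could be done once and cached instead of on every forward pass. A sketch of how such a buffer is conventionally built, assuming the usual default theta and made-up names:

```csharp
using System;
using TorchSharp;
using static TorchSharp.torch;

// Illustrative sketch (not from the PR): the conventional rotary inv_freq
// table, 1 / theta^(2i/d), for even indices i in [0, headDim).
static Tensor MakeInvFreq(int headDim, double theta = 10000.0)
{
    var exponent = torch.arange(0, headDim, 2, dtype: ScalarType.Float32) / headDim;
    return torch.exp(exponent * -Math.Log(theta));  // equals theta^(-2i/d)
}
```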
diff --git a/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs b/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs
index 8568239fc5..cfb9cf48da 100644
--- a/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs
+++ b/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs
@@ -35,7 +35,7 @@ public class ConvModule : Module<Tensor, Tensor>
public ConvModule(int inChannel, int outChannel, int kernelSize, int stride = 1, int padding = 0, int dilation = 1, bool bias = true, bool useRelu = true)
: base(nameof(ConvModule))
{
- this.conv = nn.Conv2d(in_channels: inChannel, out_channels: outChannel, kernelSize: kernelSize, stride: stride, padding: padding, dilation: dilation, bias: bias);
+ this.conv = nn.Conv2d(in_channels: inChannel, out_channels: outChannel, kernel_size: kernelSize, stride: stride, padding: padding, dilation: dilation, bias: bias);
this.useRelu = useRelu;
if (this.useRelu)
{
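
TorchSharp 0.105.x aligns its named parameters with PyTorch's snake_case, so `kernelSize:` must become `kernel_size:` at this call site. A minimal usage sketch under the new names, with made-up channel sizes:

```csharp
using TorchSharp;
using static TorchSharp.torch;

// Illustrative sketch (not from the PR): Conv2d construction with the
// snake_case argument names expected by TorchSharp 0.105.x.
var conv = nn.Conv2d(in_channels: 3, out_channels: 16, kernel_size: 3,
                     stride: 1, padding: 1, dilation: 1, bias: true);
using var input = torch.randn(new long[] { 1, 3, 32, 32 });  // N, C, H, W
using var output = conv.call(input);                         // -> [1, 16, 32, 32]
```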