diff --git a/eng/Versions.props b/eng/Versions.props
index b8f9ec5b74..08e9fc1fe5 100644
--- a/eng/Versions.props
+++ b/eng/Versions.props
@@ -73,8 +73,8 @@
     1.4.1
     0.1.0
     1.15.0
-    0.102.7
-    2.2.1.1
+    0.105.0
+    2.5.1
     1.12.4
     6.0.2
diff --git a/src/Microsoft.ML.GenAI.Core/Module/QuantizedLinear.cs b/src/Microsoft.ML.GenAI.Core/Module/QuantizedLinear.cs
index f399efe324..669625e7b8 100644
--- a/src/Microsoft.ML.GenAI.Core/Module/QuantizedLinear.cs
+++ b/src/Microsoft.ML.GenAI.Core/Module/QuantizedLinear.cs
@@ -87,9 +87,9 @@ public override Tensor forward(Tensor input)
         {
             // 8bit quantization
             using var dispose = torch.NewDisposeScope();
-            var weight = this.get_buffer("8bit_weight").to(ScalarType.Float32);
-            var zeroPoint = this.get_buffer("zeroPoint").to(ScalarType.Float32);
-            var scale = this.get_buffer("scale").to(ScalarType.Float32);
+            var weight = this.get_buffer("8bit_weight")!.to(ScalarType.Float32);
+            var zeroPoint = this.get_buffer("zeroPoint")!.to(ScalarType.Float32);
+            var scale = this.get_buffer("scale")!.to(ScalarType.Float32);
             var restoreWeight = (weight - zeroPoint.view(-1, 1)) / scale.view(-1, 1);
             // use float32
             var result = torch.matmul(input.to(ScalarType.Float32), restoreWeight.T);
@@ -106,17 +106,17 @@ public override Tensor forward(Tensor input)
         {
             using var dispose = torch.NewDisposeScope();
             var weight = this.get_buffer("4bit_weight");
-            var weightLower = weight % 16;
-            var weightUpper = weight / 16;
+            var weightLower = weight! % 16;
+            var weightUpper = weight! / 16;
             weight = torch.cat([weightUpper, weightLower], 0).to(ScalarType.Float32);
             weight = weight.view(this._outFeatures, this._inFeatures);
             weight -= 8;
             var zeroPoint = this.get_buffer("zeroPoint");
-            var zeroPointLower = zeroPoint % 16;
-            var zeroPointUpper = zeroPoint / 16;
+            var zeroPointLower = zeroPoint! % 16;
+            var zeroPointUpper = zeroPoint! / 16;
             zeroPoint = torch.cat([zeroPointUpper, zeroPointLower], 0).to(ScalarType.Float32);
             zeroPoint -= 8;
-            var scale = this.get_buffer("scale").to(ScalarType.Float32);
+            var scale = this.get_buffer("scale")!.to(ScalarType.Float32);
             var restoreWeight = (weight - zeroPoint.view(-1, 1)) / scale.view(-1, 1);
             // use float32
             var result = torch.matmul(input.to(ScalarType.Float32), restoreWeight.T);
diff --git a/src/Microsoft.ML.GenAI.Core/Module/RotaryEmbedding.cs b/src/Microsoft.ML.GenAI.Core/Module/RotaryEmbedding.cs
index 8e06c838d5..fa86164a6a 100644
--- a/src/Microsoft.ML.GenAI.Core/Module/RotaryEmbedding.cs
+++ b/src/Microsoft.ML.GenAI.Core/Module/RotaryEmbedding.cs
@@ -109,7 +109,7 @@ public override RotaryEmbeddingOutput forward(RotaryEmbeddingInput input)
         var seqLen = input.SeqLen;
         // TODO
         // can be calculated once and cached
-        var invFreq = this.get_buffer("inv_freq").to(x.device);
+        var invFreq = this.get_buffer("inv_freq")!.to(x.device);
         var invFreqExpanded = invFreq.unsqueeze(0).unsqueeze(-1);
         invFreqExpanded = invFreqExpanded.expand(new long[] { positionIds.shape[0], -1, 1 });
         var positionIdsExpanded = positionIds.unsqueeze(1).to(torch.float32);
diff --git a/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs b/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs
index 8568239fc5..cfb9cf48da 100644
--- a/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs
+++ b/src/Microsoft.ML.TorchSharp/AutoFormerV2/ConvModule.cs
@@ -35,7 +35,7 @@ public class ConvModule : Module
         public ConvModule(int inChannel, int outChannel, int kernelSize, int stride = 1, int padding = 0, int dilation = 1, bool bias = true, bool useRelu = true)
             : base(nameof(ConvModule))
         {
-            this.conv = nn.Conv2d(in_channels: inChannel, out_channels: outChannel, kernelSize: kernelSize, stride: stride, padding: padding, dilation: dilation, bias: bias);
+            this.conv = nn.Conv2d(in_channels: inChannel, out_channels: outChannel, kernel_size: kernelSize, stride: stride, padding: padding, dilation: dilation, bias: bias);
             this.useRelu = useRelu;
             if (this.useRelu)
             {
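
Note for reviewers: the code changes follow from the version bump in eng/Versions.props above. As of the newer TorchSharp, get_buffer() is annotated as returning a nullable tensor, hence the null-forgiving "!" added at each call site, and nn.Conv2d spells its kernel-size argument kernel_size rather than kernelSize. For context, below is a minimal standalone C# sketch of the 4-bit dequantization that the QuantizedLinear.forward hunk above performs; the Dequantize4Bit helper and its parameter names are illustrative only and are not part of this PR.

// Minimal sketch (assumed helper, not part of this PR) of the 4-bit
// dequantization in QuantizedLinear.forward. The real module reads the
// packed tensors via get_buffer(), which is where the "!" in the diff
// comes from.
using TorchSharp;
using static TorchSharp.torch;

static Tensor Dequantize4Bit(Tensor packedWeight, Tensor packedZeroPoint, Tensor scale, long outFeatures, long inFeatures)
{
    using var scope = torch.NewDisposeScope();

    // Each packed byte stores two 4-bit values: the high nibble (w / 16)
    // and the low nibble (w % 16). Unpack both halves, then shift the
    // stored [0, 15] range back to the signed [-8, 7] range.
    var weight = torch.cat([packedWeight / 16, packedWeight % 16], 0)
        .to(ScalarType.Float32)
        .view(outFeatures, inFeatures);
    weight -= 8;

    var zeroPoint = torch.cat([packedZeroPoint / 16, packedZeroPoint % 16], 0)
        .to(ScalarType.Float32);
    zeroPoint -= 8;

    // Row-wise affine dequantization: w_fp32 = (w_q - zeroPoint) / scale.
    var restored = (weight - zeroPoint.view(-1, 1)) / scale.view(-1, 1);
    return restored.MoveToOuterDisposeScope();
}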