Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pauldorsch/reconcile dependency graph logic #1183

Merged
merged 13 commits into from
Jun 21, 2024
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
namespace Microsoft.ComponentDetection.Contracts;
namespace Microsoft.ComponentDetection.Contracts;

using System;
using System.Collections.Generic;
Expand Down Expand Up @@ -65,6 +65,8 @@ public abstract class FileComponentDetector : IComponentDetector

protected IObservable<IComponentStream> ComponentStreams { get; private set; }

protected virtual bool EnableParallelism { get; set; }

/// <inheritdoc />
public async virtual Task<IndividualDetectorScanResult> ExecuteDetectorAsync(ScanRequest request, CancellationToken cancellationToken = default)
{
Expand Down Expand Up @@ -113,7 +115,7 @@ private async Task<IndividualDetectorScanResult> ProcessAsync(IObservable<Proces
new ExecutionDataflowBlockOptions
{
// MaxDegreeOfParallelism is the lower of the processor count and the max threads arg that the customer passed in
MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, maxThreads),
MaxDegreeOfParallelism = this.EnableParallelism ? Math.Min(Environment.ProcessorCount, maxThreads) : 1,
});

var preprocessedObserbable = await this.OnPrepareDetectionAsync(processRequests, detectorArgs);
Expand Down
3 changes: 2 additions & 1 deletion src/Microsoft.ComponentDetection.Contracts/ScanRequest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ public ScanRequest(DirectoryInfo sourceDirectory, ExcludeDirectoryPredicate dire
public IComponentRecorder ComponentRecorder { get; private set; }

/// <summary>
/// Gets the maximum number of threads to use in parallel for executing the detection.
/// Gets the maximum number of threads to use in parallel for executing the detection, assuming parallelism is
/// enabled for the detector.
/// </summary>
public int MaxThreads { get; private set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,10 @@ public interface IPythonCommandService
Task<IList<(string PackageString, GitComponent Component)>> ParseFileAsync(string path, string pythonPath = null);

Task<string> GetPythonVersionAsync(string pythonPath = null);

/// <summary>
/// Gets the os type using: https://docs.python.org/3/library/sys.html#sys.platform .
/// </summary>
/// <returns>OS type where the python script runs.</returns>
Task<string> GetOsTypeAsync(string pythonPath = null);
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
namespace Microsoft.ComponentDetection.Detectors.Pip;

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
Expand All @@ -24,7 +25,7 @@ public class PipDependencySpecification
/// <summary>
/// These are packages that we don't want to evaluate in our graph as they are generally python builtins.
/// </summary>
private static readonly HashSet<string> PackagesToIgnore = new HashSet<string>
public static readonly HashSet<string> PackagesToIgnore = new HashSet<string>
{
"-markerlib",
"pip",
Expand Down Expand Up @@ -154,7 +155,7 @@ public bool PackageConditionsMet(Dictionary<string, string> pythonEnvironmentVar
var conditionalVar = conditionalMatch.Groups[2].Value;
var conditionalOperator = conditionalMatch.Groups[3].Value;
var conditionalValue = conditionalMatch.Groups[4].Value;
if (!pythonEnvironmentVariables.ContainsKey(conditionalVar))
if (!pythonEnvironmentVariables.ContainsKey(conditionalVar) || string.IsNullOrEmpty(pythonEnvironmentVariables[conditionalVar]))
{
continue; // If the variable isn't in the environment, we can't evaluate it.
}
Expand All @@ -175,7 +176,7 @@ public bool PackageConditionsMet(Dictionary<string, string> pythonEnvironmentVar
else if (string.Equals(conditionalVar, "sys_platform", System.StringComparison.OrdinalIgnoreCase))
{
// if the platform is not windows or linux (empty string in env var), allow the package to be added. Otherwise, ensure it matches the python condition
conditionMet = string.IsNullOrEmpty(pythonEnvironmentVariables[conditionalVar]) || string.Equals(pythonEnvironmentVariables[conditionalVar], conditionalValue, System.StringComparison.OrdinalIgnoreCase);
conditionMet = string.Equals(pythonEnvironmentVariables[conditionalVar], conditionalValue, System.StringComparison.OrdinalIgnoreCase);
}
else
{
Expand All @@ -195,4 +196,17 @@ public bool PackageConditionsMet(Dictionary<string, string> pythonEnvironmentVar

return conditionsMet;
}

/// <summary>
/// Common method that can be used to determine whether this package is a valid parent
/// package of another package. Note that this logic is not perfect, it does not
/// respect all of the environment identifiers, nor does it correctly handle extras (it ignores
/// them).
/// </summary>
/// <param name="pythonEnvironmentVariables">List of environment variables used to evaluate the environmant conditions, such as OS this is executing on.</param>
/// <returns>Whether or not this package is valid as a parent package.</returns>
public bool IsValidParentPackage(Dictionary<string, string> pythonEnvironmentVariables) =>
!this.PackageIsUnsafe()
&& this.PackageConditionsMet(pythonEnvironmentVariables)
&& !this.ConditionalDependencySpecifiers.Any(s => s.Contains("extra ==", StringComparison.OrdinalIgnoreCase));
}
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,11 @@

var package = new ZipArchive(await response.Content.ReadAsStreamAsync());

var entry = package.GetEntry($"{name.Replace('-', '_')}-{version}.dist-info/METADATA");
var entryName = $"{name.Replace('-', '_')}-{version}.dist-info/METADATA";

Check warning on line 102 in src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs

View check run for this annotation

Codecov / codecov/patch

src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs#L102

Added line #L102 was not covered by tests

// first try case insensitive dicitonary lookup O(1), then attempt case-insensitive match O(entries)
var entry = package.GetEntry(entryName)
?? package.Entries.FirstOrDefault(x => string.Equals(x.FullName, entryName, StringComparison.OrdinalIgnoreCase));

Check warning on line 106 in src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs

View check run for this annotation

Codecov / codecov/patch

src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs#L106

Added line #L106 was not covered by tests

// If there is no metadata file, the package doesn't have any declared dependencies
if (entry == null)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;
using System.Collections.Generic;
using System.Linq;
using System.Reactive.Linq;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.ComponentDetection.Contracts;
Expand Down Expand Up @@ -39,7 +38,7 @@ public PipComponentDetector(

public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.Pip };

public override int Version { get; } = 9;
public override int Version { get; } = 10;

protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
{
Expand All @@ -55,11 +54,7 @@ protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsy
var pythonVersion = await this.pythonCommandService.GetPythonVersionAsync(pythonExePath);
this.pythonResolver.SetPythonEnvironmentVariable("python_version", pythonVersion);

var pythonPlatformString = RuntimeInformation.IsOSPlatform(OSPlatform.Windows)
? "win32"
: RuntimeInformation.IsOSPlatform(OSPlatform.Linux)
? "linux"
: string.Empty;
var pythonPlatformString = await this.pythonCommandService.GetOsTypeAsync(pythonExePath);
this.pythonResolver.SetPythonEnvironmentVariable("sys_platform", pythonPlatformString);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,24 @@

private readonly IPipCommandService pipCommandService;
private readonly IEnvironmentVariableService envVarService;
private readonly IPythonCommandService pythonCommandService;
private readonly IPythonResolver pythonResolver;

public PipReportComponentDetector(
IComponentStreamEnumerableFactory componentStreamEnumerableFactory,
IObservableDirectoryWalkerFactory walkerFactory,
IPipCommandService pipCommandService,
IEnvironmentVariableService envVarService,
IPythonCommandService pythonCommandService,
IPythonResolver pythonResolver,
ILogger<PipReportComponentDetector> logger)
{
this.ComponentStreamEnumerableFactory = componentStreamEnumerableFactory;
this.Scanner = walkerFactory;
this.pipCommandService = pipCommandService;
this.envVarService = envVarService;
this.pythonCommandService = pythonCommandService;
this.pythonResolver = pythonResolver;
this.Logger = logger;
}

Expand All @@ -53,7 +59,9 @@

public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.Pip };

public override int Version { get; } = 2;
public override int Version { get; } = 3;

protected override bool EnableParallelism { get; set; } = true;

protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
{
Expand All @@ -75,6 +83,22 @@
return Enumerable.Empty<ProcessRequest>().ToObservable();
}

this.CurrentScanRequest.DetectorArgs.TryGetValue("Pip.PythonExePath", out var pythonExePath);
if (!await this.pythonCommandService.PythonExistsAsync(pythonExePath))
{
this.Logger.LogInformation($"No python found on system. Python detection will not run.");

Check warning on line 89 in src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs

View check run for this annotation

Codecov / codecov/patch

src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs#L88-L89

Added lines #L88 - L89 were not covered by tests

return Enumerable.Empty<ProcessRequest>().ToObservable();

Check warning on line 91 in src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs

View check run for this annotation

Codecov / codecov/patch

src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs#L91

Added line #L91 was not covered by tests
}
else
{
var pythonVersion = await this.pythonCommandService.GetPythonVersionAsync(pythonExePath);
this.pythonResolver.SetPythonEnvironmentVariable("python_version", pythonVersion);

var pythonPlatformString = await this.pythonCommandService.GetOsTypeAsync(pythonExePath);
this.pythonResolver.SetPythonEnvironmentVariable("sys_platform", pythonPlatformString);
}

return processRequests;
}

Expand Down Expand Up @@ -169,11 +193,16 @@
// graph ourselves using the requires_dist field.
var dependenciesByPkg = new Dictionary<string, List<PipDependencySpecification>>(StringComparer.OrdinalIgnoreCase);
var nodeReferences = new Dictionary<string, PipReportGraphNode>(StringComparer.OrdinalIgnoreCase);
var pythonEnvVars = this.pythonResolver.GetPythonEnvironmentVariables();

foreach (var package in report.InstallItems)
{
// Normalize the package name to ensure consistency between the package name and the graph nodes.
var normalizedPkgName = PipReportUtilities.NormalizePackageNameFormat(package.Metadata.Name);
if (PipDependencySpecification.PackagesToIgnore.Contains(normalizedPkgName))
pauld-msft marked this conversation as resolved.
Show resolved Hide resolved
{
continue;

Check warning on line 204 in src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs

View check run for this annotation

Codecov / codecov/patch

src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs#L203-L204

Added lines #L203 - L204 were not covered by tests
}

var node = new PipReportGraphNode(
new PipComponent(
Expand All @@ -200,7 +229,7 @@
// futures; python_version <= \"2.7\"
// sphinx (!=1.8.0,!=3.1.0,!=3.1.1,>=1.6.5) ; extra == 'docs'
var dependencySpec = new PipDependencySpecification($"Requires-Dist: {dependency}", requiresDist: true);
if (dependencySpec.PackageIsUnsafe())
if (!dependencySpec.IsValidParentPackage(pythonEnvVars))
{
continue;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,11 @@
var match = version.Match(versionResult.StdOut);
return match.Success ? match.Groups[1].Value : null;
}

public async Task<string> GetOsTypeAsync(string pythonPath)
{
var pythonCommand = await this.ResolvePythonAsync(pythonPath);
var versionResult = await this.commandLineInvocationService.ExecuteCommandAsync(pythonCommand, new List<string> { "python3", "python2" }, "-c", "\"import sys; print(sys.platform);\"");

Check warning on line 188 in src/Microsoft.ComponentDetection.Detectors/pip/PythonCommandService.cs

View check run for this annotation

Codecov / codecov/patch

src/Microsoft.ComponentDetection.Detectors/pip/PythonCommandService.cs#L186-L188

Added lines #L186 - L188 were not covered by tests
return versionResult.ExitCode == 0 && string.IsNullOrEmpty(versionResult.StdErr) ? versionResult.StdOut.Trim() : null;
}

Check warning on line 190 in src/Microsoft.ComponentDetection.Detectors/pip/PythonCommandService.cs

View check run for this annotation

Codecov / codecov/patch

src/Microsoft.ComponentDetection.Detectors/pip/PythonCommandService.cs#L190

Added line #L190 was not covered by tests
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe
var (root, currentNode) = state.ProcessingQueue.Dequeue();

// gather all dependencies for the current node
var dependencies = (await this.FetchPackageDependenciesAsync(state, currentNode)).Where(x => !x.PackageIsUnsafe()).Where(x => x.PackageConditionsMet(this.pythonEnvironmentVariables)).ToList();
var dependencies = (await this.FetchPackageDependenciesAsync(state, currentNode)).Where(x => x.IsValidParentPackage(this.pythonEnvironmentVariables)).ToList();

foreach (var dependencyNode in dependencies)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,26 @@ public void TestPipDependencyRequireDistConditionalDependenciesMet_Linux()

VerifyPipConditionalDependencyParsing(specs, pythonEnvironmentVariables, true);
}

[TestMethod]
public void TestPipDependencyRequireDistConditionalDependenciesMet_Empty()
{
var specs = new List<(string, bool, PipDependencySpecification)>
{
("Requires-Dist: TestPackage (>=2.0.1) ; python_version == \"3.8\" and sys_platform == \"linux\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=2.0.1" }, ConditionalDependencySpecifiers = new List<string> { "python_version == \"3.8\"", "and sys_platform == \"linux\"" } }),
("Requires-Dist: TestPackage (>=4.0.1) ; python_version == \"3.6\" and sys_platform == \"win32\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=4.0.1" }, ConditionalDependencySpecifiers = new List<string> { "python_version == \"3.6\"", "and sys_platform == \"win32\"" } }),
("Requires-Dist: TestPackage (>=5.0.1) ; sys_platform == \"linux\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=5.0.1" }, ConditionalDependencySpecifiers = new List<string> { "sys_platform == \"linux\"" } }),
("Requires-Dist: TestPackage (>=5.0.1) ; sys_platform == \"win32\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=5.0.1" }, ConditionalDependencySpecifiers = new List<string> { "sys_platform == \"win32\"" } }),
("Requires-Dist: TestPackage (>=5.0.1) ; sys_platform == \"asdf\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=5.0.1" }, ConditionalDependencySpecifiers = new List<string> { "sys_platform == \"asdf\"" } }),
};

// test null and empty cases should allow packages through
var pythonEnvironmentVariables = new Dictionary<string, string>
{
{ "python_version", null },
{ "sys_platform", string.Empty },
};

VerifyPipConditionalDependencyParsing(specs, pythonEnvironmentVariables, true);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ namespace Microsoft.ComponentDetection.Detectors.Tests;
public class PipReportComponentDetectorTests : BaseDetectorTest<PipReportComponentDetector>
{
private readonly Mock<IPipCommandService> pipCommandService;
private readonly Mock<IPythonCommandService> pythonCommandService;
private readonly Mock<IPythonResolver> pythonResolver;
private readonly Mock<IEnvironmentVariableService> mockEnvVarService;
private readonly Mock<ILogger<PipReportComponentDetector>> mockLogger;

Expand All @@ -35,6 +37,14 @@ public PipReportComponentDetectorTests()
this.pipCommandService = new Mock<IPipCommandService>();
this.DetectorTestUtility.AddServiceMock(this.pipCommandService);

this.pythonCommandService = new Mock<IPythonCommandService>();
this.pythonCommandService.Setup(x => x.PythonExistsAsync(It.IsAny<string>())).ReturnsAsync(true);
this.DetectorTestUtility.AddServiceMock(this.pythonCommandService);

this.pythonResolver = new Mock<IPythonResolver>();
this.pythonResolver.Setup(x => x.GetPythonEnvironmentVariables()).Returns(new Dictionary<string, string>());
this.DetectorTestUtility.AddServiceMock(this.pythonResolver);

this.mockLogger = new Mock<ILogger<PipReportComponentDetector>>();
this.DetectorTestUtility.AddServiceMock(this.mockLogger);

Expand Down Expand Up @@ -434,11 +444,10 @@ public async Task TestPipReportDetector_SingleRoot_ComplexGraph_ComponentRecorde
var jupyterGraph = graphsByLocations[file1];

var jupyterLabDependencies = jupyterGraph.GetDependenciesForComponent(jupyterComponent.Id);
jupyterLabDependencies.Should().HaveCount(15);
jupyterLabDependencies.Should().HaveCount(12);
jupyterLabDependencies.Should().Contain("async-lru 2.0.4 - pip");
jupyterLabDependencies.Should().Contain("jupyter-server 2.14.0 - pip");
jupyterLabDependencies.Should().Contain("traitlets 5.14.3 - pip");
jupyterLabDependencies.Should().Contain("requests 2.32.2 - pip");
jupyterLabDependencies.Should().Contain("jupyter-lsp 2.2.5 - pip");

var bleachComponent = pipComponents.Single(x => ((PipComponent)x.Component).Name.Equals("bleach")).Component as PipComponent;
Expand All @@ -447,10 +456,9 @@ public async Task TestPipReportDetector_SingleRoot_ComplexGraph_ComponentRecorde
bleachComponent.License.Should().Be("Apache Software License");

var bleachDependencies = jupyterGraph.GetDependenciesForComponent(bleachComponent.Id);
bleachDependencies.Should().HaveCount(3);
bleachDependencies.Should().HaveCount(2);
bleachDependencies.Should().Contain("six 1.16.0 - pip");
bleachDependencies.Should().Contain("webencodings 0.5.1 - pip");
bleachDependencies.Should().Contain("tinycss2 1.3.0 - pip");

ComponentRecorderTestUtilities.CheckChild<PipComponent>(
componentRecorder,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile --strip-extras
#
azure-core==1.30.0
# via
# azure-identity
# msrest
azure-devops==7.1.0b4
# via -r requirements.in
azure-identity==1.16.0b1
# via -r requirements.in
certifi==2024.2.2
# via
# msrest
# requests
cffi==1.16.0
# via cryptography
charset-normalizer==3.3.2
# via requests
cryptography==42.0.4
# via
# azure-identity
# msal
# pyjwt
gitdb==4.0.11
# via gitpython
gitpython==3.1.42
# via -r requirements.in
idna==3.6
# via requests
isodate==0.6.1
# via msrest
msal==1.27.0
# via
# azure-identity
# msal-extensions
msal-extensions==1.1.0
# via azure-identity
msrest==0.7.1
# via
# -r requirements.in
# azure-devops
oauthlib==3.2.2
# via requests-oauthlib
packaging==23.2
# via msal-extensions
portalocker==2.8.2
# via msal-extensions
pycparser==2.21
# via cffi
pyjwt==2.8.0
# via
# msal
# pyjwt
requests==2.31.0
# via
# azure-core
# msal
# msrest
# requests-oauthlib
requests-oauthlib==1.3.1
# via msrest
six==1.16.0
# via
# azure-core
# isodate
smmap==5.0.1
# via gitdb
typing-extensions==4.9.0
# via azure-core
urllib3==2.2.1
# via requests
Loading
Loading