Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sync eng/common directory with azure-sdk-tools repository #9790

Merged
merged 1 commit into from
Jun 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions eng/common/pipelines/templates/steps/verify-links.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Step template that runs eng/common/scripts/Verify-Links.ps1 against the markdown files of a directory.
parameters:
# Directory (appended to $(Build.SourcesDirectory)) used as the working directory and as the -rootUrl
Directory: 'not-specified'

steps:
- task: PowerShell@2
displayName: Link verification check
inputs:
# Run with PowerShell Core (pwsh) rather than Windows PowerShell
pwsh: true
workingDirectory: $(Build.SourcesDirectory)/${{ parameters.Directory }}
filePath: eng/common/scripts/Verify-Links.ps1
# NOTE(review): $(dir -r -i *.md) is presumably left untouched by pipeline macro expansion and then
# evaluated by PowerShell as a subexpression listing the *.md files under the working directory - confirm.
arguments: >
-urls $(dir -r -i *.md) -rootUrl "file://$(Build.SourcesDirectory)/${{ parameters.Directory }}"
244 changes: 244 additions & 0 deletions eng/common/scripts/Verify-Links.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
# Verifies the links found in the given pages (http urls or local md/html files) and reports the broken ones.
param (
# url list to verify links. Can either be a http address or a local file request. Local file paths support md and html files.
[string[]] $urls,
# file that contains a set of links to ignore when verifying
[string] $ignoreLinksFile = "$PSScriptRoot/ignore-links.txt",
# switch that will enable devops specific logging for warnings
[switch] $devOpsLogging = $false,
# check the links recursively: verified links that begin with baseUrl are queued as pages to check as well
[switch] $recursive = $true,
# recursively check links for all links verified that begin with this baseUrl, defaults to the folder the url is contained in
[string] $baseUrl = "",
# path to the root of the site for resolving rooted relative links, defaults to host root for http and file directory for local files
[string] $rootUrl = "",
# list of http status codes that count as broken links. Defaults to 404.
[array] $errorStatusCodes = @(404),
# flag to allow resolving relative paths or not
[bool] $resolveRelativeLinks = $true
)

$ProgressPreference = "SilentlyContinue"; # Disable invoke-webrequest progress dialog

# Converts a url or an existing local path into a System.Uri.
# As a side effect, fills in the script-level $baseUrl and $rootUrl when they are still empty:
#   baseUrl -> the directory that contains the url
#   rootUrl -> baseUrl for file urls, the host root ("/") for everything else
function NormalizeUrl {
  param([string]$url)

  # Paths that exist locally are rewritten as file:// urls
  $candidate = $url
  if (Test-Path $candidate) {
    $resolvedPath = (Resolve-Path $candidate).ToString()
    $candidate = "file://" + $resolvedPath
  }

  $parsedUri = [System.Uri]$candidate

  if ($script:baseUrl -eq "") {
    # "." resolved against the uri yields its containing directory
    $containingDirectory = [System.Uri]::new($parsedUri, ".")
    $script:baseUrl = $containingDirectory.ToString()
  }

  if ($script:rootUrl -eq "") {
    if (-not $parsedUri.IsFile) {
      # non-file (http) urls are rooted at the host
      $script:rootUrl = [System.Uri]::new($parsedUri, "/")
    }
    else {
      # local files are rooted at their containing directory
      $script:rootUrl = $script:baseUrl
    }
  }

  return $parsedUri
}

# Emits a warning built from all arguments (joined the way "$args" interpolates them).
# When $devOpsLogging is set the warning is written as an Azure DevOps logging command,
# otherwise it goes to the regular warning stream.
function LogWarning
{
  $message = "$args"

  if (-not $devOpsLogging)
  {
    Write-Warning $message
    return
  }

  Write-Host "##vso[task.LogIssue type=warning;]$message"
}

# Resolves a raw link found on a page into an absolute System.Uri.
#   $referralUri - uri of the page the link was found on; non-rooted relative links resolve against it
#   $link        - the raw href value
# Returns the resolved uri, or $null when the link should not be verified
# (mailto links, schemes other than http/file, links listed in $ignoreLinks).
# Reads the script-level $resolveRelativeLinks, $rootUrl and $ignoreLinks.
function ResolveUri ([System.Uri]$referralUri, [string]$link)
{
  # If the link is mailto, skip it.
  if ($link.StartsWith("mailto:")) {
    Write-Verbose "Skipping $link because it is a mailto link."
    return $null
  }

  $linkUri = [System.Uri]$link;
  if ($resolveRelativeLinks -and !$linkUri.IsAbsoluteUri) {
    if ($link.StartsWith("/")) {
      # Rooted paths resolve from the rootUrl.
      # Diagnostics must go to the verbose stream: anything written to the success stream
      # (echo / Write-Output) becomes part of this function's return value and would be
      # handed to the callers as an extra bogus link.
      Write-Verbose "rooturl = $rootUrl"
      $linkUri = new-object System.Uri([System.Uri]$rootUrl, ".$link");
    }
    else {
      $linkUri = new-object System.Uri($referralUri, $link);
    }
  }

  # Strip everything but the request part of the url (e.g. the #fragment)
  # NOTE(review): GetComponents presumably fails for a uri that is still relative here
  # (possible when $resolveRelativeLinks is $false) - confirm the intended handling.
  $linkUri = [System.Uri]$linkUri.GetComponents([System.UriComponents]::HttpRequestUrl, [System.UriFormat]::SafeUnescaped)
  Write-Verbose "ResolvedUri $link to $linkUri"

  # If the link is not a web request, like mailto, skip it.
  if (!$linkUri.Scheme.StartsWith("http") -and !$linkUri.IsFile) {
    Write-Verbose "Skipping $linkUri because it is not http or file based."
    return $null
  }

  # The ignore list is matched against the raw link text, not the resolved uri
  if ($null -ne $ignoreLinks -and $ignoreLinks.Contains($link)) {
    Write-Verbose "Ignoring invalid link $linkUri because it is in the ignore file."
    return $null
  }

  return $linkUri;
}

# Extracts the href of every anchor tag in $htmlContent, resolves each one through ResolveUri
# (relative to $baseUri) and returns the resolved links sorted with duplicates removed.
function ParseLinks([string]$baseUri, [string]$htmlContent)
{
  # <a ... href=VALUE> where VALUE may be wrapped in single or double quotes
  $anchorPattern = "<a[^>]+href\s*=\s*[""']?(?<href>[^""']*)[""']?"
  $matchOptions = [System.Text.RegularExpressions.RegexOptions]"Singleline, IgnoreCase"
  $anchorMatches = [RegEx]::Matches($htmlContent, $anchorPattern, $matchOptions)

  Write-Verbose "Found $($anchorMatches.Count) raw href's in page $baseUri"

  return $anchorMatches |
    ForEach-Object { ResolveUri $baseUri $_.Groups["href"].Value } |
    Sort-Object -Unique
}

# Verifies a single link once and records the outcome.
#   file links : broken when the local path does not exist
#   other links: requested with Invoke-WebRequest; broken when the request fails with a
#                status code listed in $errorStatusCodes, other failures are only logged
# Broken links are appended to the script-level $badLinks; every processed link is
# remembered in $checkedLinks so it is not checked again.
function CheckLink ([System.Uri]$linkUri)
{
  if ($checkedLinks.ContainsKey($linkUri)) { return }

  Write-Verbose "Checking link $linkUri..."
  if (-not $linkUri.IsFile) {
    try {
      $statusCode = (Invoke-WebRequest -Uri $linkUri).StatusCode
      if ($statusCode -ne 200) {
        Write-Host "[$statusCode] while requesting $linkUri"
      }
    }
    catch {
      # The status code is $null when the failure carries no http response
      $statusCode = $_.Exception.Response.StatusCode.value__

      if ($statusCode -in $errorStatusCodes) {
        LogWarning "[$statusCode] broken link $linkUri"
        $script:badLinks += $linkUri
      }
      elseif ($null -ne $statusCode) {
        Write-Host "[$statusCode] while requesting $linkUri"
      }
      else {
        Write-Host "Exception while requesting $linkUri"
        Write-Host $_.Exception.ToString()
      }
    }
  }
  elseif (!(Test-Path $linkUri.LocalPath)) {
    LogWarning "Link to file does not exist $($linkUri.LocalPath)"
    $script:badLinks += $linkUri
  }

  $checkedLinks[$linkUri] = $true
}

# Loads the content of a page and returns the links found on it (via ParseLinks).
#   http(s) uris   : content is downloaded with Invoke-WebRequest
#   existing files : *.md is converted to html, *.html is read as is; for any other path an
#                    index.html inside it is preferred, falling back to reading the path itself
# A page that cannot be loaded is reported with Write-Error and parsed as empty content.
function GetLinks([System.Uri]$pageUri)
{
  if ($pageUri.Scheme.StartsWith("http")) {
    try {
      $response = Invoke-WebRequest -Uri $pageUri
      $content = $response.Content
    }
    catch {
      $statusCode = $_.Exception.Response.StatusCode.value__
      Write-Error "Invalid page [$statusCode] $pageUri"
    }
  }
  elseif ($pageUri.IsFile -and (Test-Path $pageUri.LocalPath)) {
    $file = $pageUri.LocalPath
    if ($file.EndsWith(".md")) {
      $content = (ConvertFrom-MarkDown $file).html
    }
    elseif ($file.EndsWith(".html")) {
      $content = Get-Content $file
    }
    else {
      # Join-Path inserts the separator when the directory path has no trailing one;
      # plain string concatenation would look for "<dir>index.html" in that case.
      $indexPage = Join-Path $file "index.html"
      if (Test-Path $indexPage) {
        $content = Get-Content $indexPage
      }
      else {
        # Fallback to just reading the content directly
        $content = Get-Content $file
      }
    }
  }
  else {
    Write-Error "Don't know how to process uri $pageUri"
  }

  $links = ParseLinks $pageUri $content

  return $links;
}

# ---- Script body: crawl the requested pages and verify every link found on them ----

# NOTE(review): an empty or missing $urls is falsy, so the usage branch below can never run
# and the script simply reports zero links in that case. Left as is because callers may rely
# on a successful exit when no urls are passed - confirm before making it reachable.
if ($urls) {
  if ($urls.Count -eq 0) {
    Write-Host "Usage $($MyInvocation.MyCommand.Name) <urls>";
    exit 1;
  }
}

if ($PSVersionTable.PSVersion.Major -lt 6)
{
  LogWarning "Some web requests will not work in versions of PS earlier then 6. You are running version $($PSVersionTable.PSVersion)."
}

$badLinks = @();
$ignoreLinks = @();
if (Test-Path $ignoreLinksFile)
{
  # One link per line; "#" starts a comment and blank lines are dropped
  $ignoreLinks = [Array](Get-Content $ignoreLinksFile | ForEach-Object { ($_ -replace "#.*", "").Trim() } | Where-Object { $_ -ne "" })
}

$checkedPages = @{};
$checkedLinks = @{};
$pageUrisToCheck = new-object System.Collections.Queue

foreach ($url in $urls) {
  $uri = NormalizeUrl $url
  $pageUrisToCheck.Enqueue($uri);
}

# Breadth-first crawl: every page is parsed once, every link is checked once
while ($pageUrisToCheck.Count -ne 0)
{
  $pageUri = $pageUrisToCheck.Dequeue();
  if ($checkedPages.ContainsKey($pageUri)) { continue }
  $checkedPages[$pageUri] = $true;

  $linkUris = GetLinks $pageUri
  Write-Host "Found $($linkUris.Count) links on page $pageUri";

  foreach ($linkUri in $linkUris) {
    CheckLink $linkUri
    if ($recursive) {
      # Only links below baseUrl are followed as pages
      if ($linkUri.ToString().StartsWith($baseUrl) -and !$checkedPages.ContainsKey($linkUri)) {
        $pageUrisToCheck.Enqueue($linkUri);
      }
    }
  }
}

Write-Host "Found $($checkedLinks.Count) links with $($badLinks.Count) broken"
$badLinks | ForEach-Object { Write-Host " $_" }

# The exit code is the number of broken links, capped at 255: POSIX hosts keep only the low
# 8 bits of an exit status, so an uncapped count of 256 would be reported as success (0).
exit ([System.Math]::Min($badLinks.Count, 255))
2 changes: 1 addition & 1 deletion eng/common/scripts/artifact-metadata-parsing.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ function ParseCArtifact($pkg, $workingDirectory) {
}

return New-Object PSObject -Property @{
PackageId = ''
PackageId = 'azure-sdk-for-c'
PackageVersion = $pkgVersion
# Artifact info is always considered deployable for C becasue it is not
# deployed anywhere. Dealing with duplicate tags happens downstream in
Expand Down