Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 182 additions & 0 deletions .github/actions/infrastructure/markdownlinks/Parse-MarkdownLink.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

#requires -version 7
# Markdig is always available in PowerShell 7
<#
.SYNOPSIS
Parse CHANGELOG files using Markdig to extract links.

.DESCRIPTION
This script uses Markdig.Markdown.Parse to parse all markdown files in the CHANGELOG directory
and extract different types of links (inline links, reference links, etc.).

.PARAMETER ChangelogPath
Path to the CHANGELOG directory. Defaults to ./CHANGELOG

.PARAMETER LinkType
Filter by link type: All, Inline, Reference, AutoLink. Defaults to All.

.EXAMPLE
.\Parse-MarkdownLink.ps1

.EXAMPLE
.\Parse-MarkdownLink.ps1 -LinkType Reference
#>

param(
# Directory whose *.md files are parsed for links.
[string]$ChangelogPath = "./CHANGELOG",
# Link type to keep in the output; "All" disables the type filter.
[ValidateSet("All", "Inline", "Reference", "AutoLink")]
[string]$LinkType = "All"
)

# Diagnostic message written to the verbose stream; it does not affect the output objects.
Write-Verbose "Using built-in Markdig functionality to parse markdown files"

function Get-LinksFromMarkdownAst {
    <#
    .SYNOPSIS
    Recursively walks a Markdig syntax tree and records every link it finds.

    .DESCRIPTION
    Inspects the given node and, when it is an inline link, an autolink or a
    link reference definition, appends a PSCustomObject (Path, Line, Column,
    Url, Text, Type, IsImage) to -Links. It then recurses into the node's
    children so that the whole subtree is covered.

    .PARAMETER Node
    The Markdig syntax node (document, block or inline) to inspect.

    .PARAMETER FileName
    Value stored in the Path property of every recorded link.

    .PARAMETER Links
    ArrayList that receives the link objects. The function returns without
    doing anything when this is $null.

    .OUTPUTS
    None. Results are appended to -Links.
    #>
    param(
        [Parameter(Mandatory)]
        [object]$Node,
        [Parameter(Mandatory)]
        [string]$FileName,
        [System.Collections.ArrayList]$Links
    )

    # Without a collector there is nowhere to store results
    if ($null -eq $Links) {
        return
    }

    # Step 1: record the current node if it is itself a link
    if ($Node -is [Markdig.Syntax.Inlines.LinkInline]) {
        $linkInfo = [PSCustomObject]@{
            Path    = $FileName
            Line    = $Node.Line + 1 # Convert to 1-based line numbering
            Column  = $Node.Column + 1 # Convert to 1-based column numbering
            Url     = $Node.Url ?? ""
            Text    = $Node.FirstChild?.ToString() ?? ""
            Type    = "Inline"
            IsImage = $Node.IsImage
        }
        [void]$Links.Add($linkInfo)
    }
    elseif ($Node -is [Markdig.Syntax.Inlines.AutolinkInline]) {
        $linkInfo = [PSCustomObject]@{
            Path    = $FileName
            Line    = $Node.Line + 1
            Column  = $Node.Column + 1
            Url     = $Node.Url ?? ""
            Text    = $Node.Url ?? "" # An autolink's visible text is its URL
            Type    = "AutoLink"
            IsImage = $false
        }
        [void]$Links.Add($linkInfo)
    }
    elseif ($Node -is [Markdig.Syntax.LinkReferenceDefinition]) {
        # Definitions inside a LinkReferenceDefinitionGroup are reached through
        # the container recursion in step 2. The group must not be enumerated
        # here as well, otherwise every definition would be recorded twice.
        $linkInfo = [PSCustomObject]@{
            Path    = $FileName
            Line    = $Node.Line + 1
            Column  = $Node.Column + 1
            Url     = $Node.Url ?? ""
            Text    = $Node.Label ?? ""
            Type    = "Reference"
            IsImage = $false
        }
        [void]$Links.Add($linkInfo)
    }

    # Step 2: descend into the node's children
    if ($Node -is [Markdig.Syntax.ContainerBlock]) {
        # Covers the MarkdownDocument root and every other block container,
        # including LinkReferenceDefinitionGroup
        foreach ($childBlock in $Node) {
            Get-LinksFromMarkdownAst -Node $childBlock -FileName $FileName -Links $Links
        }
    }
    elseif ($Node -is [Markdig.Syntax.LeafBlock] -and $null -ne $Node.Inline) {
        # Leaf blocks keep their inline content under the Inline property
        Get-LinksFromMarkdownAst -Node $Node.Inline -FileName $FileName -Links $Links
    }
    elseif ($Node -is [Markdig.Syntax.Inlines.ContainerInline]) {
        # Inline containers expose their children as a FirstChild/NextSibling chain
        $childInline = $Node.FirstChild
        while ($null -ne $childInline) {
            Get-LinksFromMarkdownAst -Node $childInline -FileName $FileName -Links $Links
            $childInline = $childInline.NextSibling
        }
    }
    elseif ($Node.PSObject.Properties.Name -contains "FirstChild" -and $null -ne $Node.FirstChild) {
        # Fallback for any other node type that exposes a FirstChild chain
        $childInline = $Node.FirstChild
        while ($null -ne $childInline) {
            Get-LinksFromMarkdownAst -Node $childInline -FileName $FileName -Links $Links
            $childInline = $childInline.NextSibling
        }
    }
}

function Parse-ChangelogFiles {
    <#
    .SYNOPSIS
    Parses every *.md file directly inside a directory and returns its links.

    .DESCRIPTION
    Each markdown file is parsed with Markdig and walked with
    Get-LinksFromMarkdownAst. A file that fails to parse produces a warning
    and is skipped. The result is filtered by the $LinkType variable taken
    from the calling scope (the script's -LinkType parameter) unless that
    value is "All".

    .PARAMETER Path
    Directory to search for markdown files (not recursive).

    .OUTPUTS
    PSCustomObject link records. Nothing is returned when the path does not
    exist or contains no markdown files.
    #>
    param(
        [string]$Path
    )

    if (-not (Test-Path $Path)) {
        Write-Error "CHANGELOG directory not found: $Path"
        return
    }

    # @() guarantees an array so that .Count is reliable for zero or one match
    $markdownFiles = @(Get-ChildItem -Path $Path -Filter "*.md" -File)

    if ($markdownFiles.Count -eq 0) {
        Write-Warning "No markdown files found in $Path"
        return
    }

    # Markdown.Parse expects a built MarkdownPipeline, not the builder itself.
    # Precise source location is enabled because link records report the
    # Line/Column of inline nodes. The pipeline is created once and reused.
    $pipelineBuilder = [Markdig.MarkdownPipelineBuilder]::new()
    $pipelineBuilder.PreciseSourceLocation = $true
    $pipeline = $pipelineBuilder.Build()

    $allLinks = [System.Collections.ArrayList]::new()

    foreach ($file in $markdownFiles) {
        Write-Verbose "Processing file: $($file.Name)"

        try {
            $content = Get-Content -Path $file.FullName -Raw -Encoding UTF8

            # Parse the markdown content using Markdig
            $document = [Markdig.Markdown]::Parse($content, $pipeline)

            # Extract links from the AST
            Get-LinksFromMarkdownAst -Node $document -FileName $file.FullName -Links $allLinks
        }
        catch {
            # One unreadable file must not prevent the others from being processed
            Write-Warning "Error processing file $($file.Name): $($_.Exception.Message)"
        }
    }

    # Filter by link type if specified ($LinkType is resolved from the calling scope)
    if ($LinkType -ne "All") {
        $allLinks = $allLinks | Where-Object { $_.Type -eq $LinkType }
    }

    return $allLinks
}

# Script entry point: gather the links from the markdown files under $ChangelogPath
$extractedLinks = Parse-ChangelogFiles -Path $ChangelogPath

# Emit the collected link objects to the pipeline
$extractedLinks
177 changes: 177 additions & 0 deletions .github/actions/infrastructure/markdownlinks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
# Verify Markdown Links Action

A GitHub composite action that verifies all links in markdown files using PowerShell and Markdig.

## Features

- ✅ Parses markdown files using Markdig (built into PowerShell 7)
- ✅ Extracts all link types: inline links, reference links, and autolinks
- ✅ Verifies HTTP/HTTPS links with configurable timeouts and retries
- ✅ Validates local file references
- ✅ Supports excluding specific URL patterns
- ✅ Provides detailed error reporting with file locations
- ✅ Outputs metrics for CI/CD integration

## Usage

### Basic Usage

```yaml
- name: Verify Markdown Links
uses: ./.github/actions/infrastructure/markdownlinks
with:
path: './CHANGELOG'
```

### Advanced Usage

```yaml
- name: Verify Markdown Links
uses: ./.github/actions/infrastructure/markdownlinks
with:
path: './docs'
fail-on-error: 'true'
timeout: 30
max-retries: 2
exclude-patterns: '*.example.com/*,*://localhost/*'
```

### With Outputs

```yaml
- name: Verify Markdown Links
id: verify-links
uses: ./.github/actions/infrastructure/markdownlinks
with:
path: './CHANGELOG'
fail-on-error: 'false'

- name: Display Results
run: |
echo "Total links: ${{ steps.verify-links.outputs.total-links }}"
echo "Passed: ${{ steps.verify-links.outputs.passed-links }}"
echo "Failed: ${{ steps.verify-links.outputs.failed-links }}"
echo "Skipped: ${{ steps.verify-links.outputs.skipped-links }}"
```

## Inputs

| Input | Description | Required | Default |
|-------|-------------|----------|---------|
| `path` | Path to the directory containing markdown files to verify | No | `./CHANGELOG` |
| `exclude-patterns` | Comma-separated list of URL patterns to exclude from verification | No | `''` |
| `fail-on-error` | Whether to fail the action if any links are broken | No | `true` |
| `timeout` | Timeout in seconds for HTTP requests | No | `30` |
| `max-retries` | Maximum number of retries for failed requests | No | `2` |

## Outputs

| Output | Description |
|--------|-------------|
| `total-links` | Total number of unique links checked |
| `passed-links` | Number of links that passed verification |
| `failed-links` | Number of links that failed verification |
| `skipped-links` | Number of links that were skipped |

## Excluded Link Types

The action automatically skips the following link types:

- **Anchor links** (`#section-name`) - Would require full markdown parsing
- **Email links** (`mailto:user@example.com`) - Cannot be verified without sending email

## GitHub Workflow Test

This section provides a workflow example and instructions for testing the link verification action.

### Testing the Workflow

To test that the workflow properly detects broken links:

1. Make a change to this file that introduces a broken link (e.g., this README.md file already contains one in the [Broken Link Test](#broken-link-test) section)
1. The workflow will run and should fail, reporting the broken link(s)
1. Revert your change to this file
1. Push again to verify the workflow passes

### Example Workflow Configuration

```yaml
name: Verify Links

on:
push:
branches: [ main ]
paths:
- '**/*.md'
pull_request:
branches: [ main ]
paths:
- '**/*.md'
schedule:
# Run weekly to catch external link rot
- cron: '0 0 * * 0'

jobs:
verify-links:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Verify CHANGELOG Links
uses: ./.github/actions/infrastructure/markdownlinks
with:
path: './CHANGELOG'
fail-on-error: 'true'

- name: Verify Documentation Links
uses: ./.github/actions/infrastructure/markdownlinks
with:
path: './docs'
fail-on-error: 'false'
exclude-patterns: '*.internal.example.com/*'
```

## How It Works

1. **Parse Markdown**: Uses `Parse-MarkdownLink.ps1` to extract all links from markdown files using Markdig
2. **Deduplicate**: Groups links by URL to avoid checking the same link multiple times
3. **Verify Links**:
- HTTP/HTTPS links: Makes HEAD/GET requests with configurable timeout and retries
- Local file references: Checks if the file exists relative to the markdown file
- Excluded patterns: Skips links matching the exclude patterns
4. **Report Results**: Displays detailed results with file locations for failed links
5. **Set Outputs**: Provides metrics for downstream steps

## Error Output Example

```
✗ FAILED: https://example.com/broken-link - HTTP 404
Found in: /path/to/file.md:42:15
Found in: /path/to/other.md:100:20

Link Verification Summary
============================================================
Total URLs checked: 150
Passed: 145
Failed: 2
Skipped: 3

Failed Links:
• https://example.com/broken-link
Error: HTTP 404
Occurrences: 2
```

## Requirements

- PowerShell 7+ (includes Markdig)
- Runs on: `ubuntu-latest`, `windows-latest`, `macos-latest`

## Broken Link Test

- [Broken Link](https://github.com/PowerShell/PowerShell/wiki/NonExistentPage404)

## License

Same as the PowerShell repository.
Loading