Skip to content

Instantly share code, notes, and snippets.

@Slivicon
Last active August 30, 2024 20:36
Show Gist options
  • Save Slivicon/835244995a5b55f9f26e43d091749f3f to your computer and use it in GitHub Desktop.
Save Slivicon/835244995a5b55f9f26e43d091749f3f to your computer and use it in GitHub Desktop.
Export GitHub issues, images and comments to offline files with an index, using the GH CLI
# Useful for scenarios where you need to have a snapshot of private repo issues exported offline
#
# 1. Download and install the GH CLI https://github.com/cli/cli
# 2. Save this script
# 3. Start a PowerShell CLI and navigate to the folder with the script
# 4. Run ".\Export-GitHub-Issues.ps1" "RepoOwner/Name" "Label"
# 5. It will prompt you to do a device login to your GitHub account to authorize GH CLI
# 6. It will download the issues (and their comments) that carry the specified label
# from the specified repo (as long as your account can read it)
# 7. It will download any images in the issue body or comments
# 8. It will convert the markdown to html and rewrite the image links to use the downloaded local images
# 9. It will create an html file for each issue
# 10. It will generate a basic index.html file with links to each issue html file, enjoy your offline snapshot
#
# Script parameters (both required):
#   RepoName   - repository passed to `gh issue list -R`; the usage notes above show the "RepoOwner/Name" form
#   LabelParam - label passed to `gh issue list -l`; only issues with this label are exported
param(
[Parameter(Mandatory = $true)]
[string]$RepoName,
[Parameter(Mandatory = $true)]
[string]$LabelParam
)
# Set both the console output encoding and $OutputEncoding to UTF-8
# (presumably so non-ASCII text in gh output is read correctly - confirm)
$OutputEncoding = [Console]::OutputEncoding = [Text.UTF8Encoding]::new()
# Returns $Name with every character that is not a word character,
# hyphen or dot replaced by an underscore, so the result is safe to
# use as a file or folder name.
function Get-SanitizedFileName {
    param([string]$Name)
    $unsafeChars = '[^\w\-\.]'
    return [regex]::Replace($Name, $unsafeChars, '_')
}
# HTML-encodes $Text and then runs the encoded text through
# Convert-MarkdownToHtml, returning the resulting HTML fragment.
# Encoding happens first, so any markup typed in the source text is
# escaped before the markdown pass adds its own tags.
function Convert-AndEncode {
    param([string]$Text)
    $encoded = [System.Web.HttpUtility]::HtmlEncode($Text)
    return Convert-MarkdownToHtml -MarkdownContent $encoded
}
# Converts a small subset of markdown to HTML: lists (delegated to
# Convert-MarkdownListsToHtml), ATX headings, bold, italic, inline code
# and links. Line feeds become <br> and carriage returns are dropped.
#
# Parameters:
#   MarkdownContent - the markdown text (callers HTML-encode it first)
# Returns:
#   The converted HTML as a single string.
#
# Image syntax (![alt](url)) is deliberately left untouched so the caller
# can locate and rewrite image links afterwards.
function Convert-MarkdownToHtml {
    param(
        [string]$MarkdownContent
    )
    $htmlContent = Convert-MarkdownListsToHtml -MarkdownContent $MarkdownContent

    # (?m) lets ^/$ anchor on every line, not just the start/end of the text.
    # Group 1 is the run of '#' (heading level), group 2 the heading text.
    # At least one blank is required after the hashes so "#123" stays text.
    $headingRegex = '(?m)^(#{1,6})[ \t]+(.*?)\r?$'
    $codeRegex = '`([\s\S]*?)`'
    $boldRegex = '\*\*(.*?)\*\*'
    $italicRegex = '\*(.*?)\*'
    # Negative look-behind skips image links without requiring a preceding
    # character, so a link at the very start of the text is converted too.
    $linkRegex = '(?<!!)\[([^\]]+)\]\(([^\)]+)\)'

    # A MatchEvaluator is used because the tag name depends on the match.
    # [regex]::Replace hands the Match object to the script block; the
    # automatic $Matches variable is NOT populated by replace operations.
    $headingEvaluator = [System.Text.RegularExpressions.MatchEvaluator] {
        param($m)
        $level = $m.Groups[1].Value.Length
        "<h$level>$($m.Groups[2].Value)</h$level>"
    }
    $htmlContent = [regex]::Replace($htmlContent, $headingRegex, $headingEvaluator)

    # Bold must run before italic, otherwise '**' is consumed as two '*'.
    $htmlContent = $htmlContent -replace $boldRegex, '<strong>$1</strong>'
    $htmlContent = $htmlContent -replace $italicRegex, '<em>$1</em>'
    $htmlContent = $htmlContent -replace $codeRegex, '<code>$1</code>'
    $htmlContent = $htmlContent -replace $linkRegex, '<a href="$2">$1</a>'
    $htmlContent = $htmlContent -replace '\n', '<br>' -replace '\r', ''
    return $htmlContent
}
# Rewrites markdown list lines as HTML <ul>/<ol> lists and passes every
# other line through unchanged.
#
# Parameters:
#   MarkdownContent - text whose lines are separated by line feeds
# Returns:
#   The rewritten text; every emitted line or tag is followed by a line feed.
#
# Nesting is driven by the amount of leading whitespace of each item:
#   * a deeper item opens a new list inside the current one,
#   * a shallower item closes the deeper lists first,
#   * a different list type at the same depth closes the current list and
#     opens a new one.
# Any non-list line (including a blank line) closes all open lists.
function Convert-MarkdownListsToHtml {
    param (
        [string]$MarkdownContent
    )
    $lines = $MarkdownContent -split "`n"
    $htmlOutput = [System.Text.StringBuilder]::new()
    # Each entry records the tag ("ul"/"ol") and indentation of an open list
    $stack = [System.Collections.Generic.Stack[object]]::new()

    foreach ($line in $lines) {
        # Group 1 = leading whitespace, group 2 = item text
        $listType = $null
        if ($line -match '^(\s*)[-*+]\s+(.*)') {
            $listType = "ul"
        }
        elseif ($line -match '^(\s*)\d+\.\s+(.*)') {
            $listType = "ol"
        }

        if ($null -eq $listType) {
            # Non-list content or blank line: close everything that is open
            while ($stack.Count -gt 0) {
                [void]$htmlOutput.Append("</$($stack.Pop().Type)>`n")
            }
            [void]$htmlOutput.Append("$line`n")
            continue
        }

        # Depth is the width of the leading whitespace; marker characters
        # or digits inside the item text must not influence it.
        $indentation = $Matches[1].Length
        $itemText = $Matches[2]

        # Close lists that are deeper than this item, and a same-depth list
        # of the other type.
        while ($stack.Count -gt 0 -and (
                $stack.Peek().Indentation -gt $indentation -or
                ($stack.Peek().Indentation -eq $indentation -and $stack.Peek().Type -ne $listType))) {
            [void]$htmlOutput.Append("</$($stack.Pop().Type)>`n")
        }

        # Open a new list when nothing is open or this item is deeper
        if ($stack.Count -eq 0 -or $stack.Peek().Indentation -lt $indentation) {
            [void]$htmlOutput.Append("<$listType>`n")
            $stack.Push([pscustomobject]@{ Type = $listType; Indentation = $indentation })
        }
        [void]$htmlOutput.Append("<li>$itemText</li>`n")
    }

    # Close any remaining open lists
    while ($stack.Count -gt 0) {
        [void]$htmlOutput.Append("</$($stack.Pop().Type)>`n")
    }
    return $htmlOutput.ToString()
}
# Formats a timestamp for display, e.g. "Friday, August 30, 2024 (8:36 p.m. -04:00)".
#
# Parameters:
#   DateString - the timestamp as text (ISO 8601 or invariant MM/dd/yyyy form)
# Returns:
#   The date rendered with the en-CA culture's day and month names.
#
# Parsing uses the invariant culture rather than the session culture.
# PowerShell stringifies DateTime values with the invariant culture when
# binding them to a [string] parameter, so parsing with a day-first session
# culture (for example en-GB) would throw or swap day and month.
function Format-Date {
    param([string]$DateString)
    $invariant = [System.Globalization.CultureInfo]::InvariantCulture
    $date = [DateTime]::Parse($DateString, $invariant)
    return $date.ToString("dddd, MMMM d, yyyy (h:mm tt zzz)", [System.Globalization.CultureInfo]::GetCultureInfo("en-CA"))
}
# Downloads $Url to $OutFile, sending $Token in an Authorization header.
#
# Parameters:
#   Url        - address of the image
#   OutFile    - path the response body is written to
#   Token      - value sent as "token <Token>" in the Authorization header
#   MaxRetries - number of attempts before giving up (default 3)
#   RetryDelay - seconds to wait between attempts (default 5)
# Returns:
#   $true as soon as one attempt succeeds, $false when all attempts failed.
function Get-Image {
    param (
        [string]$Url,
        [string]$OutFile,
        [string]$Token,
        [int]$MaxRetries = 3,
        [int]$RetryDelay = 5
    )
    $request = @{
        Uri         = $Url
        OutFile     = $OutFile
        Headers     = @{ "Authorization" = "token $Token" }
        ErrorAction = 'Stop'
    }
    $i = 0
    while ($i -lt $MaxRetries) {
        $i++
        try {
            Invoke-WebRequest @request
            return $true
        }
        catch {
            Write-Warning "Attempt $i of $MaxRetries failed to download image from $Url. Error: $_"
            # No point sleeping after the final attempt
            $attemptsRemain = $i -lt $MaxRetries
            if ($attemptsRemain) {
                Start-Sleep -Seconds $RetryDelay
            }
        }
    }
    return $false
}
# Returns the GitHub token held by the gh CLI, starting an interactive
# `gh auth login` first when no token is available yet.
#
# Returns:
#   Whatever `gh auth token` prints (empty if login did not succeed).
#
# Only the token may reach the output stream: callers assign the result of
# this function to a variable, so status text or login output written to the
# pipeline would become part of the "token".
function Get-GitHubAuthToken {
    # Try to get the token directly from the 'gh auth token' command
    $authToken = & gh auth token
    # If the command fails or returns an empty string, prompt the user for login
    if (!$authToken) {
        # Write-Host goes to the host/information stream, not the return value
        Write-Host "Failed to retrieve authentication token. Please log in."
        # Start-Process keeps gh attached to the console (its interactive
        # prompts need a terminal) and emits nothing on the pipeline.
        Start-Process -FilePath 'gh' -ArgumentList 'auth', 'login' -NoNewWindow -Wait
        $authToken = & gh auth token
    }
    return $authToken
}
Write-Output "Getting GitHub Auth token from gh cli"
$ghToken = Get-GitHubAuthToken
# System.Web provides the HttpUtility class used for HTML encoding
Add-Type -AssemblyName System.Web
# Output locations: a folder named after repo and label, with an images subfolder
$folderName = Get-SanitizedFilename -Name "$RepoName-$LabelParam"
$imageFolder = Join-Path -Path $folderName -ChildPath "images"
# An existing output folder is only removed after the user confirms with 'y';
# any other answer ends the script without touching it
if (Test-Path -Path $folderName) {
    $response = Read-Host "The output folder '$folderName' already exists. Do you want to delete it and continue? (y/n)"
    if ($response -ne 'y') {
        Write-Output "Aborted. Exiting script."
        exit
    }
    Remove-Item -Path $folderName -Recurse -Force
    Write-Output "Deleted existing folder: $folderName"
}
# Writes index.html into $folderName: an ordered list with one link per
# <title> found in the folder's other *.html files.
#
# Parameters:
#   folderName - folder that holds the exported issue pages; also used as
#                the page heading
# Output:
#   Creates/overwrites <folderName>/index.html and emits a status message.
#
# Files are ordered naturally (2.html before 10.html) by left-padding every
# run of digits in the file name before comparing. A pre-existing index.html
# is skipped so the index never lists itself when the function is re-run.
function Write-HtmlIndex {
    param (
        [string]$folderName
    )
    $indexName = 'index.html'
    # Get all HTML files in the folder except the index itself
    $htmlFiles = Get-ChildItem -Path $folderName -Filter "*.html" |
        Where-Object { $_.Name -ne $indexName } |
        Sort-Object { [regex]::Replace($_.Name, '\d+', { $args[0].Value.PadLeft(20) }) }

    # One <li> per <title> line found; collected from the loop output
    # instead of growing an array with +=
    $listItems = foreach ($file in $htmlFiles) {
        Get-Content -Path $file.FullName -Encoding UTF8 |
            Select-String -Pattern '<title>(.*?)<\/title>' |
            ForEach-Object { " <li><a href='$($file.Name)'>$($_.Matches[0].Groups[1].Value)</a></li>`n" }
    }

    # Generate the index HTML
    $pageHead = @"
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Table of Contents</title>
<style>
body {
font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji;
line-height: 1.5;
color: #24292e;
max-width: 980px;
margin: 0 auto;
padding: 45px;
}
</style>
</head>
<body>
<h1>$folderName</h1>
<ol>
"@
    $pageTail = @"
</ol>
</body>
</html>
"@
    # A here-string has no trailing newline, so add one after <ol> to keep
    # the first list item on its own line
    $indexHtml = $pageHead + "`n" + (@($listItems) -join '') + $pageTail

    # Save the index HTML to a file
    $fileName = Join-Path $folderName $indexName
    $indexHtml | Out-File -FilePath $fileName -Encoding utf8
    Write-Output "Index HTML file created: $fileName"
}
# Make sure the output folder and its images subfolder exist
# (-Force: no error when they are already there); the created
# DirectoryInfo objects are discarded
$null = New-Item -ItemType Directory -Force -Path $folderName
$null = New-Item -ItemType Directory -Force -Path $imageFolder
# Ask gh for up to 1000 issues carrying the label, as JSON with the
# fields the export needs, and turn the JSON into objects
$issueFields = "number,title,body,comments,labels,createdAt,updatedAt,author,url,state,assignees"
$issues = gh issue list -R $RepoName -l $LabelParam --json $issueFields --limit 1000 | ConvertFrom-Json
# Export every issue to <folderName>/<number>.html:
#   1. build the page (metadata, description, comments),
#   2. download each markdown image referenced in the page,
#   3. swap the markdown image link for an <img> tag pointing at the local copy,
#   4. write the page to disk.
foreach ($issue in $issues) {
    $issueNumber = $issue.number
    Write-Output "Processing issue $issueNumber"
    $fileName = Join-Path $folderName "$issueNumber.html"
    # <title> holds plain text only, so the issue title is just HTML-encoded
    # here; running it through the markdown converter would put a <br> tag
    # into the title text (and from there into the index page).
    $pageTitle = [System.Web.HttpUtility]::HtmlEncode($issue.title)
    $htmlContent = @"
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>${RepoName} Issue ${issueNumber}: $pageTitle</title>
<style>
body {
font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji;
line-height: 1.5;
color: #24292e;
max-width: 980px;
margin: 0 auto;
padding: 45px;
}
h1 { font-size: 2em; border-bottom: 1px solid #eaecef; padding-bottom: 0.3em; }
h2 { font-size: 1.5em; border-bottom: 1px solid #eaecef; padding-bottom: 0.2em; }
h3 { font-size: 1.2em; border-bottom: 1px solid #eaecef; padding-bottom: 0.2em; }
img { max-width: 100%; }
.issue-meta { color: #586069; }
.comment { background-color: #f6f8fa; border: 1px solid #e1e4e8; border-radius: 6px; padding: 15px; margin-bottom: 15px; }
</style>
</head>
<body>
<h1><a href="$($issue.url)">${RepoName} Issue ${issueNumber} ($($issue.state))</a></h1>
<h2>$(Convert-AndEncode $issue.title)</h2>
<div class="issue-meta">
<p><strong>Author: </strong>$($issue.author.login)</p>
<p><strong>Created: </strong>$(Format-Date $issue.createdAt)</p>
<p><strong>Updated: </strong>$(Format-Date $issue.updatedAt)</p>
<p><strong>Labels: </strong>$(Convert-AndEncode ($issue.labels.name -join ', '))</p>
</div>
<h3>Description</h3>
<div class="comment">
$(Convert-AndEncode $issue.body)
</div>
<h3>Comments</h3>
"@
    # Append one block per comment
    foreach ($comment in $issue.comments) {
        $commentUser = $comment.author.login
        $commentCreatedAt = $(Format-Date $comment.CreatedAt)
        $commentBody = $comment.body
        $htmlContent += @"
<div class=`"comment`">
<p class=`"comment-header`">${commentUser} commented on ${commentCreatedAt}:</p>
<p class='comment-body'>
$(Convert-AndEncode $commentBody)
</p>
</div>
"@
    }

    # Find markdown image links with any alt text (group 1 = alt, group 2 = URL)
    $imageLinks = [regex]::Matches($htmlContent, '!\[([^\]]*)\]\((.*?)\)')
    # The same image may be referenced several times; handle each link once
    $handledLinks = [System.Collections.Generic.HashSet[string]]::new()
    foreach ($match in $imageLinks) {
        if (-not $handledLinks.Add($match.Value)) {
            continue
        }
        $altText = $match.Groups[1].Value
        # The page text was HTML-encoded, so undo that to get the real URL
        # (e.g. '&amp;' back to '&') before requesting it
        $imageUrl = [System.Web.HttpUtility]::HtmlDecode($match.Groups[2].Value)
        # File name = last path segment, without query string or fragment
        $imageName = [System.IO.Path]::GetFileName(($imageUrl -split '[?#]')[0])
        if (-not $imageName) {
            $imageName = "image"
        }
        # Try to determine extension from content type
        try {
            $response = Invoke-WebRequest -Uri $imageUrl -Method Get -Headers @{"Authorization" = "token $ghToken" } -ErrorAction Stop
            $contentType = "$($response.Headers['Content-Type'])"
            # Trailing wildcard tolerates parameters such as "; charset=..."
            $imageExt = switch -Wildcard ($contentType) {
                "image/jpeg*" { ".jpg" }
                "image/png*" { ".png" }
                "image/gif*" { ".gif" }
                "image/webp*" { ".webp" }
                default { "" }
            }
        }
        catch {
            Write-Warning "Failed to determine content type for $imageUrl"
            $imageExt = ""
        }
        # Do not turn "shot.png" into "shot.png.png"
        if ($imageExt -and $imageName.EndsWith($imageExt, [System.StringComparison]::OrdinalIgnoreCase)) {
            $imageExt = ""
        }
        $safeImageName = Get-SanitizedFilename "$imageName$imageExt"
        $fullImagePath = Join-Path $folderName (Join-Path "images" $safeImageName)
        # URLs in HTML always use '/', whatever the OS path separator is
        $localImageSrc = "images/$safeImageName"
        Write-Output "Downloading $imageUrl"
        $downloadSuccess = Get-Image -Url $imageUrl -OutFile $fullImagePath -Token $ghToken
        if ($downloadSuccess) {
            # Replace image link in HTML. String.Replace is literal, so '$'
            # in the replacement text is never read as a regex group reference.
            if (-not $altText) {
                $altText = $safeImageName
            }
            $imgTag = "<img src=`"$localImageSrc`" alt=`"$altText`">"
            $htmlContent = $htmlContent.Replace($match.Value, $imgTag)
        }
        else {
            Write-Warning "Failed to download image from $imageUrl after multiple attempts. Keeping original markdown link."
        }
    }
    # Every <div> above is already closed, so only body and html remain
    $htmlContent += "</body></html>"
    # Save HTML file
    $htmlContent | Out-File -FilePath $fileName -Encoding utf8
}
# Report where the export was written, then build index.html for that folder
Write-Output "Downloaded and formatted output to: $folderName"
Write-HtmlIndex -folderName $folderName
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment