Skip to content

Instantly share code, notes, and snippets.

@pjmagee
Created July 22, 2024 18:33
Show Gist options
  • Save pjmagee/003b3816fe0372aa4eaa834d2986b051 to your computer and use it in GitHub Desktop.
Save pjmagee/003b3816fe0372aa4eaa834d2986b051 to your computer and use it in GitHub Desktop.
Get all Pages with Galactic Coordinates
/// <summary>
/// Console program that lists the pages found in the direct subcategories of a
/// category on starwars.fandom.com and prints the infobox wikitext of each page.
/// </summary>
class Program
{
    /// <summary>Endpoint of the MediaWiki API used for every request.</summary>
    private const string ApiUrl = "https://starwars.fandom.com/api.php";

    /// <summary>Title prefix that marks a category member as a subcategory.</summary>
    private const string CategoryPrefix = "Category:";

    /// <summary>Opening marker searched for when locating an infobox template.</summary>
    private const string InfoboxMarker = "{{Infobox";

    // One shared client is reused for all requests instead of creating one per call.
    private static readonly HttpClient client = new HttpClient();

    /// <summary>
    /// Entry point: gathers the page titles and prints each title followed by its infobox.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static async Task Main(string[] args)
    {
        string category = "Category:Locations_by_grid_square";
        List<string> allPages = await GetAllPagesInCategoryAndSubcategories(category);

        foreach (string page in allPages)
        {
            Console.WriteLine($"Page: {page}");
            string infobox = await GetInfoboxData(page);
            Console.WriteLine($"Infobox: {infobox}\n");
        }
    }

    /// <summary>
    /// Collects the page titles contained in every direct subcategory of <paramref name="category"/>.
    /// </summary>
    /// <remarks>
    /// Only one level is visited: pages that are direct members of <paramref name="category"/>
    /// itself, and members of nested sub-subcategories, are not included. A page that belongs
    /// to several subcategories appears once per subcategory.
    /// </remarks>
    /// <param name="category">Full category title, including the "Category:" prefix.</param>
    /// <returns>Page titles in the order the subcategories were returned.</returns>
    private static async Task<List<string>> GetAllPagesInCategoryAndSubcategories(string category)
    {
        var allPages = new List<string>();
        List<string> subcategories = await GetSubcategories(category);

        foreach (string subcategory in subcategories)
        {
            List<string> pages = await GetAllCategoryMembers(subcategory);
            Console.WriteLine($"Subcategory: {subcategory}");
            allPages.AddRange(pages);
        }

        return allPages;
    }

    /// <summary>
    /// Returns the titles of all members of <paramref name="category"/> that are themselves categories.
    /// </summary>
    /// <param name="category">Full category title, including the "Category:" prefix.</param>
    /// <returns>Subcategory titles across all result pages.</returns>
    private static Task<List<string>> GetSubcategories(string category)
    {
        return CollectMemberTitles(category, wantSubcategories: true);
    }

    /// <summary>
    /// Returns the titles of all members of <paramref name="category"/> that are not categories.
    /// </summary>
    /// <param name="category">Full category title, including the "Category:" prefix.</param>
    /// <returns>Non-category member titles across all result pages.</returns>
    private static Task<List<string>> GetAllCategoryMembers(string category)
    {
        return CollectMemberTitles(category, wantSubcategories: false);
    }

    /// <summary>
    /// Pages through the category member list and keeps either the subcategory titles
    /// or the non-category titles, depending on <paramref name="wantSubcategories"/>.
    /// </summary>
    /// <param name="category">Full category title to enumerate.</param>
    /// <param name="wantSubcategories">
    /// <c>true</c> to keep titles starting with "Category:"; <c>false</c> to keep all others.
    /// </param>
    /// <returns>The selected titles from every result page.</returns>
    private static async Task<List<string>> CollectMemberTitles(string category, bool wantSubcategories)
    {
        var titles = new List<string>();
        string cmcontinue = null;

        do
        {
            JsonElement result = await GetCategoryMembers(category, cmcontinue);
            JsonElement categoryMembers = result.GetProperty("query").GetProperty("categorymembers");

            foreach (JsonElement member in categoryMembers.EnumerateArray())
            {
                string title = member.GetProperty("title").GetString();
                if (title == null)
                {
                    continue;
                }

                // Ordinal comparison: the prefix is a protocol token, not linguistic text.
                bool isSubcategory = title.StartsWith(CategoryPrefix, StringComparison.Ordinal);
                if (isSubcategory == wantSubcategories)
                {
                    titles.Add(title);
                }
            }

            // The response carries a "continue" object only while more results remain.
            cmcontinue =
                result.TryGetProperty("continue", out JsonElement continueElement)
                && continueElement.TryGetProperty("cmcontinue", out JsonElement token)
                    ? token.GetString()
                    : null;
        } while (cmcontinue != null);

        return titles;
    }

    /// <summary>
    /// Requests one page of members of <paramref name="category"/> from the API.
    /// </summary>
    /// <param name="category">Full category title, sent as <c>cmtitle</c>.</param>
    /// <param name="cmcontinue">Continuation token from the previous response, or <c>null</c> for the first page.</param>
    /// <returns>The root element of the JSON response.</returns>
    private static async Task<JsonElement> GetCategoryMembers(string category, string cmcontinue)
    {
        var parameters = new Dictionary<string, string>
        {
            { "action", "query" },
            { "list", "categorymembers" },
            { "cmtitle", category },
            { "cmlimit", "max" }, // Use 'max' to get as many results as possible per request
            { "format", "json" }
        };

        if (cmcontinue != null)
        {
            parameters.Add("cmcontinue", cmcontinue);
        }

        return await PostToApi(parameters);
    }

    /// <summary>
    /// Fetches the wikitext of <paramref name="pageTitle"/> and extracts its infobox.
    /// </summary>
    /// <param name="pageTitle">Title of the page to parse.</param>
    /// <returns>The result of <see cref="ExtractInfobox"/> applied to the page's wikitext.</returns>
    private static async Task<string> GetInfoboxData(string pageTitle)
    {
        var parameters = new Dictionary<string, string>
        {
            { "action", "parse" },
            { "page", pageTitle },
            { "prop", "wikitext" },
            { "format", "json" }
        };

        JsonElement root = await PostToApi(parameters);
        JsonElement wikitextElement = root.GetProperty("parse").GetProperty("wikitext");
        return ExtractInfobox(ReadWikitext(wikitextElement));
    }

    /// <summary>
    /// Posts form-encoded <paramref name="parameters"/> to the API and parses the JSON reply.
    /// </summary>
    /// <param name="parameters">Form fields of the API request.</param>
    /// <returns>A root element that stays valid after the underlying document is disposed.</returns>
    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
    private static async Task<JsonElement> PostToApi(Dictionary<string, string> parameters)
    {
        using var content = new FormUrlEncodedContent(parameters);
        using HttpResponseMessage response = await client.PostAsync(ApiUrl, content);
        response.EnsureSuccessStatusCode();

        string responseString = await response.Content.ReadAsStringAsync();

        // JsonDocument is disposable; Clone() detaches the element so it can be
        // returned safely once the document has been disposed.
        using JsonDocument jsonDoc = JsonDocument.Parse(responseString);
        return jsonDoc.RootElement.Clone();
    }

    /// <summary>
    /// Decodes the wikitext carried by the <c>parse.wikitext</c> element of a response.
    /// </summary>
    /// <param name="wikitext">The <c>wikitext</c> JSON element.</param>
    /// <returns>The decoded text, or the raw JSON when the element has an unexpected shape.</returns>
    private static string ReadWikitext(JsonElement wikitext)
    {
        // The default JSON output of the MediaWiki API wraps the text in an object
        // under the "*" key; with formatversion=2 it is a plain string instead.
        if (wikitext.ValueKind == JsonValueKind.Object
            && wikitext.TryGetProperty("*", out JsonElement legacyText))
        {
            return legacyText.GetString() ?? string.Empty;
        }

        if (wikitext.ValueKind == JsonValueKind.String)
        {
            return wikitext.GetString() ?? string.Empty;
        }

        return wikitext.GetRawText();
    }

    /// <summary>
    /// Extracts the first template whose text starts with "{{Infobox" from <paramref name="wikitext"/>.
    /// </summary>
    /// <remarks>
    /// Brace pairs are counted so that templates nested inside the infobox do not end
    /// the extraction early. Pairs are matched left to right two characters at a time;
    /// triple-brace parameter syntax is not treated specially.
    /// </remarks>
    /// <param name="wikitext">Wikitext of a page.</param>
    /// <returns>
    /// The infobox text including its outer braces, "No infobox found" when the marker
    /// is absent, or "Incomplete infobox" when the template is never closed.
    /// </returns>
    private static string ExtractInfobox(string wikitext)
    {
        if (string.IsNullOrEmpty(wikitext))
        {
            return "No infobox found";
        }

        int infoboxStart = wikitext.IndexOf(InfoboxMarker, StringComparison.Ordinal);
        if (infoboxStart == -1)
        {
            return "No infobox found";
        }

        int depth = 0;
        int position = infoboxStart;

        while (position < wikitext.Length - 1)
        {
            char current = wikitext[position];
            char next = wikitext[position + 1];

            if (current == '{' && next == '{')
            {
                depth++;
                position += 2;
            }
            else if (current == '}' && next == '}')
            {
                depth--;
                position += 2;

                // Depth returns to zero exactly when the infobox's own braces close.
                if (depth == 0)
                {
                    return wikitext.Substring(infoboxStart, position - infoboxStart);
                }
            }
            else
            {
                position++;
            }
        }

        return "Incomplete infobox";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment