Created
April 3, 2023 10:28
-
-
Save arcaravaggi/bbd248159b05e85c395e471e3bee69f2 to your computer and use it in GitHub Desktop.
A wrapper for download and cleaning of IOC bird Master Lists
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A wrapper for download and cleaning of IOC bird Master Lists.
#
# This function downloads a given list, defined by the URL, completes rows
# with the relevant taxonomic information, and outputs a data frame.
# Further functions may be added and a small package developed, in time.
#
# There's almost certainly a more elegant way of doing this.
#
# Arguments:
#   earl  URL of an IOC Master List .xlsx file, as a single string
#         (see https://www.worldbirdnames.org/new/ioc-lists/master-list-2);
#         currently defaults to v13.1.
#
# Returns:
#   A data frame sorted by infraclass, order, family_scientific, genus,
#   species_scientific and subspecies.
#
# Side effects:
#   Attaches tidyverse, janitor and readxl (stops if any is not installed).
#   Downloads the workbook to a temporary file, which is deleted on exit.
#
# E.g.
#   df <- IOCmasteR()
IOCmasteR <- function(
    earl = "https://worldbirdnames.org/master_ioc_list_v13.1.xlsx") {
  stopifnot(is.character(earl), length(earl) == 1L, !is.na(earl))

  # library() rather than require(): a missing package must stop the function
  # here instead of returning FALSE and failing later with an obscure error.
  library(tidyverse)
  library(janitor)
  library(readxl)

  # Download Master List; the temporary copy is removed however we exit.
  temp <- tempfile(fileext = ".xlsx")
  on.exit(unlink(temp), add = TRUE)
  status <- download.file(earl, destfile = temp, mode = "wb")
  if (status != 0) {
    stop("Download failed (status ", status, ") for: ", earl, call. = FALSE)
  }

  # The first three rows of the sheet are skipped; the fourth supplies the
  # column names.
  birds <- read_excel(temp, col_names = TRUE, skip = 3)

  # Clean column names
  birds <- clean_names(birds)

  # Builds the reference column used to merge the two data frames below.
  # Both frames must use this exact expression so that their keys match.
  add_name_key <- function(df) {
    df %>%
      mutate(name = paste(family_scientific, genus,
                          " ", species_scientific, subspecies))
  }

  # Fill relevant columns and thin to single-row records, only.
  # Note that certain fields need to be excluded to prevent erroneous
  # duplication of discrete data.
  birds2 <- birds %>%
    select(-c(parvclass,
              authority,
              breeding_range_subregion_s,
              nonbreeding_range,
              code,
              comment)) %>%
    fill(infraclass) %>%
    group_by(infraclass) %>%
    fill(order:species_scientific, .direction = "downup") %>%
    group_by(across(infraclass:species_scientific)) %>%
    # Within each species group, collapse every remaining column to its
    # non-missing values (or a single NA when the column is entirely missing).
    reframe(across(everything(),
                   ~ if (all(is.na(.x))) NA else na.omit(.x))) %>%
    add_name_key()

  # Create comparative df for merge, containing reference columns and those
  # previously excluded
  birds3 <- birds %>%
    fill(c("infraclass",
           "order",
           "family_scientific",
           "family_english",
           "genus",
           "species_scientific",
           "species_english"), .direction = "down") %>%
    add_name_key()

  # Merge data frames
  b <- merge(birds2,
             birds3[c("name",
                      "comment",
                      "code",
                      "breeding_range_subregion_s",
                      "authority",
                      "nonbreeding_range")],
             by = "name",
             all.x = FALSE)

  # Re-order columns to something more logical. Column 1 (the `name` merge
  # key) is dropped here. NOTE(review): the positions assume the 15-column
  # layout produced by the v13.1 workbook -- confirm before using other
  # versions.
  b <- b[, c(2:7, 9, 8, 14, 10, 13, 15, 12, 11)]

  # Sort data
  b <- b[order(b$infraclass,
               b$order,
               b$family_scientific,
               b$genus,
               b$species_scientific,
               b$subspecies), ]

  b
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment