library(tidyverse)

# Read the race results CSV and keep only the 1950 season
df <- read_csv("https://raw.githubusercontent.com/lennymd/formulaOneAnalysis/main/data/race_results_statsf1.csv") %>%
  filter(year == 1950)
# Calculate the last place for a race by counting the distinct drivers,
# ignoring rows where position is "&"
get_dead_last <- function(some_race_id) {
  last <- df %>%
    select(race_id, position, driver) %>%
    filter(race_id == some_race_id) %>%
    filter(position != "&") %>%
    distinct(race_id, driver) %>%
    summarise(count = n()) %>%
    pull(count)
  return(last)
}
# If I run this for a specific race, I do get the right value. For the first race it should be 23.
cat(get_dead_last(1))
# But when I try to apply it to my dataset, I get a huge value for every row.
# How can I fix that?
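# The call that produced the huge values presumably looked something like the
# commented line below (it is not shown in the gist, so this is an assumption).
# Without rowwise(), mutate() passes the whole race_id column to
# get_dead_last(); the elementwise filter then keeps every row, and a single
# season-wide count of driver-race pairs gets recycled into every row:
# df_broken <- df %>% mutate(last_place = get_dead_last(race_id))  # hypothetical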
df2 <- df %>%
  rowwise() %>%
  mutate(last_place = get_dead_last(race_id))
# With rowwise() the values look right, but the function then runs once for
# every row, which would mean roughly 25k calls on the full dataset.
# (A grouped alternative is sketched below.)
View(df2)
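
# A possible fix, sketched under the assumption that position == "&" should be
# excluded exactly as in get_dead_last(): compute last place once per race with
# a grouped count and join it back, instead of calling the function per row.
# The names last_places and df3 are just illustrative.
last_places <- df %>%
  filter(position != "&") %>%
  distinct(race_id, driver) %>%
  count(race_id, name = "last_place")

df3 <- df %>%
  left_join(last_places, by = "race_id")

# df3$last_place should match the rowwise() result (23 for the first race),
# but the counting happens in one grouped pass over the data.
View(df3)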