Skip to content

Instantly share code, notes, and snippets.

@pmartinez8241
Created August 10, 2024 05:40
Show Gist options
  • Save pmartinez8241/74f22293db3a05360f34111c5fdbbaf1 to your computer and use it in GitHub Desktop.
Save pmartinez8241/74f22293db3a05360f34111c5fdbbaf1 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(dplyr)
library(tibble)
library(janitor)
library("lubridate")
library(here)
library(skimr)
library(ggplot2)
# Read CSV files
#-------------------------------------------------------------------------------
# strptime-style layout of the ISO timestamps. `%S` (upper case) is the seconds
# field; lower-case `%s` means "seconds since the epoch" on input.
date_format <- r"{%Y-%m-%d %H:%M:%S}"
# Directory holding the raw trip CSVs (defined once and reused below).
csv_locations <- r"{C:\Users\marti\Desktop\data analysis\case_study\data}"
# NOTE(review): kept so the script's working-directory side effect is
# unchanged; nothing below needs it because list.files() returns full paths.
setwd(csv_locations)
cyclistic_file <- list.files(
  path = csv_locations,
  pattern = "\\.csv$",
  full.names = TRUE
)
# Fail early with a clear message instead of an opaque read_csv() error.
if (length(cyclistic_file) == 0) {
  stop("No .csv files found in ", csv_locations, call. = FALSE)
}
# All matching files are handed to read_csv() as one vector of paths.
bike_share_data <- read_csv(cyclistic_file)
#-------------------------------------------------------------------------------
# Row count of the freshly loaded data (the original referenced an undefined
# `bike_share_data2` here).
count(bike_share_data)
# Clean date format
#-------------------------------------------------------------------------------
# Parse a timestamp column that may mix year-first ("2024-01-31 08:15:00") and
# month-first ("1/31/2024 08:15:00") layouts.
#
# x: character vector of timestamps, or a column that is already POSIXct.
# Returns a POSIXct vector; entries matching neither layout become NA.
parse_ride_timestamp <- function(x) {
  # Column already parsed (e.g. by read_csv()): nothing to re-parse.
  if (is.POSIXct(x)) {
    return(as_datetime(x))
  }
  x <- as.character(x)
  # Try the year-first layout, then fall back to month-first for the entries
  # that did not parse. `truncated = 3` keeps date-only values parseable, and
  # `quiet = TRUE` avoids a warning for every row that belongs to the other
  # layout.
  coalesce(
    ymd_hms(x, truncated = 3, quiet = TRUE),
    mdy_hms(x, quiet = TRUE)
  )
}

bike_share_cleaned <- bike_share_data %>%
  mutate(across(c(started_at, ended_at), parse_ride_timestamp))
#-------------------------------------------------------------------------------
# Drop rides whose start is not strictly before their end
#-------------------------------------------------------------------------------
# Rows where either timestamp is NA are dropped as well, because filter()
# only keeps rows whose condition is TRUE.
bike_share_cleaned <- filter(bike_share_cleaned, ended_at > started_at)
#-------------------------------------------------------------------------------
# Columns added for analysis
#-------------------------------------------------------------------------------
# ride_length: trip duration in minutes.
# The unit is requested explicitly. With the default `units = "auto"`,
# difftime() chooses secs/mins/hours/days from the smallest difference, so a
# manual "/ 60" is only correct when that choice happens to be seconds.
bike_share_cleaned <- bike_share_cleaned %>%
  mutate(
    ride_length = as.numeric(difftime(ended_at, started_at, units = "mins"))
  )
#-------------------------------------------------------------------------------
# Trim the extreme right tail before plotting: keep rides strictly shorter
# than the 99.9th percentile of ride_length.
# Only the one quantile that is used is computed; it is read by position so the
# cutoff does not depend on how quantile() formats the "99.9%" label.
quant_no_outliers <- quantile(bike_share_cleaned$ride_length, probs = 0.999)
test_quantile <- bike_share_cleaned %>%
  filter(ride_length < quant_no_outliers[[1]])
# Ride length over time for the trimmed data.
ggplot(test_quantile, aes(x = started_at, y = ride_length)) +
  geom_line()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment