Skip to content

Instantly share code, notes, and snippets.

@black-tea
Created January 7, 2016 19:34
Show Gist options
  • Save black-tea/4c0cbafc145800d86db9 to your computer and use it in GitHub Desktop.
This script summarizes volume data we obtained from the RIITS program. It filters data for only those sensors performing above 90% reporting and then sums over each LinkID for the day/week.
library(data.table)
library(fasttime)
# Input: pipe-delimited RIITS traffic-volume export (one month of sensor reports)
week1.file <- "Z:/GIS/DataLibrary/Riits/TrafficVolumes/data dump/EXPORT - 2015/october.dsv"
# Rows read per fread() pass; the export is too large to load in one shot
chunkSize <- 500000
# Day boundaries (midnight, local time) for the analysis week starting 2015-10-19.
# Each weekday is filtered as the half-open window [day, next day).
week1.mon <- as.POSIXct("2015-10-19 00:00:00")
week1.tue <- as.POSIXct("2015-10-20 00:00:00")
week1.wed <- as.POSIXct("2015-10-21 00:00:00")
week1.thu <- as.POSIXct("2015-10-22 00:00:00")
week1.fri <- as.POSIXct("2015-10-23 00:00:00")
week1.sat <- as.POSIXct("2015-10-24 00:00:00")
# Per-day accumulators: one filtered data.table per chunk, combined after the loop
week1.Mon.list <- list()
week1.Tue.list <- list()
week1.Wed.list <- list()
week1.Thu.list <- list()
week1.Fri.list <- list()
print("Begin Processing")
# First chunk (with headers); drop = c(1, 4, 5, 7) discards the
# ConfigID, Occupancy, Speed, and HOVSpeed columns.
dataChunk <- fread(week1.file, nrows = chunkSize, header = TRUE, sep = "|",
                   stringsAsFactors = TRUE, drop = c(1, 4, 5, 7))
# Column names saved so the headerless follow-up reads can be relabeled
colnames <- names(dataChunk)
index <- 0
repeat {
  index <- index + 1
  print(paste('Processing rows:', index * chunkSize))
  # Convert the character timestamp to POSIXct. fasttime could not be used
  # because the source is "YYYYmmdd HH:MM:SS", not "YYYY-mm-dd hh:mm:ss".
  dataChunk$DATE_AND_TIME_SR <- as.POSIXct(dataChunk$DATE_AND_TIME_SR, format="%Y%m%d %H:%M:%S")
  # Keep only LADOT rows falling in each weekday's [midnight, next midnight) window
  week1.Mon.list[[index]] <- dataChunk[dataChunk$DATE_AND_TIME_SR >= week1.mon & dataChunk$DATE_AND_TIME_SR < week1.tue & dataChunk$AGENCY == "LADOT",]
  week1.Tue.list[[index]] <- dataChunk[dataChunk$DATE_AND_TIME_SR >= week1.tue & dataChunk$DATE_AND_TIME_SR < week1.wed & dataChunk$AGENCY == "LADOT",]
  week1.Wed.list[[index]] <- dataChunk[dataChunk$DATE_AND_TIME_SR >= week1.wed & dataChunk$DATE_AND_TIME_SR < week1.thu & dataChunk$AGENCY == "LADOT",]
  week1.Thu.list[[index]] <- dataChunk[dataChunk$DATE_AND_TIME_SR >= week1.thu & dataChunk$DATE_AND_TIME_SR < week1.fri & dataChunk$AGENCY == "LADOT",]
  week1.Fri.list[[index]] <- dataChunk[dataChunk$DATE_AND_TIME_SR >= week1.fri & dataChunk$DATE_AND_TIME_SR < week1.sat & dataChunk$AGENCY == "LADOT",]
  # A short chunk means the whole file has been consumed
  if (nrow(dataChunk) != chunkSize){
    print('Processed all files!')
    break}
  # Read the next chunk without headers, again dropping the ConfigID,
  # Occupancy, Speed, and HOVSpeed columns.
  # BUGFIX: skip must be index * chunkSize + 1 — the first read consumed the
  # header line PLUS chunkSize data rows, so skipping only index * chunkSize
  # lines re-read the last data row of every chunk (duplicating it).
  dataChunk <- fread(week1.file, nrows = chunkSize, skip = index * chunkSize + 1,
                     header = FALSE, sep = "|", stringsAsFactors = TRUE, drop = c(1, 4, 5, 7))
  names(dataChunk) <- colnames
}
rm(dataChunk)
# Collapse each weekday's list of per-chunk subsets into one data.table
week1.df.Mon <- rbindlist(week1.Mon.list)
week1.df.Tue <- rbindlist(week1.Tue.list)
week1.df.Wed <- rbindlist(week1.Wed.list)
week1.df.Thu <- rbindlist(week1.Thu.list)
week1.df.Fri <- rbindlist(week1.Fri.list)
# The per-chunk accumulator lists are no longer needed; free the memory
rm(week1.Mon.list, week1.Tue.list, week1.Wed.list, week1.Thu.list, week1.Fri.list)
# Order one day's rows chronologically (ascending by report timestamp)
sort_by_time <- function(dt) {
  dt[order(dt$DATE_AND_TIME_SR, decreasing = FALSE), ]
}
# Sort each weekday, then stack Mon-Fri into the full-week table
week1.df.Mon <- sort_by_time(week1.df.Mon)
week1.df.Tue <- sort_by_time(week1.df.Tue)
week1.df.Wed <- sort_by_time(week1.df.Wed)
week1.df.Thu <- sort_by_time(week1.df.Thu)
week1.df.Fri <- sort_by_time(week1.df.Fri)
week1.df <- rbind(week1.df.Mon, week1.df.Tue, week1.df.Wed, week1.df.Thu, week1.df.Fri)
# Plot the percent of successful sensor reports for the week.
# Cross-tabulate LINK_ID x LINK_STATUS and convert to row proportions;
# column 1 holds each sensor's share of successful reports.
# NOTE(review): assumes the first LINK_STATUS level is the "success" code —
# confirm against the RIITS export's status coding.
week1.table <- table(week1.df$LINK_ID, week1.df$LINK_STATUS)
week1.prop <- prop.table(week1.table,1)
week1.prop.df <- data.frame(week1.prop[,1])
# Sensors with zero reports yield 0/0 = NaN rows; drop them with is.nan()
# instead of the fragile character comparison `!= "NaN"`.
week1.prop.df <- week1.prop.df[!is.nan(week1.prop.df[,1]), , drop = FALSE]
hist(week1.prop.df[,1],main="Figure 1. Successful Reports on 10/19/2015-10/23/2015",xlab="Percent of Week Total",ylab="Number of Sensors",breaks=seq(0,1,.05),border="NA",col="#999999",las=1)
# Plot the percent of successful sensor reports for Monday only.
mon.table <- table(week1.df.Mon$LINK_ID, week1.df.Mon$LINK_STATUS)
mon.prop <- prop.table(mon.table,1)
mon.prop.df <- data.frame(mon.prop[,1])
# Drop sensors with no Monday reports (0/0 = NaN proportions) via is.nan()
# rather than comparing the numeric column against the string "NaN".
mon.prop.df <- mon.prop.df[!is.nan(mon.prop.df[,1]), , drop = FALSE]
# Monday of this week is 2015-10-19 per week1.mon; the original title said
# 10/20/2015 and repeated "Figure 1" (already used for the week plot).
hist(mon.prop.df[,1],main="Figure 2. Successful Reports on 10/19/2015",xlab="Percent of Daily Total",ylab="Number of Sensors",breaks=seq(0,1,.05),border="NA",col="#999999",las=1)
# Sensors whose Monday success rate is below the 90% threshold are flagged
# as bad; the rownames carry their LINK_IDs.
badsensors <- rownames(mon.prop.df[mon.prop.df[,1] < 0.9, , drop = FALSE])
# Remove sensors reporting below 90%, keeping only the well-behaved links
week1.df.Mon.Filter <- week1.df.Mon[!(LINK_ID %in% badsensors)]
# Total Monday volume per link across the filtered sensors
week1.df.Mon.Sum <- aggregate(
  week1.df.Mon.Filter$VOLUME,
  by = list(LINK_ID = week1.df.Mon.Filter$LINK_ID),
  FUN = sum
)
# Output
# NOTE(review): this writes the raw, UNFILTERED Monday rows, not the
# week1.df.Mon.Sum summary computed above — confirm which product is
# intended. The filename date "20151020" also disagrees with the 10/19
# Monday window defined by week1.mon — verify.
write.csv(week1.df.Mon, file="Z:/GIS/DataLibrary/Riits/TrafficVolumes/data dump/EXPORT - 2015/20151020.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment