Created January 7, 2016 at 19:34.
-
-
Save black-tea/4c0cbafc145800d86db9 to your computer and use it in GitHub Desktop.
This script summarizes volume data we obtained from the RIITS program. It filters data for only those sensors performing above 90% reporting and then sums over each LinkID for the day/week.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table)
library(fasttime)

# Input: pipe-delimited ("|") RIITS traffic-volume export for October 2015.
week1.file <- "Z:/GIS/DataLibrary/Riits/TrafficVolumes/data dump/EXPORT - 2015/october.dsv"

# Rows per fread() call; the export is too large to load in one read.
chunkSize <- 500000

# Day boundaries (local midnight) for the target week, Mon 10/19 through
# Fri 10/23/2015. Saturday exists only as the exclusive upper bound for Friday.
week1.mon <- as.POSIXct("2015-10-19 00:00:00")
week1.tue <- as.POSIXct("2015-10-20 00:00:00")
week1.wed <- as.POSIXct("2015-10-21 00:00:00")
week1.thu <- as.POSIXct("2015-10-22 00:00:00")
week1.fri <- as.POSIXct("2015-10-23 00:00:00")
week1.sat <- as.POSIXct("2015-10-24 00:00:00")

# One list per weekday; each loop iteration appends that chunk's filtered rows.
week1.Mon.list <- list()
week1.Tue.list <- list()
week1.Wed.list <- list()
week1.Thu.list <- list()
week1.Fri.list <- list()

print("Begin Processing")

# First chunk carries the header row; drop ConfigID, Occupancy, Speed and
# HOVSpeed (columns 1, 4, 5, 7) to keep memory down.
dataChunk <- fread(week1.file, nrows = chunkSize, header = TRUE, sep = "|",
                   stringsAsFactors = TRUE, drop = c(1, 4, 5, 7))
# Remember the header names so later header-less chunks can be relabeled.
colnames <- names(dataChunk)
index <- 0
# Stream the file chunk by chunk: parse timestamps, keep only LADOT rows for
# each weekday of the target week, then read the next chunk until a short
# (final) chunk is seen.
repeat {
  index <- index + 1
  print(paste('Processing rows:', index * chunkSize))
  # Timestamps arrive as "YYYYmmdd HH:MM:SS"; fasttime::fastPOSIXct requires
  # "YYYY-mm-dd hh:mm:ss", so base as.POSIXct is used instead.
  dataChunk$DATE_AND_TIME_SR <- as.POSIXct(dataChunk$DATE_AND_TIME_SR, format="%Y%m%d %H:%M:%S")
  # Half-open intervals [midnight, next midnight) per day, LADOT sensors only.
  week1.Mon.list[[index]] <- dataChunk[dataChunk$DATE_AND_TIME_SR >= week1.mon & dataChunk$DATE_AND_TIME_SR < week1.tue & dataChunk$AGENCY == "LADOT",]
  week1.Tue.list[[index]] <- dataChunk[dataChunk$DATE_AND_TIME_SR >= week1.tue & dataChunk$DATE_AND_TIME_SR < week1.wed & dataChunk$AGENCY == "LADOT",]
  week1.Wed.list[[index]] <- dataChunk[dataChunk$DATE_AND_TIME_SR >= week1.wed & dataChunk$DATE_AND_TIME_SR < week1.thu & dataChunk$AGENCY == "LADOT",]
  week1.Thu.list[[index]] <- dataChunk[dataChunk$DATE_AND_TIME_SR >= week1.thu & dataChunk$DATE_AND_TIME_SR < week1.fri & dataChunk$AGENCY == "LADOT",]
  week1.Fri.list[[index]] <- dataChunk[dataChunk$DATE_AND_TIME_SR >= week1.fri & dataChunk$DATE_AND_TIME_SR < week1.sat & dataChunk$AGENCY == "LADOT",]
  # A chunk shorter than chunkSize means the file is exhausted.
  if (nrow(dataChunk) != chunkSize) {
    print('Processed all files!')
    break
  }
  # fread's skip counts physical lines, and the reads so far have consumed the
  # header line PLUS index * chunkSize data rows, so the next chunk starts at
  # skip = index * chunkSize + 1. (The original skip = index * chunkSize
  # re-read the last row of the previous chunk, double-counting one row per
  # chunk.) Same column drop as the first read.
  dataChunk <- fread(week1.file, nrows = chunkSize, skip = index * chunkSize + 1,
                     header = FALSE, sep = "|", stringsAsFactors = TRUE,
                     drop = c(1, 4, 5, 7))
  names(dataChunk) <- colnames
}
rm(dataChunk)

# Collapse each day's list of per-chunk tables into one data.table, freeing
# the chunk lists as we go to limit peak memory use.
week1.df.Mon <- rbindlist(week1.Mon.list)
rm(week1.Mon.list)
week1.df.Tue <- rbindlist(week1.Tue.list)
rm(week1.Tue.list)
week1.df.Wed <- rbindlist(week1.Wed.list)
rm(week1.Wed.list)
week1.df.Thu <- rbindlist(week1.Thu.list)
rm(week1.Thu.list)
week1.df.Fri <- rbindlist(week1.Fri.list)
rm(week1.Fri.list)

# Sort each day chronologically (ascending timestamps).
week1.df.Mon <- week1.df.Mon[order(week1.df.Mon$DATE_AND_TIME_SR, decreasing = FALSE), ]
week1.df.Tue <- week1.df.Tue[order(week1.df.Tue$DATE_AND_TIME_SR, decreasing = FALSE), ]
week1.df.Wed <- week1.df.Wed[order(week1.df.Wed$DATE_AND_TIME_SR, decreasing = FALSE), ]
week1.df.Thu <- week1.df.Thu[order(week1.df.Thu$DATE_AND_TIME_SR, decreasing = FALSE), ]
week1.df.Fri <- week1.df.Fri[order(week1.df.Fri$DATE_AND_TIME_SR, decreasing = FALSE), ]

# Full week, Monday through Friday, in day order.
week1.df <- rbind(week1.df.Mon, week1.df.Tue, week1.df.Wed, week1.df.Thu, week1.df.Fri)
# Percent of successful sensor reports for the week: cross-tabulate LINK_ID by
# LINK_STATUS and take row proportions; column 1 is the share of successful
# reports per sensor.
week1.table <- table(week1.df$LINK_ID, week1.df$LINK_STATUS)
week1.prop <- prop.table(week1.table, 1)
week1.prop.df <- data.frame(week1.prop[, 1])
# Drop sensors with no reports at all (prop.table yields NaN for empty rows).
# is.nan() replaces the original comparison against the string "NaN".
# drop = FALSE keeps the one-column frame (and its LINK_ID rownames) intact.
week1.prop.df <- week1.prop.df[!is.nan(week1.prop.df[, 1]), , drop = FALSE]
hist(week1.prop.df[, 1], main = "Figure 1. Successful Reports on 10/19/2015-10/23/2015", xlab = "Percent of Week Total", ylab = "Number of Sensors", breaks = seq(0, 1, .05), border = "NA", col = "#999999", las = 1)

# Same per-sensor success rate, Monday only.
mon.table <- table(week1.df.Mon$LINK_ID, week1.df.Mon$LINK_STATUS)
mon.prop <- prop.table(mon.table, 1)
mon.prop.df <- data.frame(mon.prop[, 1])
mon.prop.df <- mon.prop.df[!is.nan(mon.prop.df[, 1]), , drop = FALSE]
# Label fix: the week plot is Figure 1, and this script's Monday is
# 2015-10-19 (see week1.mon), not 10/20.
hist(mon.prop.df[, 1], main = "Figure 2. Successful Reports on 10/19/2015", xlab = "Percent of Daily Total", ylab = "Number of Sensors", breaks = seq(0, 1, .05), border = "NA", col = "#999999", las = 1)

# Sensors reporting successfully less than 90% of the time on Monday;
# rownames of the proportion frame are the LINK_IDs.
badsensors <- rownames(mon.prop.df)[mon.prop.df[, 1] < 0.9]

# Keep only well-reporting sensors, then sum Monday volume per LINK_ID.
week1.df.Mon.Filter <- week1.df.Mon[!(LINK_ID %in% badsensors)]
week1.df.Mon.Sum <- aggregate(week1.df.Mon.Filter$VOLUME, by = list(LINK_ID = week1.df.Mon.Filter$LINK_ID), FUN = sum)

# Output. NOTE(review): this writes the UNFILTERED Monday table, not the
# filtered sums computed above, and the filename says 20151020 while Monday
# here is 2015-10-19 — confirm both are intentional before relying on this
# file downstream.
write.csv(week1.df.Mon, file = "Z:/GIS/DataLibrary/Riits/TrafficVolumes/data dump/EXPORT - 2015/20151020.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.