Created
August 8, 2013 10:45
-
-
Save aaronsaunders/6183633 to your computer and use it in GitHub Desktop.
Manipulating data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
with() | |
within() | |
round(x, n) # rounds x to n decimal places | |
ceiling(x) # vector of the smallest integers not less than x (i.e. >= x) | |
floor(x) # vector of the largest integers not greater than x (i.e. <= x) | |
as.integer(x) # truncates real x toward zero to integers (compare to round(x, 0)) | |
as.integer(x < cutpoint) # vector of 1 where x is less than cutpoint, 0 otherwise | |
# vectorised logical | |
ifelse(test, true_value, false_value) | |
factor(ifelse(a < cutpoint, "Neg", "Pos")) # is another way to dichotomize and to make a factor for analysis | |
bymedian <- with(InsectSprays, reorder(spray, count, median)) # reorders spray factor by the median count of the observations. | |
transform(data.df, variable names = some operation) # can be part of a set up for a data set | |
replace(x, list, values) # remember to assign this to some object i.e., x <- replace(x,x==-9,NA) | |
# similar to the operation x[x==-9] <- NA | |
cut(x, breaks, labels = NULL, | |
include.lowest = FALSE, right = TRUE, dig.lab = 3, ...) | |
cut(1:10, breaks=2, labels=c("low", "high")) | |
###################### | |
# SUBSETTING | |
# Selecting Observations (Rows) | |
newdata <- mydata[1:5, ] # first 5 observations | |
# based on variable values | |
newdata <- mydata[ which(mydata$gender=='F' & mydata$age > 65), ] | |
newdata <- mydata[c(1,5:10), ] | |
dat.csv[1:10, "female"] | |
dat.csv$female[1:10] | |
# Selecting Variables (columns) | |
myvars <- c("v1", "v2", "v3") | |
newdata <- mydata[myvars] | |
myvars <- names(mydata) %in% c("v1", "v2", "v3") | |
newdata <- mydata[!myvars] | |
newdata <- mydata[c(-3,-5)] # exclude 3rd and 5th variable | |
mydata$v3 <- mydata$v5 <- NULL # delete variables v3 and v5 | |
# Subset Function | |
subset(dataset, logical) # those objects meeting a logical criterion | |
subset(data.df, select=variables, logical) # get those objects from a data frame that meet a criterion | |
newdata <- subset(mydata, age >= 20 | age < 10, select=c(ID, Weight)) | |
newdata <- subset(mydata, sex=="m" & age > 25, select=weight:income) | |
# Random Samples | |
# take a random sample of size 50 from a dataset mydata | |
# sample without replacement | |
mysample <- mydata[sample(1:nrow(mydata), 50, | |
replace=FALSE),] | |
########################################### | |
# Sorting | |
rev(x) # reverses the elements of x | |
sort(x) # sorts the elements of x in increasing order | |
rev(sort(x)) # sorts the elements of x in decreasing order | |
order(..., na.last = TRUE, decreasing = FALSE) | |
order(vector) # returns the indices that would sort the vector | |
order(vector, decreasing = T) # for decreasing | |
x[order(x$B), ] # sort a dataframe by the order of the elements in B | |
x[rev(order(x$B)), ] # sort the dataframe in reverse order | |
complete <- subset(data.df, complete.cases(data.df)) # find those cases with no missing values | |
rank(vector) # returns the rank of each element (unlike order, which returns sorting indices); ties handled by ties.method | |
rank(x, na.last = TRUE, | |
ties.method = c("average", "first", "random", "max", "min")) | |
# sorting examples using the mtcars dataset | |
attach(mtcars) | |
newdata <- mtcars[order(mpg),] # sort by mpg | |
newdata <- mtcars[order(mpg, cyl),] # sort by mpg and cyl | |
newdata <- mtcars[order(mpg, -cyl),] #sort by mpg (ascending) and cyl (descending) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
substr(myString, start, stop) | |
# writing a vector of strings, | |
write.table(vector_of_interest, file="vector.txt", sep="\n", | |
col.names = FALSE, row.names=FALSE, quote=FALSE) | |
write.table(myData, file = "table.txt", sep = "\t", quote = FALSE) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment