# diamonds data set

library(ggplot2) # for diamonds dataset
library(dplyr) # for aggregate functions
library(pryr) # for memory usage stats (not used)

# Make a copy of the dataset to work on.
data(diamonds) # reload the dataset, discarding any previous alterations
diamonds2 <- diamonds

# Clean the copy: convert every factor column to character.
# Source: http://stackoverflow.com/a/2853231
# vapply() (rather than sapply()) always returns a logical vector with one
# flag per column, so the `[` index below stays valid even for a data frame
# with no columns.
i <- vapply(diamonds2, is.factor, logical(1))
diamonds2[i] <- lapply(diamonds2[i], as.character)

# Start the clock!
# `start` keeps the starting timestamp; `ptm` is set to the same value.
start <- ptm <- proc.time()

# New testing method: assign one universal variable and use a single
# aggregate pipeline for every choice of `cut_input`.
cut_input <- "Ideal"

# `var2` holds the NAME of the column to sum. Handing summarise() the column
# vector itself (e.g. diamonds2$x) bypasses the grouping: sum() then sees the
# entire vector in every group and returns the same grand total on every row.
var2 <- switch(cut_input,
  Ideal = "x",
  Premium = "y",
  Good = "z",
  stop("Unsupported cut_input: ", cut_input, call. = FALSE)
)

# .data[[var2]] looks the column up inside the current group, so `m` is the
# per-cut sum of the selected column.
diamonds2 %>%
  group_by(cut) %>%
  summarise(m = sum(.data[[var2]])) %>%
  mutate(new_col = m * 2)
## Source: local data frame [5 x 3]
## 
##         cut         m   new_col
##       (chr)     (dbl)     (dbl)
## 1      Fair  10057.50  20115.00
## 2      Good  28645.08  57290.16
## 3     Ideal 118691.07 237382.14
## 4   Premium  82385.88 164771.76
## 5 Very Good  69359.09 138718.18
# Stop the clock
# Time taken is the ending timestamp minus the starting timestamp.
end <- proc.time()
total <- end - start
total
##    user  system elapsed 
##    0.89    0.20    1.45
###############################################

# Start the clock!
# `start2` keeps the starting timestamp; `ptm` is set to the same value.
start2 <- ptm <- proc.time()

# Original method: no reactive/universal variable is assigned; instead each
# branch of the if/else logic supplies its own aggregate pipeline. The
# repetition is deliberate, since this is the approach being timed.
cut_input <- "Ideal"

if (cut_input == "Ideal") {
  diamonds2 %>%
    group_by(cut) %>%
    summarise(m = sum(x)) %>%
    mutate(new_col = m * 2)
} else if (cut_input == "Premium") {
  diamonds2 %>%
    group_by(cut) %>%
    summarise(m = sum(y)) %>%
    mutate(new_col = m * 2)
} else if (cut_input == "Good") {
  diamonds2 %>%
    group_by(cut) %>%
    summarise(m = sum(z)) %>%
    mutate(new_col = m * 2)
} else {
  # Fail loudly instead of silently producing no result.
  stop("Unsupported cut_input: ", cut_input, call. = FALSE)
}
## Source: local data frame [5 x 3]
## 
##         cut         m   new_col
##       (chr)     (dbl)     (dbl)
## 1      Fair  10057.50  20115.00
## 2      Good  28645.08  57290.16
## 3     Ideal 118691.07 237382.14
## 4   Premium  82385.88 164771.76
## 5 Very Good  69359.09 138718.18
# Stop the clock
# Time taken is the ending timestamp minus the starting timestamp.
end2 <- proc.time()
total2 <- end2 - start2
total2
##    user  system elapsed 
##    1.00    0.20    1.58
# Method comparison: report which approach had the smaller elapsed time.
# [[ ]] extracts the bare number, so `difference` carries no "elapsed" name.
difference <- abs(total[["elapsed"]] - total2[["elapsed"]])

if (total2[["elapsed"]] <= total[["elapsed"]]) {
  print("The original method is faster.")
} else {
  text <- paste0("The new method is faster by, ", difference, " seconds.")
  print(text)
}
## [1] "The new method is faster by, 0.13 seconds."