# install.packages("tidyverse") library(tidyverse) ablation <- read.csv("Ablation.csv", header = TRUE, stringsAsFactors = TRUE) names(ablation)[names(ablation) == "SCORE"] <- "Score" ggplot(ablation, aes(x = Time, y = Score)) + geom_point() ggplot(ablation, aes(x = Time, y = Score)) + geom_point(color = "red", size = 4) ggplot(ablation, aes(x = Time, y = Score)) + geom_point(aes(color = Experiment), size = 4) ggplot(ablation, aes(x = Time, y = Score)) + geom_point(aes(color = Experiment, shape = CellType), size = 4) ggplot(ablation, aes(x = Time, y = Score)) + geom_point(aes(color = Experiment), size = 4) + geom_text(aes(label = CellType), hjust = 0, size = 3) p <- ggplot(ablation, aes(x = Time, y = Score)) p <- p + geom_point(aes(color = Experiment, shape = Measurement), size = 4) p <- p + geom_line(aes(group = interaction(Experiment, Measurement, CellType), color = Experiment, linetype = CellType)) print(p) interaction(ablation$Experiment, ablation$Measurement, ablation$CellType) levels(interaction(ablation$Experiment, ablation$Measurement, ablation$CellType)) p <- ggplot(ablation, aes(x = Time, y = Score)) p + geom_point(aes(color = Experiment, shape = Measurement), size = 4) + scale_shape_manual(values = c(1,16)) p + geom_point(aes(color = Experiment, shape = Measurement), size = 4, alpha = 0.5) + scale_shape_manual(values = c(1,16)) p + geom_point(aes(color = Experiment, shape = Measurement), size = 4, position = position_dodge(0.5)) + scale_shape_manual(values = c(1,16)) p <- ggplot(ablation, aes(x = Time, y = Score, color = Experiment)) p <- p + geom_point(aes(shape = Measurement), size = 4) p <- p + geom_line(aes(group = interaction(Experiment, Measurement, CellType), linetype = CellType)) print(p) ( p <- p + labs(title = "Ablation", x = "Time (minutes)", y = "% Saturation") ) ( p <- p + theme_bw() + theme(plot.title = element_text(h = 0.5))) p + scale_x_continuous(breaks = c(0,5,10,20,30)) p + scale_x_continuous(breaks = unique(ablation$Time)) p + scale_shape_manual(values = c(1,16), labels = c("LDLR", "TfR")) + scale_linetype_discrete(name = "Cell type") + scale_color_manual(values = c("blue", "red", "green")) library(RColorBrewer) display.brewer.all() p + facet_grid(Measurement ~ .) p + facet_grid(. ~ Measurement) p + facet_grid(Experiment ~ Measurement) p + facet_grid(Measurement ~ Experiment) p + facet_grid(Measurement ~ Experiment) + scale_color_discrete(guide = "none") + scale_shape_discrete(guide = "none") trees <- data.frame(x = rnorm(100), y = rnorm(100), size = rnorm(100, mean = 5, sd = 2)) ants <- data.frame(a = rnorm(10000, sd = 0.4, mean = 1), b = rnorm(10000, sd = 0.2, mean = -1)) p1 <- ggplot() + geom_point(data = ants, aes(x = a, y = b), color = "brown", size = 2, alpha = 0.01) + geom_point(data = trees, aes(x = x, y = y, size = size), color = "green", shape = 8) print(p1) p2 <- ggplot() + stat_density2d(data = ants, aes(x = a, y = b, fill = ..level..), alpha = 0.5, geom = "polygon") + geom_point(data = trees, aes(x = x, y = y, size = size), color = "green", shape = 8) + scale_fill_gradient(low = "white", high = "brown") + scale_size_continuous(name = "Tree size") print(p2) pdf(file = "figures.pdf", paper = "letter") print(p) print(p1) print(p2) dev.off() # Bonus code: how to display all the possible shapes i <- 0:25; x <- i %/% 6; y <- i %% 6 df <- data.frame(name = i, row = x, column = y) ggplot(df, aes(x = row, y = column)) + geom_point(shape = 0:25, fill = "yellow", size = 4) + geom_text(label = i, vjust = 2) + scale_y_reverse() + theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x = element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) # Data wrangling library(tidyverse) # using tidyr set.seed(1) mouse_weights_sim <- data.frame( time = seq(as.Date("2017/1/1"), by = "month", length.out = 12), mickey = rnorm(12, 20, 1), minnie = rnorm(12, 20, 2), mighty = rnorm(12, 20, 4) ) mouse_weights <- gather(data = mouse_weights_sim, key = mouse, value = weight, mickey, minnie, mighty) mouse_weights <- gather(data = mouse_weights_sim, key = mouse, value = weight, -time) mouse_weights <- gather(data = mouse_weights_sim, key = mouse, value = weight, mickey:mighty) ggplot(mouse_weights, aes(x = mouse, y = weight)) + geom_boxplot(aes(fill = mouse)) ggplot(mouse_weights, aes(x = time, y = weight)) + geom_boxplot(aes(group = time)) ggplot(mouse_weights, aes(x = time, y = weight)) + geom_boxplot(aes(group = time)) + geom_point(aes(color = mouse)) ggplot(mouse_weights, aes(x = time, y = weight)) + geom_point(aes(color = mouse)) + geom_line(aes(group = mouse, color = mouse)) #rm(USPersonalExpenditure) uspe_df <- as.data.frame(USPersonalExpenditure) uspe_df$Category <- rownames(USPersonalExpenditure) uspe <- gather(uspe_df, Year, Amount, -Category) ggplot(uspe, aes(x = Year, y = Amount)) + geom_bar(stat = "identity", aes(fill = Category)) ggplot(uspe, aes(x = Year, y = Amount)) + geom_bar(stat = "identity", aes(fill = Category)) + theme(legend.justification = c(0,1), legend.position = c(0,1)) ggplot(uspe, aes(x = Year, y = Amount)) + geom_bar(stat = "identity", position = "dodge", aes(fill = Category)) + theme(legend.justification = c(0,1), legend.position = c(0,1)) spread(data = mouse_weights, key = mouse, value = weight) spread(ablation, key = CellType, value = Score) abl_united <- unite(ablation, ExptCell, Experiment, CellType, sep = ".") spread(abl_united, ExptCell, Score) separate(abl_united, ExptCell, c("Expt", "Cell"), sep = "\\.") library(dplyr) experiment_log <- data.frame(Experiment = c("E1909", "E1915", "E1921"), Tech = c("Goneril", "Regan", "Cordelia"), stringsAsFactors = TRUE) str(experiment_log) experiment_log inner_join(ablation, experiment_log) save(ablation, file = "ablation.Rdata") load("ablation.Rdata") head(select(msleep, name, sleep_total)) head(msleep[ , c("name", "sleep_total")]) class(msleep) msleep %>% select(name, sleep_total) %>% head msleep %>% select(name, sleep_total) %>% head head(msleep[ , -1]) head(select(msleep, -name)) head(select(msleep, -c(name, sleep_total))) msleep %>% select(-c(name, sleep_total)) %>% head msleep %>% select(starts_with("sl")) %>% head head(msleep[ , startsWith(names(msleep), "sl")]) msleep[msleep$sleep_total >= 16, ] msleep %>% filter(sleep_total >= 16) msleep %>% filter(order %in% c("Perissodactyla", "Primates")) msleep[msleep$order %in% c("Perissodactyla", "Primates"), ] msleep %>% filter(sleep_total >= 16, bodywt >= 1) msleep %>% filter(sleep_total >= 16 & bodywt >= 1) msleep[msleep$sleep_total >= 16 & msleep$bodywt >=1, ] msleep %>% arrange(order) %>% head msleep %>% arrange(desc(order)) %>% head msleep %>% select(name, order, sleep_total) %>% arrange(order, sleep_total) %>% head msleep %>% arrange(order, sleep_total) %>% select(name, order) %>% head ToothGrowth %>% summarize(meanLen = mean(len)) ToothGrowth %>% group_by(supp) %>% summarize(meanLen = mean(len)) ToothGrowth %>% group_by(supp, dose) %>% summarize(meanLen = mean(len), n = n()) ToothGrowth %>% group_by(supp, dose) %>% mutate(norm.len = (len - mean(len))/sd(len), max = max(len)) %>% print(n = 60) ablation %>% select(Time, Measurement, CellType, Score) %>% group_by(Time, Measurement, CellType) %>% summarize(mean_score = mean(Score)) %>% spread(CellType, mean_score) ablation %>% select(Time, Measurement, CellType, Score) %>% group_by(Time, Measurement, CellType) %>% summarize(min = min(Score), max = max(Score)) ablation_mean_sd <- ablation %>% select(Time, Measurement, CellType, Score) %>% group_by(Time, Measurement, CellType) %>% summarize(mean = mean(Score), sd = sd(Score)) ggplot(ablation_mean_sd, aes(x = Time, y = mean)) + geom_point(size = 4) + geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.4) + facet_grid(Measurement ~ CellType) + geom_line() + geom_point(data = ablation, aes(y = Score), color = "pink", shape = 1) + labs(title = "+/- 1 SD") ablation_mean_ci <- ablation %>% select(Time, Measurement, CellType, Score) %>% group_by(Time, Measurement, CellType) %>% summarize(mean = mean(Score), lower_limit = t.test(Score)$conf.int[1], upper_limit = t.test(Score)$conf.int[2]) ablation %>% mutate(rate = ifelse(Time > 0, Score / Time, NA)) ggplot(ablation_mean_sd, aes(x = Time, y = mean)) + geom_point(size = 2) + geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.4) + facet_grid(Measurement ~ CellType) + geom_line() + geom_point(data = ablation %>% group_by(Measurement, CellType, Time) %>% mutate(outlier = abs((Score - mean(Score)) / sd(Score)) > 1), aes(y = Score, color = outlier), size = 4, shape = 1) + labs(title = "+/- 1 SD", y = "Mean") + scale_colour_discrete(name = "Outlier Status", labels = c("Within 1 SD", "Outside 1 SD")) # Complete example library(tidyverse) # using ggplot2, dplyr, tidyr packages analyze.all <- function(save_plots = TRUE) { # Load data ablation <- read.csv(file = "Ablation.csv", header = TRUE, stringsAsFactors = TRUE) names(ablation)[names(ablation) == "SCORE"] <- "Score" print(ablation) ablation_means <- ablation %>% group_by(CellType, Measurement, Time) %>% summarize(mean = mean(Score), n = n()) print(ablation_means) # Set up plotting if (save_plots) { pdf(file = "plot.pdf") } # Plot all data g <- ggplot(ablation, aes(x = Time, y = Score)) + geom_point() + geom_line(aes(color = Experiment)) + facet_grid(Measurement ~ CellType) + theme_bw() print(g) # Plot averages over experiments g <- ggplot(ablation_means, aes(x = Time, y = mean)) + geom_point() + geom_line(aes(color = CellType)) + facet_wrap(~ Measurement) + theme_bw() print(g) # Separate plots of averages over experiments for (measurement in levels(ablation.means$Measurement)) { g <- ggplot(ablation_means %>% filter(Measurement == measurement), aes(x = Time, y = mean)) + geom_point() + geom_line(aes(color = CellType)) + labs(title = measurement) + theme_bw() print(g) } # Close plotting device if (save_plots) { dev.off() } } analyze.all(FALSE)