Chapter 23 Assignments part II
Try and create any type of graph you want on the basis of your data! I have also given some examples.
23.1 Hagoort
library(tidyverse)
# install.packages("openxlsx")
library(openxlsx)
df_Ha <- read.xlsx("https://stulp.gmw.rug.nl/21-03-2019/ggplotworkshop/data/Hagoort.xlsx",
sheet = "Gait parameters ", startRow = 2)
# Note; typically I would use the readxl-package, but this package
# Does not allow you to read in online-excel files
# It is wise not to have your variables be named identically! Let's fix this
df_Ha <- as_tibble(df_Ha, .name_repair = "unique")
ggplot(df_Ha, aes(x = X3, y = `V..6`)) +
geom_smooth(colour = "white", fill = "orange", alpha = 0.5) +
geom_point(size = 4, colour = "orange") +
theme_classic() +
labs(x = "gait speed m/s", y = "RMS acc m/sec2 V")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
23.2 Hogelin
library(tidyverse)
# install.packages("openxlsx")
library(openxlsx)
df_Ho <- read.xlsx("https://stulp.gmw.rug.nl/21-03-2019/ggplotworkshop/data/Hogeling.xlsx")
# Note; typically I would use the readxl-package, but this package
# Does not allow you to read in online-excel files
df_Ho_red <- df_Ho %>% select(1:50) %>% slice(1:50) %>% # select 50 vars & cases
mutate_at(2:50, as.numeric) # turn to numeric variables
ggplot(df_Ho_red, aes(x = Hugo_Symbol, y = `TCGA-AB-2802-03A-01R-0757-21`)) +
geom_point() +
coord_flip()
df_Ho_red2 <- df_Ho_red %>% gather(2:50, key = "Gene", value = "Score")
ggplot(df_Ho_red2, aes(x = Hugo_Symbol, y = Gene, fill = Score)) +
geom_raster()
23.3 Maat
library(tidyverse)
# The "bed"-file; has no column names!
df_Ma1 <- read_delim("https://stulp.gmw.rug.nl/21-03-2019/ggplotworkshop/data/mobPBSC_H3K4me3_D2_peaks.bed",
delim = "\t", col_names = FALSE)
# wig is a funky format that I don't understand!
# rtracklayer seems like a package needed
ggplot(df_Ma1, aes(x = X5)) +
geom_histogram(fill = "grey", colour = "white", binwidth = 5) +
theme_minimal() +
labs(x ="Expression")
23.4 Van Rooij
library(tidyverse)
# install.packages("openxlsx")
library(openxlsx)
df_Ro <- read.xlsx("https://stulp.gmw.rug.nl/21-03-2019/ggplotworkshop/data/Van%20Rooij.xlsx",
startRow = 3)
# Note; typically I would use the readxl-package, but this package
# Does not allow you to read in online-excel files
ggplot(df_Ro, aes(x = reorder(factor(Country), Country, length))) +
geom_bar(fill = "lightblue", colour = "black") +
labs(y = "frequency", x = "country") +
coord_flip() +
theme_minimal() +
theme(panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank())
df_Ro_sel <- df_Ro %>%
gather(c("Pub16", "Pub15", "Pub14", "Pub13", "Pub12", "Pub11"),
key = "Year", value = "Pubs") %>%
mutate(Year_num = case_when(Year == "Pub11" ~ 2011,
Year == "Pub12" ~ 2012,
Year == "Pub13" ~ 2013,
Year == "Pub14" ~ 2014,
Year == "Pub15" ~ 2015,
Year == "Pub16" ~ 2016))
data = filter(df_Ro_sel, UnivAll == "RUG")
ggplot(df_Ro_sel, aes(x = Year_num, y = Pubs)) +
geom_line(aes(group = UnivAll), colour = "grey", alpha = 0.7) +
geom_line(data = filter(df_Ro_sel, UnivAll == "RUG"),
colour = "#CC0000", size = 2) +
theme_minimal() +
labs(x = "Year", y = "Publication score") +
theme(panel.grid = element_blank())
## Warning: Removed 19 rows containing missing values (geom_path).
23.5 Santhakumar
library(tidyverse)
# install.packages("openxlsx")
library(openxlsx)
# I've turned your xls file into xlsx
df_Sa1 <- read.xlsx("https://stulp.gmw.rug.nl/21-03-2019/ggplotworkshop/data/Santhakumar1.xlsx",
sheet = "Data", startRow = 4)
# Note; typically I would use the readxl-package, but this package
# Does not allow you to read in online-excel files
df_Sa2 <- read_csv("https://stulp.gmw.rug.nl/21-03-2019/ggplotworkshop/data/Santhakumar2.csv")
df_Sa1_lg <- df_Sa1 %>% gather(`1960`:`2018`, key = "Year", value = "C02")
ggplot(filter(df_Sa1_lg, Country.Name %in% c("Australia", "Netherlands", "United States")),
aes(x = as.numeric(Year), y = C02)) +
geom_line(aes(colour = Country.Name), size = 2) +
labs(x = "Year", colour = NULL) +
theme_minimal()
## Warning: Removed 12 rows containing missing values (geom_path).
23.6 Seibel
library(tidyverse)
# install.packages("haven")
library(haven)
df_Se <- read_sav("https://stulp.gmw.rug.nl/21-03-2019/ggplotworkshop/data/Seibel.sav")
# SPSS handles labels of categorical variables a bit differently than R.
# It’s better to convert all labelled variables into factors,
# that R can more easily deal with (you don’t have to do this though!).
# You can do this simply by:
df_Se <- as_factor(df_Se)
ggplot(filter(df_Se, !is.na(ident_a) & sex %in% c("man", "woman")),
aes(x = sex, fill = ident_a)) +
geom_bar(position = "fill") +
scale_fill_brewer(palette = "Set1") +
theme_minimal()
23.7 Zhang
library(tidyverse)
# install.packages("openxlsx")
library(openxlsx)
df_Za1 <- read.xlsx("https://stulp.gmw.rug.nl/21-03-2019/ggplotworkshop/data/Zhang.xlsx")
# I had to make the below excel file compatible with R by copying data to new sheet
df_Za2 <- read.xlsx("https://stulp.gmw.rug.nl/21-03-2019/ggplotworkshop/data/Zhang2.xlsx",
sheet = "Sheet1")
# Note; typically I would use the readxl-package, but this package
# Does not allow you to read in online-excel files
df_Za1_lg <- df_Za1 %>% gather(c("A":"F"), key = "Letter", value = "Score")
ggplot(df_Za1_lg, aes(x = `Time/minutes`, y = Score)) +
geom_line(aes(colour = Letter), size = 2) +
scale_colour_brewer(palette = "Set1") +
theme_classic() +
labs(colour = NULL)
23.8 Other assignments
23.8.1 Assignment 1
In Chapter 10, we used ggplot(mpg, aes(x=cty, colour=drv, fill=drv)) + geom_histogram(binwidth=1)
to compare distributions. As we’ve seen for bar charts in 14, default settings can be informative. In this case, it might be good to realise that the default settings within geom_histogram
, includes position=stack
. Alternatives for ‘position’ are “identity” and “dodge”. Try them both and explain what’s going on. Also compare them to ggplot(mpg, aes(x=cty, colour=drv, fill=drv)) + geom_freqpoly(binwidth=1)
23.8.2 Assignment 2
We’re interested in whether car-models from 2007 are more fuel efficient than those from 1999. Create a boxplot of the variable “cty” (fuel efficiency within the city) on the y-axis and year on the x-axis. Add the raw data by adding jitter. Make the jitter points blue and seethrough!
23.8.3 Assignment 3
Show the association between “displ” and “drv” for the different years using facets.