Hellen Gakuruh
2017-04-03
What we will cover:
n
Graphics devices: X11() in Unix, windows() in Windows and quartz() in Mac.
plot():
- plot() can produce a variety of different plots depending on the type/class of its first argument (hence, plot() is completely reliant on class(object))
- If class(x) == "factor", a bar plot is produced
- Nothing can be plotted if class(x) == "character", as plot needs a finite object to set a plotting window
- plot() will make plots per element's type
Examples using plot(obj) (without changing/giving other arguments):
# Time series of 12 normal draws (mean 50), one observation per time unit.
# Printing the class shows what plot() will dispatch on.
ts <- ts(
  data = rnorm(n = 12, mean = 50),
  start = 1,
  end = 12,
  frequency = 1
)
class(ts)
[1] "ts"
n
# A "ts" object is drawn as a line plot against time.
plot(ts)
n
# Twelve normal draws with mean 50 (default sd of 1), kept as a plain vector.
num <- rnorm(n = 12, mean = 50)
class(num)
[1] "numeric"
n
# A single numeric vector is plotted against its index (1, 2, ...).
plot(num)
n
# Simulate 100 Y/N answers (70% "Y", 30% "N") and store them as a factor.
# `replace` is spelled TRUE rather than T: T is an ordinary variable that can
# be reassigned, TRUE is a reserved word.
fac <- factor(
  sample(c("Y", "N"), size = 100, replace = TRUE, prob = c(0.7, 0.3))
)
class(fac)
[1] "factor"
n
# A factor on its own gives a bar plot of the level counts.
plot(fac)
n
# A second numeric vector of the same length as `num`, centred on 88.
num2 <- rnorm(n = 12, mean = 88)
class(num2)
[1] "numeric"
n
# Two numeric vectors give a scatter plot of num2 (y) against num (x).
plot(num, num2)
n
# Fix the random stream, then draw 100 values around 88 so the vector has one
# value per element of the 100-element factor it is plotted against.
set.seed(5)
num3 <- rnorm(n = 100, mean = 88)
class(num3)
[1] "numeric"
n
# Factor first, numeric second: one box plot of num3 per level of fac.
plot(fac, num3)
n
# Simulate 100 F/M labels (80% "F", 20% "M") as a second factor.
# `replace` is spelled TRUE rather than T: T is an ordinary variable that can
# be reassigned, TRUE is a reserved word.
fac2 <- factor(
  sample(c("F", "M"), size = 100, replace = TRUE, prob = c(0.8, 0.2))
)
class(fac2)
[1] "factor"
n
# Two factors: a spine plot (proportions of fac2 within each level of fac).
plot(fac, fac2)
The kind of plot produced by plot() depends on its first (and “y”) argument, but how it is drawn depends on the values passed to its other arguments.
- par() … more on this later (read ?par)
Other high-level plotting functions:
- hist() for histograms (univariate continuous distributions)
- boxplot() for box-and-whiskers plot (for univariate numerical variables alone or categorised by a categorical variable)
- barplot() for bar plots (for categorical distribution)
- pie() for pie chart (for categorical distribution)
Low-level plotting functions:
- points(), lines(), text(), title(), abline(), polygon(), legend(), and axis()
Interactive functions:
- locator() and identify()
- locator(n, type): one can select “n” number of points using the left mouse button; if type is not specified, a list with two components x and y is returned, otherwise the selected points are plotted using the given “type”
- locator() is particularly handy for locating positions for legends and labels, e.g. text(locator(1), "Outlier", adj=0)
- identify(x, y, labels) is used to highlight any of the points defined by x and y (using the left mouse button)
Graphical parameters:
- par() for complete list
- par("parameter") e.g. par("mfrow")
set.seed(5)
# Simulate 300 Yes/No responses (68% / 32%) and tabulate them.
# `replace` is spelled TRUE rather than T: T is an ordinary variable that can
# be reassigned, TRUE is a reserved word.
response <- sample(
  c("Yes", "No"),
  size = 300,
  replace = TRUE,
  prob = c(0.68, 0.32)
)
tab_response <- table(response)

# Pie chart with each answer's rounded percentage written beside its slice.
pie(tab_response, col = c("#99CCFF", "#6699CC"))
labs <- paste0("(", round(as.vector(prop.table(tab_response) * 100)), "%)")
# The label coordinates are hand-picked for this chart.
text(x = c(0.78, -0.50), y = c(0.80, -1), labels = labs)

# Bar chart of the same counts, tallest bar first.
barplot(
  sort(tab_response, decreasing = TRUE),
  las = 1,
  col = c("#6699CC", "#99CCFF")
)
title("Bar chart", xlab = "Response", ylab = "Frequency")

# Simulate 300 quarter-end month labels. table() orders names alphabetically,
# so the counts are re-indexed into calendar order before printing.
set.seed(5)
months <- sample(month.abb[c(3, 6, 9, 12)], size = 300, replace = TRUE)
tab_months <- table(months)[c("Mar", "Jun", "Sep", "Dec")]
tab_months
months
Mar Jun Sep Dec
81 78 60 81
n
# Dot chart of the quarterly totals. The counts are passed as a bare numeric
# vector, so the month names are added afterwards on the left axis.
dotchart(
  as.numeric(tab_months),
  xlab = "Total student's Trained",
  ylab = "Quarters",
  bg = 4
)
title(
  "Total students trained by quarters (2016)",
  sub = "Data Mania Inc.,",
  font.sub = 3,
  col.sub = "#6699CC",
  cex.sub = 0.9
)
axis(side = 2, at = 1:4, labels = names(tab_months), las = 2)

# Simulated gender (70% Female, 30% Male), cross-tabulated with month; the
# columns are selected in reverse calendar order.
# NOTE(review): the seed is reset to the same value that was used right before
# `months` was sampled, so both samples are driven by the same random stream.
# The printed table shows gender almost fully determined by month - confirm
# whether that is intended.
set.seed(5)
gender <- sample(
  c("Female", "Male"),
  size = 300,
  replace = TRUE,
  prob = c(0.7, 0.3)
)
monthgen_tab <- table(gender, months)[, c("Dec", "Sep", "Jun", "Mar")]
monthgen_tab
months
gender Dec Sep Jun Mar
Female 0 49 78 81
Male 81 11 0 0
# Grouped bar plot: one pair of bars (Female, Male) per month. The same two
# fill colours are reused for the legend keys.
gender_fill <- c("#6699CC", "#99CCFF")
barplot(monthgen_tab, col = gender_fill, beside = TRUE)
legend(
  "topright",
  legend = c("Female", "Male"),
  pch = 22,
  pt.bg = gender_fill,
  xpd = TRUE,
  cex = 0.75
)
title(
  "Student's trained by gender and month (2016)",
  xlab = "Month",
  ylab = "Number trained",
  sub = "Data Mania Inc.",
  cex.sub = 0.9,
  col.sub = "#6699CC",
  font.sub = 3
)

# Dot chart of the same table with the months back in calendar order.
calendar_order <- c("Mar", "Jun", "Sep", "Dec")
dotchart(
  as.matrix(monthgen_tab)[, calendar_order],
  bg = 4,
  xlab = "Total number of student's trained"
)
title(
  "Total student's trained by gender and month",
  sub = "Data Mania Inc.",
  font.sub = 3,
  cex.sub = 0.9,
  col.sub = "#6699CC"
)
# The y label is placed separately so its distance from the axis can be set.
title(ylab = "Gender and month", line = 2.5)
# Convert Titanic data: drop the fourth level of the first dimension (Class),
# then sum over the remaining classes to leave a Sex x Age x Survived array.
titanic_without_fourth_class <- Titanic[-4, , , ]
titanic_passengers <- colSums(titanic_without_fourth_class)
titanic_passengers
, , Survived = No
Age
Sex Child Adult
Male 35 659
Female 17 106
, , Survived = Yes
Age
Sex Child Adult
Male 29 146
Female 28 296
n
# Fourfold display of the 2 x 2 x 2 passenger array, standardised on margins
fourfoldplot(x = titanic_passengers, std = "margins")
# Mosaic plot of the same array, with shaded tiles
mosaicplot(titanic_passengers, color = TRUE)
# Height of tiles: Sex by Age counts (summed over Survived) as row proportions
sex_by_age <- apply(titanic_passengers, MARGIN = 1:2, FUN = sum)
prop.table(sex_by_age, margin = 1)
Age
Sex Child Adult
Male 0.07364787 0.9263521
Female 0.10067114 0.8993289
# Example data: sepal lengths from Edgar Anderson's iris data set
sepal <- iris[["Sepal.Length"]]
sepal
[1] 5.1 4.9 4.7 4.6 5.0 5.4 4.6 5.0 4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4
[18] 5.1 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.0 5.0 5.2 5.2 4.7 4.8 5.4 5.2 5.5
[35] 4.9 5.0 5.5 4.9 4.4 5.1 5.0 4.5 4.4 5.0 5.1 4.8 5.1 4.6 5.3 5.0 7.0
[52] 6.4 6.9 5.5 6.5 5.7 6.3 4.9 6.6 5.2 5.0 5.9 6.0 6.1 5.6 6.7 5.6 5.8
[69] 6.2 5.6 5.9 6.1 6.3 6.1 6.4 6.6 6.8 6.7 6.0 5.7 5.5 5.5 5.8 6.0 5.4
[86] 6.0 6.7 6.3 5.6 5.5 5.5 6.1 5.8 5.0 5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8
[103] 7.1 6.3 6.5 7.6 4.9 7.3 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7
[120] 6.0 6.9 5.6 7.7 6.3 6.7 7.2 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7
[137] 6.3 6.4 6.0 6.9 6.7 6.9 5.8 6.8 6.7 6.7 6.3 6.5 6.2 5.9
# Two histograms of sepal length side by side.
# par() returns the previous value of whatever it changes, so the old layout
# is captured and restored with a single object.
op <- par(mfrow = c(1, 2))
# Default number of bins
hist(sepal, col = "#99CCFF", ann = FALSE)
title("Breaks = 10", xlab = "Sepal Length", ylab = "Frequency")
# `breaks` replaces `nclass`, which hist() keeps only for S compatibility.
# A single number is a suggestion: hist() rounds to "pretty" cut points, so
# the number of bars drawn may differ from the value given.
hist(sepal, breaks = 15, col = "#6699cc", ann = FALSE)
title("Breaks = 15", xlab = "Sepal Length", ylab = "Frequency")
# Reset original layout
par(op)
# Kernel density estimate of sepal length
dens_sepal <- density(sepal)
# Set up axes and title only (type = "n" draws no curve) ...
plot(dens_sepal, type = "n")
# ... then draw the density as a filled shape
polygon(dens_sepal, col = "#99CCFF")
# Four columns of the state.x77 matrix as a data frame
states <- as.data.frame(
  state.x77[, c("Illiteracy", "Life Exp", "Murder", "HS Grad")]
)
# Layout (1 row by 2 columns); par() returns the previous setting
op <- par(mfrow = c(1, 2))
# Visualise distributions
boxplot(states$Illiteracy, col = "#99CCFF")
# Backticks are the standard way to quote a column name containing a space
boxplot(states$`Life Exp`, col = "#6699CC")
# Reset original layout
par(op)
# One-dimensional scatter (strip chart) of the 12 values, rounded to 1 decimal.
# NOTE(review): `col` is not defined in the visible code - presumably a colour
# vector created in a hidden setup chunk; confirm it exists before this runs.
stripchart(round(num, 1), pch = 22, bg = col[1])
title("Dot plot for small sample size", xlab = "Observations")
# Example data (sorted)
sort(round(num, 1))
[1] 48.1 48.1 49.0 49.3 49.3 49.5 49.8 49.9 50.1 50.3 50.4 51.4
# Stem-and-leaf plot of the same rounded values (printed as text)
stem(round(num, 1))
The decimal point is at the |
48 | 11
49 | 033589
50 | 134
51 | 4
# Scatter plot of the first two columns of `states`, selected by name.
# NOTE(review): `col` is not defined in the visible code - presumably a colour
# vector created in a hidden setup chunk; confirm.
plot(states[, c("Illiteracy", "Life Exp")], pch = 21, bg = col[1])
title(main = "Association between Illiteracy and Life Expectancy")
n
# Box plot with slanted axis labels
# Widen the bottom margin to make room for the rotated labels; par() returns
# the previous setting so it can be restored at the end.
op <- par(mar = c(7, 4, 4, 2) + 0.1)
# Labels as levels of categorical variable
labs <- levels(state.division)
# Plot without the x axis. Both axis labels are set here: in current R the
# formula method otherwise prints the formula terms as default labels, which
# a later title(ylab = ) call would draw on top of.
# NOTE(review): `col` is not defined in the visible code - presumably a colour
# vector created in a hidden setup chunk; confirm.
boxplot(
  states$`Life Exp` ~ state.division,
  col = col[1],
  xaxt = "n",
  xlab = "",
  ylab = "Life expectancy"
)
# Add one unlabelled tick per box; without `at`, axis() uses default "pretty"
# positions, which need not coincide with the box positions.
axis(1, at = seq_along(labs), labels = FALSE)
# Add rotated labels just below the axis (xpd = TRUE allows drawing outside
# the plot region)
text(
  seq_along(labs),
  par("usr")[3] - 0.25,
  srt = 45,
  adj = 1,
  labels = labs,
  xpd = TRUE
)
# Add xlab below the rotated labels
mtext("Divisions", side = 1, line = 6, font = 2)
# Annotate plot
title("Life expectancy for each US division")
# Reset parameter
par(op)
# NOTE(review): `col` is not defined in the visible code - presumably a colour
# vector created in a hidden setup chunk; confirm.

# Comparing lengths (Sepal and Petal): one box per selected column
length_vars <- c("Sepal.Length", "Petal.Length")
boxplot(iris[, length_vars], col = col)
title(main = "Comparing length of Irises of Gaspe Peninsula")

# Comparing width (Sepal and Petal): one box per selected column
width_vars <- c("Sepal.Width", "Petal.Width")
boxplot(iris[, width_vars], col = col)
title(main = "Comparing width of Irises of Gaspe Peninsula")
# High level functions
# The y axis must cover both measurements because the petal boxes are added
# to the plot set up by the sepal boxes; range() over both avoids assuming
# which variable holds the minimum and which the maximum.
length_ylim <- range(iris$Sepal.Length, iris$Petal.Length) + c(-0.1, 0.1)
# Axis labels are given here: in current R the formula method otherwise
# prints the formula terms as default labels, which a later
# title(xlab = , ylab = ) call would draw on top of.
# NOTE(review): `col` is not defined in the visible code - presumably a colour
# vector created in a hidden setup chunk; confirm.
boxplot(
  Sepal.Length ~ Species,
  data = iris,
  col = col[1],
  ylim = length_ylim,
  xlab = "Species",
  ylab = "Length"
)
boxplot(Petal.Length ~ Species, data = iris, col = 4, add = TRUE)
# Low level functions
legend(
  "bottomright",
  c("Sepal", "Petal"),
  pch = 22,
  pt.bg = c(col[1], 4),
  title = "Iris Type",
  cex = 0.75
)
title("Comparison of Iris Length by species")
# High level functions
# The y axis must cover both measurements because the petal boxes are added
# to the plot set up by the sepal boxes; range() over both avoids assuming
# which variable holds the minimum and which the maximum.
width_ylim <- range(iris$Sepal.Width, iris$Petal.Width) + c(-0.1, 0.1)
# Axis labels are given here: in current R the formula method otherwise
# prints the formula terms as default labels, which a later
# title(xlab = , ylab = ) call would draw on top of.
# NOTE(review): `col` is not defined in the visible code - presumably a colour
# vector created in a hidden setup chunk; confirm.
boxplot(
  Sepal.Width ~ Species,
  data = iris,
  col = col[1],
  ylim = width_ylim,
  xlab = "Species",
  ylab = "Width"
)
boxplot(Petal.Width ~ Species, data = iris, col = 4, add = TRUE)
# Low level functions
legend(
  "bottomright",
  c("Sepal", "Petal"),
  pch = 22,
  pt.bg = c(col[1], 4),
  title = "Iris Type",
  cex = 0.75
)
title("Comparison of Iris Width by species")