Hellen Gakuruh
2017-04-03
What we will cover:
n
X11()
on Unix, windows()
on Windows, and quartz()
on macOS
plot()
plot()
can produce a variety of different plots depending on type/class of first argument (hence, plot()
is completely reliant on class(object)
)
If class(x) == "factor"
, a bar plot is produced
class(x) == "character"
fails, as plot needs finite numeric values to set up a plotting window
plot()
will make plots according to each element's type with plot(obj)
(without changing/giving other arguments)
# What plot() draws for objects of different classes. Each object is created,
# its class is printed, and it is then handed to plot() with no other
# arguments. Console output is kept as `#>` comments.

# Time-series object
ts <- ts(rnorm(12, 50), start = 1, end = 12, frequency = 1)
class(ts)
#> [1] "ts"
plot(ts)

# Numeric vector
num <- rnorm(12, 50)
class(num)
#> [1] "numeric"
plot(num)

# Factor (TRUE is spelled out: `T` is an ordinary variable and can be
# reassigned)
fac <- factor(
  sample(c("Y", "N"), size = 100, replace = TRUE, prob = c(0.7, 0.3))
)
class(fac)
#> [1] "factor"
plot(fac)

# Two numeric vectors of equal length
num2 <- rnorm(12, 88)
class(num2)
#> [1] "numeric"
plot(num, num2)

# Factor (x) against a numeric vector (y); both have 100 elements
set.seed(5)
num3 <- rnorm(100, 88)
class(num3)
#> [1] "numeric"
plot(fac, num3)

# Two factors
fac2 <- factor(
  sample(c("F", "M"), size = 100, replace = TRUE, prob = c(0.8, 0.2))
)
class(fac2)
#> [1] "factor"
plot(fac, fac2)
plot()
depends on first (and “y”) argument, but how it is generated depends on values passed to other arguments
par()
… more on this later (read ?par
)
hist()
for histograms (univariate continuous distributions)
boxplot()
for box-and-whisker plots (for univariate numerical variables alone or categorised by a categorical variable)
barplot()
for bar plots (for categorical distributions)
pie()
for pie charts (for categorical distributions)
points()
, lines()
, text()
, title()
, abline()
, polygon()
, legend()
, and axis()
locator()
and identify()
locator(n, type)
: one can select “n” points using the left mouse button; if type is not specified, a list with two components, x and y, is returned, otherwise the selected points are plotted according to “type”
locator()
is particularly handy for locating positions for legends and labels, e.g. text(locator(1), "Outlier", adj=0)
identify(x, y, labels)
is used to highlight any of the points defined by x and y (using left mouse button)
par()
for complete list
par("parameter")
e.g. par("mfrow")
# Pie chart and bar chart of a simulated Yes/No response (300 draws).
set.seed(5)
response <- sample(
  c("Yes", "No"),
  size = 300, replace = TRUE, prob = c(0.68, 0.32)
)
tab_response <- table(response)

# Pie chart; the rounded percentages are written at fixed coordinates, one
# label per table entry.
pie(tab_response, col = c("#99CCFF", "#6699CC"))
labs <- paste0("(", round(as.vector(prop.table(tab_response) * 100)), "%)")
text(x = c(0.78, -0.50), y = c(0.80, -1), labels = labs)

# Bar chart of the same counts, sorted so the larger count comes first.
barplot(
  sort(tab_response, decreasing = TRUE),
  las = 1, col = c("#6699CC", "#99CCFF")
)
title("Bar chart", xlab = "Response", ylab = "Frequency")
# Simulate 300 observations drawn from the four quarter-end months and count
# them. Console output is kept as `#>` comments.
set.seed(5)
months <- sample(month.abb[seq(3, 12, by = 3)], size = 300, replace = TRUE)

# table() orders the names alphabetically; index by name to get calendar order.
tab_months <- table(months)[c("Mar", "Jun", "Sep", "Dec")]
tab_months
#> months
#> Mar Jun Sep Dec
#> 81 78 60 81

# dotchart() receives bare numbers, so the month names are added afterwards as
# tick labels on the left axis.
dotchart(
  as.numeric(tab_months),
  bg = 4, xlab = "Total student's Trained", ylab = "Quarters"
)
title(
  "Total students trained by quarters (2016)",
  sub = "Data Mania Inc.,", font.sub = 3, col.sub = "#6699CC", cex.sub = 0.9
)
axis(2, at = seq_along(tab_months), labels = names(tab_months), las = 2)
# Simulate a gender for each of the 300 observations and cross-tabulate it with
# `months` (created earlier in the deck). Console output is kept as `#>`
# comments.
# NOTE(review): the seed value here equals the one set before `months` was
# sampled, and the printed table has several zero cells. That looks like an
# artefact of both samples sharing one random stream -- confirm, and consider
# a different seed.
set.seed(5)
gender <- sample(
  c("Female", "Male"),
  size = 300, replace = TRUE, prob = c(0.7, 0.3)
)
monthgen_tab <- table(gender, months)[, c("Dec", "Sep", "Jun", "Mar")]
monthgen_tab
#> months
#> gender Dec Sep Jun Mar
#> Female 0 49 78 81
#> Male 81 11 0 0

# Grouped bar chart: one pair of bars (Female, Male) per month.
barplot(
  monthgen_tab,
  beside = TRUE, col = c("#6699CC", "#99CCFF")
)
legend(
  "topright",
  legend = c("Female", "Male"),
  pch = 22, pt.bg = c("#6699CC", "#99CCFF"),
  xpd = TRUE, cex = 0.75
)
title(
  "Student's trained by gender and month (2016)",
  xlab = "Month", ylab = "Number trained",
  sub = "Data Mania Inc.", cex.sub = 0.9, col.sub = "#6699CC", font.sub = 3
)

# Dot chart of the same table, with the columns put back in calendar order.
dotchart(
  as.matrix(monthgen_tab)[, c("Mar", "Jun", "Sep", "Dec")],
  bg = 4, xlab = "Total number of student's trained"
)
title(
  "Total student's trained by gender and month",
  sub = "Data Mania Inc.", font.sub = 3, cex.sub = 0.9, col.sub = "#6699CC"
)
title(ylab = "Gender and month", line = 2.5)
# Collapse the built-in Titanic table: drop the fourth level of its first
# dimension (presumably the crew -- confirm), then sum over that dimension.
# The printed result is a Sex x Age x Survived array. Console output is kept
# as `#>` comments.
titanic_passengers <- colSums(Titanic[-4, , , ])
titanic_passengers
#> , , Survived = No
#> Age
#> Sex Child Adult
#> Male 35 659
#> Female 17 106
#> , , Survived = Yes
#> Age
#> Sex Child Adult
#> Male 29 146
#> Female 28 296

# Fourfold display of the 2 x 2 x 2 array
fourfoldplot(titanic_passengers, std = "margins")

# Mosaic plot of the same array
mosaicplot(titanic_passengers, color = TRUE)

# Height of tiles: Sex x Age counts (summed over Survived) as row proportions
prop.table(
  apply(titanic_passengers, MARGIN = 1:2, FUN = sum),
  margin = 1
)
#> Age
#> Sex Child Adult
#> Male 0.07364787 0.9263521
#> Female 0.10067114 0.8993289
# Example data: sepal length from Edgar Anderson's iris data. Console output
# is kept as `#>` comments.
sepal <- iris$Sepal.Length
sepal
#> [1] 5.1 4.9 4.7 4.6 5.0 5.4 4.6 5.0 4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4
#> [18] 5.1 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.0 5.0 5.2 5.2 4.7 4.8 5.4 5.2 5.5
#> [35] 4.9 5.0 5.5 4.9 4.4 5.1 5.0 4.5 4.4 5.0 5.1 4.8 5.1 4.6 5.3 5.0 7.0
#> [52] 6.4 6.9 5.5 6.5 5.7 6.3 4.9 6.6 5.2 5.0 5.9 6.0 6.1 5.6 6.7 5.6 5.8
#> [69] 6.2 5.6 5.9 6.1 6.3 6.1 6.4 6.6 6.8 6.7 6.0 5.7 5.5 5.5 5.8 6.0 5.4
#> [86] 6.0 6.7 6.3 5.6 5.5 5.5 6.1 5.8 5.0 5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8
#> [103] 7.1 6.3 6.5 7.6 4.9 7.3 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7
#> [120] 6.0 6.9 5.6 7.7 6.3 6.7 7.2 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7
#> [137] 6.3 6.4 6.0 6.9 6.7 6.9 5.8 6.8 6.7 6.7 6.3 6.5 6.2 5.9

# Two histograms side by side. The current layout is saved first so it can be
# restored afterwards.
op <- par("mfrow")
par(mfrow = c(1, 2))

# Default number of breaks; annotation is switched off and added with title()
hist(sepal, col = "#99CCFF", ann = FALSE)
title("Breaks = 10", xlab = "Sepal Length", ylab = "Frequency")

# `breaks` is used instead of `nclass`: hist() documents `nclass` as an
# S-compatibility alias that is equivalent to `breaks` for a single number.
hist(sepal, breaks = 15, col = "#6699cc", ann = FALSE)
title("Breaks = 15", xlab = "Sepal Length", ylab = "Frequency")

par(mfrow = op)

# Density plot: set up empty axes from the density estimate, then draw the
# estimate as a filled polygon.
dens_sepal <- density(sepal)
plot(dens_sepal, type = "n")
polygon(dens_sepal, col = "#99CCFF")
# Four columns of the built-in state.x77 matrix, as a data frame
states <- as.data.frame(
  state.x77[, c("Illiteracy", "Life Exp", "Murder", "HS Grad")]
)

# Switch to a 1-row, 2-column layout, remembering the layout in use
op <- par("mfrow")
par(mfrow = c(1, 2))

# One box plot per variable to visualise its distribution
boxplot(states[["Illiteracy"]], col = "#99CCFF")
boxplot(states[["Life Exp"]], col = "#6699CC")

# Put the original layout back
par(mfrow = op)
# Dot plot (strip chart) of the 12 rounded values in `num`.
# `col` is not assigned anywhere in the visible code. Without a user-defined
# palette the name resolves to the base function col(), and `col[1]` fails.
# Fall back to a two-colour palette only in that case.
# NOTE(review): the fallback colours are the two blues used by the other plots
# here; confirm against the deck's hidden setup code.
if (is.function(col)) {
  col <- c("#99CCFF", "#6699CC")
}
stripchart(round(num, 1), pch = 22, bg = col[1])
title("Dot plot for small sample size", xlab = "Observations")
# Example data, rounded to one decimal place and sorted. Console output is
# kept as `#>` comments.
sort(round(num, digits = 1))
#> [1] 48.1 48.1 49.0 49.3 49.3 49.5 49.8 49.9 50.1 50.3 50.4 51.4

# Stem-and-leaf plot of the same rounded values
stem(round(num, digits = 1))
#> The decimal point is at the |
#> 48 | 11
#> 49 | 033589
#> 50 | 134
#> 51 | 4
# Scatter plot of the first two columns of `states`, drawn with filled circles
# in the first palette colour.
plot(
  states[, 1:2],
  pch = 21, bg = col[1]
)
title("Association between Illiteracy and Life Expectancy")
# Box plot of life expectancy by US division, with slanted x-axis labels.

# Enlarge the bottom margin to make room for the rotated labels, remembering
# the margins in use.
op <- par("mar")
par(mar = c(7, 4, 4, 2) + 0.1)

# Labels are the levels of the categorical variable, one per box
labs <- levels(state.division)

# Plot without the x axis. The y label is given here and not via title():
# since R 3.6 the formula method draws default axis labels, so a later
# title(ylab = ) would be printed on top of them.
boxplot(
  states$`Life Exp` ~ state.division,
  col = col[1], xaxt = "n", xlab = "", ylab = "Life expectancy"
)

# Add tick marks without labels. `at` pins one tick under each box; without
# it axis() picks its own tick positions, which need not line up with the
# labels.
axis(1, at = seq_along(labs), labels = FALSE)

# Add the labels just below the plot region, rotated 45 degrees; xpd = TRUE
# allows drawing outside the plot region.
text(
  seq_along(labs), par("usr")[3] - 0.25,
  labels = labs, srt = 45, adj = 1, xpd = TRUE
)

# Add the x-axis title below the rotated labels
mtext("Divisions", side = 1, line = 6, font = 2)

# Annotate plot
title("Life expectancy for each US division")

# Reset parameter
par(mar = op)
# Sepal against petal length: one box per column, coloured from `col`
length_cols <- c("Sepal.Length", "Petal.Length")
boxplot(iris[length_cols], col = col)
title("Comparing length of Irises of Gaspe Peninsula")

# Sepal against petal width, drawn the same way
width_cols <- c("Sepal.Width", "Petal.Width")
boxplot(iris[width_cols], col = col)
title("Comparing width of Irises of Gaspe Peninsula")
# Sepal and petal measurements by species, overlaid in one plot per measure.
# Axis labels are passed to the first boxplot() call and not to title():
# since R 3.6 the formula method draws default axis labels, so a later
# title(xlab = , ylab = ) would be printed on top of them.

# High level functions: the first call sets up the axes, with y limits wide
# enough for both sepal and petal values; the second call (add = TRUE) draws
# onto the same plot.
boxplot(
  Sepal.Length ~ Species, data = iris, col = col[1],
  ylim = c(min(iris$Petal.Length) - 0.1, max(iris$Sepal.Length) + 0.1),
  xlab = "Species", ylab = "Length"
)
boxplot(Petal.Length ~ Species, data = iris, col = 4, add = TRUE)

# Low level functions: legend and main title
legend(
  "bottomright", c("Sepal", "Petal"),
  pch = 22, pt.bg = c(col[1], 4), title = "Iris Type", cex = 0.75
)
title("Comparison of Iris Length by species")

# High level functions: same construction for the widths
boxplot(
  Sepal.Width ~ Species, data = iris, col = col[1],
  ylim = c(min(iris$Petal.Width) - 0.1, max(iris$Sepal.Width) + 0.1),
  xlab = "Species", ylab = "Width"
)
boxplot(Petal.Width ~ Species, data = iris, col = 4, add = TRUE)

# Low level functions: legend and main title
legend(
  "bottomright", c("Sepal", "Petal"),
  pch = 22, pt.bg = c(col[1], 4), title = "Iris Type", cex = 0.75
)
title("Comparison of Iris Width by species")