Top Banner
Exploring Data with R Abhik Seal May 8, 2014 This is a introductory tutorial to get you started with Visualization data and Exploring Data with R. There are some popular books and many online materials i will Provide the links and references at the end of the tutorial. library(ggplot2) library(gcookbook) Scatter Plots and line plots plot(cars$dist~cars$speed, # y~x main="Relationship between car distance & speed", #Plot Title xlab="Speed (miles per hour)", #X axis title ylab="Distance travelled (miles)", #Y axis title xlim=c(0,30), #Set x axis limits from 0 to 30 yaxs="i", #Set y axis style as internal col="red", #Set the colour of plotting symbol to red pch=19) #Set the plotting symbol to filled dots 0 5 10 15 20 25 30 20 40 60 80 120 Relationship between car distance & spee Speed (miles per hour) Distance travelled (miles) Let’s draw vertical error bars with 5% errors on our cars scatterplot using arrows function 1
55

Visualization

Jul 20, 2016

Download

Documents

Abhik Seal

Exploring data with R
Welcome message from author
This document is posted to help you gain knowledge. Please leave a comment to let me know what you think about it! Share it to your friends and learn new things together.
Transcript
Page 1: Visualization

Exploring Data with RAbhik Seal

May 8, 2014

This is an introductory tutorial to get you started with visualizing and exploring data with R. There are some popular books and many online materials; I will provide the links and references at the end of the tutorial.

library(ggplot2)library(gcookbook)

Scatter Plots and line plots

# Scatter plot of stopping distance against speed for the built-in cars data.
# NOTE(review): ?cars documents `dist` in feet and `speed` in mph — confirm
# the "(miles)" unit in the y axis label.
plot(cars$dist ~ cars$speed,                              # y ~ x
     main = "Relationship between car distance & speed",  # Plot title
     xlab = "Speed (miles per hour)",                     # X axis title
     ylab = "Distance travelled (miles)",                 # Y axis title
     xlim = c(0, 30),    # Set x axis limits from 0 to 30
     yaxs = "i",         # Set y axis style as internal
     col = "red",        # Set the colour of plotting symbol to red
     pch = 19)           # Set the plotting symbol to filled dots

0 5 10 15 20 25 30

2040

6080

120

Relationship between car distance & speed

Speed (miles per hour)

Dis

tanc

e tr

avel

led

(mile

s)

Let’s draw vertical error bars with 5% errors on a scatterplot of the mtcars data using the arrows() function

1

Page 2: Visualization

# Scatter plot of mpg against disp, then a vertical bar from 95% to 105%
# of each mpg value (i.e. +/- 5% error bars).
plot(mpg ~ disp, data = mtcars)
arrows(x0 = mtcars$disp,
       y0 = mtcars$mpg * 0.95,
       x1 = mtcars$disp,
       y1 = mtcars$mpg * 1.05,
       angle = 90,     # flat caps instead of arrow heads
       code = 3,       # draw a cap at both ends
       length = 0.04,
       lwd = 0.4)

100 200 300 400

1015

2025

30

disp

mpg

How to draw histograms in the top and right margins of a bivariate scatter plot

# Split the device into a 2 x 2 grid: figure 1 (scatter plot) bottom-left,
# figure 2 above it, figure 3 to its right; the top-right cell stays empty.
layout(matrix(c(2, 0, 1, 3), 2, 2, byrow = TRUE),
       widths = c(3, 1), heights = c(1, 3), respect = TRUE)
par(mar = c(5.1, 4.1, 0.1, 0))
plot(cars$dist ~ cars$speed,               # y ~ x
     xlab = "Speed (miles per hour)",      # X axis title
     ylab = "Distance travelled (miles)",  # Y axis title
     xlim = c(0, 30),    # Set x axis limits from 0 to 30
     ylim = c(0, 140),   # Set y axis limits from 0 to 140
     xaxs = "i",         # Set x axis style as internal
     yaxs = "i",         # Set y axis style as internal
     col = "red",        # Set the colour of plotting symbol to red
     pch = 19)           # Set the plotting symbol to filled dots

# Figure 2: histogram of speed, drawn without annotation or axes
par(mar = c(0, 4.1, 3, 0))
hist(cars$speed, ann = FALSE, axes = FALSE, col = "black", border = "white")

yhist <- hist(cars$dist,plot=FALSE)par(mar=c(5.1,0,0.1,1))barplot(yhist$density,

2

Page 3: Visualization

horiz=TRUE,space=0,axes=FALSE,col="black",border="white")

0 5 10 15 20 25 30

2040

6080

100

120

Speed (miles per hour)

Dis

tanc

e tr

avel

led

(mile

s)

# Using ggplot library
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point()

3

Page 4: Visualization

10

15

20

25

30

35

2 3 4 5wt

mpg

# Multiple lines in a plot
plot(pressure$temperature, pressure$pressure, type = "l")
points(pressure$temperature, pressure$pressure)

# Second series: the same curve at half the pressure, drawn in red
lines(pressure$temperature, pressure$pressure / 2, col = "red")
points(pressure$temperature, pressure$pressure / 2, col = "red")

4

Page 5: Visualization

0 50 150 250 350

020

040

060

080

0

pressure$temperature

pres

sure

$pre

ssur

e

ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line()

0

200

400

600

800

0 100 200 300temperature

pres

sure

5

Page 6: Visualization

# Lines and points together
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_line() +
  geom_point()

0

200

400

600

800

0 100 200 300temperature

pres

sure

# Showing Lines Along the Axes
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_line() +
  geom_point() +
  theme(axis.line = element_line(colour = "black"))

6

Page 7: Visualization

0

200

400

600

800

0 100 200 300temperature

pres

sure

# Logarithmic axis (both x and y on a log10 scale)
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_line() +
  geom_point() +
  theme(axis.line = element_line(colour = "black")) +
  scale_x_log10() +
  scale_y_log10()

7

Page 8: Visualization

1e−03

1e−01

1e+01

1e+03

100temperature

pres

sure

From library(gcookbook) I am using the heightweight dataset to group data points by a variable. The grouping variable must be categorical—in other words, a factor or character vector.

# Points grouped by sex through both shape and colour.
# Other shapes and colors can be used by scale_shape_manual() and
# scale_colour_manual()
ggplot(heightweight, aes(x = ageYear, y = heightIn, shape = sex, colour = sex)) +
  geom_point()

8

Page 9: Visualization

50

55

60

65

70

12 14 16ageYear

heig

htIn sex

f

m

# Change shape of points
ggplot(heightweight, aes(x = ageYear, y = heightIn)) +
  geom_point(shape = 3)

50

55

60

65

70

12 14 16ageYear

heig

htIn

9

Page 10: Visualization

# Change point size; sex is categorical, so it is mapped to shape and the
# two shapes are chosen manually
ggplot(heightweight, aes(x = ageYear, y = heightIn, shape = sex)) +
  geom_point(size = 3) +
  scale_shape_manual(values = c(1, 4))

50

55

60

65

70

12 14 16ageYear

heig

htIn sex

f

m

# Represent a third continuous variable using color or size.

ggplot(heightweight, aes(x=weightLb, y=heightIn, fill=ageYear)) +geom_point(shape=21, size=2.5) +scale_fill_gradient(low="black", high="white", breaks=12:17,

guide=guide_legend())

10

Page 11: Visualization

50

55

60

65

70

50 75 100 125 150 175weightLb

heig

htIn

ageYear

12

13

14

15

16

17

Adding Fitted Regression Model Lines

# Base plot object: height against age
sp <- ggplot(heightweight, aes(x = ageYear, y = heightIn))
# Points plus a fitted linear-model line
sp + geom_point() + stat_smooth(method = lm)

50

55

60

65

70

12 14 16ageYear

heig

htIn

11

Page 12: Visualization

# Adding annotations to regression plot
model <- lm(heightIn ~ ageYear, heightweight)
summary(model)

# First generate prediction data.
# Given a model, predict values of yvar from xvar.
# This supports one predictor and one predicted variable.
#   model:   a fitted model; when xrange is NULL it must inherit from
#            "lm", "glm" or "loess" so the x range can be extracted.
#   xvar:    name of the predictor column (string).
#   yvar:    name to give the predicted column (string).
#   xrange:  If NULL, determine the x range from the model object. If a
#            vector with two numbers, use those as the min and max of the
#            prediction range.
#   samples: Number of samples across the x range.
#   ...:     Further arguments to be passed to predict()
# Returns a data frame with `samples` rows and columns named xvar and yvar.
predictvals <- function(model, xvar, yvar, xrange = NULL, samples = 100, ...) {
  # If xrange isn't passed in, determine xrange from the model.
  # Different ways of extracting the x range, depending on model type.
  if (is.null(xrange)) {
    if (inherits(model, c("lm", "glm"))) {
      xrange <- range(model$model[[xvar]])
    } else if (inherits(model, "loess")) {
      xrange <- range(model$x)
    } else {
      # Without this the function would fail later with an obscure error
      # from seq() on a NULL range.
      stop("xrange must be supplied for models of class ",
           paste(class(model), collapse = "/"), call. = FALSE)
    }
  }

  newdata <- data.frame(x = seq(xrange[1], xrange[2], length.out = samples))
  names(newdata) <- xvar
  newdata[[yvar]] <- predict(model, newdata = newdata, ...)
  newdata
}

pred <- predictvals(model, "ageYear", "heightIn")
sp <- ggplot(heightweight, aes(x = ageYear, y = heightIn)) +
  geom_point() +
  geom_line(data = pred)

# parse = TRUE renders the label as a plotmath expression
sp + annotate("text", label = "r^2 == 0.42", x = 16.5, y = 52, parse = TRUE)

12

Page 13: Visualization

r2 = 0.4250

55

60

65

70

12 14 16ageYear

heig

htIn

Scatter plot matrix and correlation matrix using mtcars dataset and first five variables

library(corrplot)
# Scatter plot matrix of the first five mtcars columns
pairs(mtcars[, 1:5])

mpg

4 6 8 50 250

1025

46

8

cyl

disp

100

400

5025

0

hp

10 25 100 400 3.0 4.5

3.0

4.5

drat

13

Page 14: Visualization

# Scatter plot with correlations in the upper triangle, smoothing lines in the
# lower triangle, and histograms on the diagonal

# Panel function: writes the absolute correlation of x and y in the middle of
# the panel, with the text size scaled by the size of the correlation.
#   digits:  significant digits used when formatting the correlation.
#   prefix:  string pasted in front of the formatted value.
#   cex.cor: base text size; when missing it is derived from the text width.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  usr <- par("usr")
  # Restore the caller's user coordinates on exit. par() only sets a value
  # when it is passed in tag = value form.
  on.exit(par(usr = usr), add = TRUE)
  par(usr = c(0, 1, 0, 1))
  r <- abs(cor(x, y, use = "complete.obs"))
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) cex.cor <- 0.8 / strwidth(txt)
  text(0.5, 0.5, txt, cex = cex.cor * (1 + r) / 2)
}

# Panel function: draws a histogram of x, with bar heights rescaled so the
# tallest bar is 1 on a y range of 0 to 1.5.
panel.hist <- function(x, ...) {
  usr <- par("usr")
  on.exit(par(usr = usr), add = TRUE)
  par(usr = c(usr[1:2], 0, 1.5))
  h <- hist(x, plot = FALSE)
  breaks <- h$breaks
  nB <- length(breaks)
  y <- h$counts
  y <- y / max(y)
  rect(breaks[-nB], 0, breaks[-1], y, col = "white", ...)
}

pairs(mtcars[, 1:5],
      upper.panel = panel.cor,
      diag.panel = panel.hist,
      lower.panel = panel.smooth)

mpg4 6 8

0.85 0.85

50 250

0.78

10250.68

46

8 cyl0.90 0.83 0.70

disp0.79

100

400

0.71

5025

0 hp0.45

10 25 100 400 3.0 4.5

3.0

4.5drat

14

Page 15: Visualization

# Correlation matrix of all mtcars columns, drawn with corrplot defaults
mcor <- cor(mtcars)
corrplot(mcor)

−1

−0.8

−0.6

−0.4

−0.2

0

0.2

0.4

0.6

0.8

1

mpg

cyl

disp

hp drat

wt

qsec

vs am gear

carb

mpg

cyl

disp

hp

drat

wt

qsec

vs

am

gear

carb

# Correlation matrix with colored squares and black, rotated labels
corrplot(mcor, method = "shade", shade.col = NA, tl.col = "black", tl.srt = 45)

15

Page 16: Visualization

−1

−0.8

−0.6

−0.4

−0.2

0

0.2

0.4

0.6

0.8

1m

pgcy

ldis

php dr

atwt qs

ecvs am ge

arca

rb

mpg

cyl

disp

hp

drat

wt

qsec

vs

am

gear

carb

# create a three-dimensional (3D) scatter plot.
library(rgl)
plot3d(mtcars$wt, mtcars$disp, mtcars$mpg, type = "s", size = 0.75, lit = FALSE)

# add vertical segments to help give a sense of the spatial positions of the points

# Interleave two vectors element by element: v1[1], v2[1], v1[2], v2[2], ...
# rbind() recycles a shorter v2, so a single value is repeated for every
# element of v1.
interleave <- function(v1, v2) as.vector(rbind(v1, v2))

# Plot the points
plot3d(mtcars$wt, mtcars$disp, mtcars$mpg,
       xlab = "Weight", ylab = "Displacement", zlab = "MPG",
       size = .75, type = "s", lit = FALSE)

# Add the segments: each point is joined to the same (wt, disp) position at
# the minimum mpg value
segments3d(interleave(mtcars$wt, mtcars$wt),
           interleave(mtcars$disp, mtcars$disp),
           interleave(mtcars$mpg, min(mtcars$mpg)),
           alpha = 0.4, col = "blue")

Scatter plot with jitter rugs, spikes and density

# Simulated data: y is a noisy linear function of x
x <- rnorm(1000, 50, 30)
y <- 3 * x + rnorm(1000, 0, 20)
library(Hmisc)  # library() stops with an error if Hmisc is not installed
plot(x, y)
# scat1d adds tick marks (bar codes, rug plot)
# on any of the four sides of an existing plot,
# corresponding with non-missing values of a vector x.
scat1d(x, col = "red")      # density bars on top of graph
scat1d(y, 4, col = "blue")  # density bars at right

16

Page 17: Visualization

−50 0 50 100 150

−20

00

100

200

300

400

x

y

plot(x, y, pch = 20)
# Spike histograms for x and, on side 4, for y
histSpike(x, add = TRUE, col = "green4", lwd = 2)
histSpike(y, 4, add = TRUE, col = "blue", lwd = 2)
histSpike(x, type = "density", col = "red", add = TRUE)  # smooth density at bottom
histSpike(y, 4, type = "density", col = "red", add = TRUE)

17

Page 18: Visualization

−50 0 50 100 150

−20

00

100

200

300

400

x

y

Bar graphs and Histograms

barplot(BOD$demand, names.arg=BOD$Time)

18

Page 19: Visualization

1 2 3 4 5 7

05

1015

# Using the table function: bar heights are the counts of each cyl value
barplot(table(mtcars$cyl))

4 6 8

02

46

810

14

19

Page 20: Visualization

qplot(BOD$Time, BOD$demand, geom="bar", stat="identity")

0

5

10

15

20

2 4 6BOD$Time

BO

D$d

eman

d

# Considering Time as a factor
qplot(factor(BOD$Time), BOD$demand, geom = "bar", stat = "identity")

20

Page 21: Visualization

0

5

10

15

20

1 2 3 4 5 7factor(BOD$Time)

BO

D$d

eman

d

# cyl is continuous here
qplot(mtcars$cyl)

0

5

10

4 5 6 7 8mtcars$cyl

coun

t

21

Page 22: Visualization

# Treat cyl as discrete
qplot(factor(mtcars$cyl))

0

5

10

4 6 8factor(mtcars$cyl)

coun

t

# Bar graph of values. This uses the BOD data frame, with the
# "Time" column for x values and the "demand" column for y values.
ggplot(BOD, aes(x = Time, y = demand)) +
  geom_bar(stat = "identity")

22

Page 23: Visualization

0

5

10

15

20

2 4 6Time

dem

and

ggplot(mtcars, aes(x=factor(cyl))) +geom_bar(fill="white",color="black")

0

5

10

4 6 8factor(cyl)

coun

t

23

Page 24: Visualization

# Control the number of bins by setting the bin width (4 mpg per bin)
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(binwidth = 4, fill = "white", colour = "black")

0

2

4

6

8

10 20 30 40mpg

coun

t

# Change the x axis origin using origin parameter
# NOTE(review): newer ggplot2 releases replace `origin` with `boundary` —
# confirm against the installed version.
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(binwidth = 4, fill = "white", colour = "black", origin = 20)

24

Page 25: Visualization

0

2

4

6

20 25 30 35mpg

coun

t

Histograms of multiple groups of data

library(MASS)
# One histogram of heightIn per sex, stacked in rows
ggplot(heightweight, aes(x = heightIn)) +
  geom_histogram(fill = "white", colour = "black") +
  facet_grid(sex ~ .)

25

Page 26: Visualization

0

5

10

15

20

0

5

10

15

20

fm

50 55 60 65 70heightIn

coun

t

# Work on a copy so heightweight itself is left unchanged
hw <- heightweight

# Using plyr and revalue() to change the names on sex variable
library(plyr)
hw$sex <- revalue(hw$sex, c("f" = "Female", "m" = "Male"))

# Using facetting
ggplot(hw, aes(x = heightIn)) +
  geom_histogram(fill = "white", colour = "black") +
  facet_grid(sex ~ .)

26

Page 27: Visualization

0

5

10

15

20

0

5

10

15

20

Fem

aleM

ale

50 55 60 65 70heightIn

coun

t

ggplot(hw, aes(x=heightIn, y = ..density.. ,fill=sex)) +geom_histogram(position="identity",alpha=0.4)+theme_bw()+geom_density(alpha=0.3)

0.00

0.05

0.10

0.15

0.20

0.25

50 55 60 65 70heightIn

dens

ity

sex

Female

Male

27

Page 28: Visualization

Negative and Positive Bar plot

# Keep the Berkeley rows from 1900 onwards
csub <- subset(climate, Source == "Berkeley" & Year >= 1900)
head(csub)
# Flag non-negative anomalies so the bars can be filled by sign
csub$pos <- csub$Anomaly10y >= 0
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(stat = "identity", color = "black", position = "identity")

0.0

0.5

1920 1950 1980Year

Ano

mal

y10y pos

FALSE

TRUE

Error Bar plot in ggplot2

# Example data for the error bar plot: twelve X positions for each of two
# groups, with one constant error size per group.
myd <- data.frame(
  X = rep(1:12, times = 2),
  Y = c(
    8, 12, 13, 18, 22, 16, 24, 29, 34, 15, 8, 6,   # first group
    9, 10, 12, 18, 26, 28, 28, 30, 20, 10, 9, 9    # second group
  ),
  group = rep(c("X-Group", "Y-group"), each = 12),
  error = rep(c(2.5, 3.0), each = 12)
)

plt = ggplot(data = myd, aes(x=X, y=Y, fill=group, width=0.8) ) +geom_errorbar(aes(ymin=Y, ymax=Y+error, width = 0.2),

position=position_dodge(width=0.8)) +geom_bar(stat="identity", position=position_dodge(width=0.8)) +geom_bar(stat="identity", position=position_dodge(width=0.8),

colour="black", legend=FALSE) +scale_fill_manual(values=c("grey70", "white")) +scale_x_discrete("X", limits=c(1:12)) +scale_y_continuous("Y (units)", expand=c(0,0),

limits = c(0, 40), breaks=seq(0, 40, by=5)) +ggtitle ("My nice plot") +theme_bw() +

theme( plot.title = element_text(face="bold", size=14),

28

Page 29: Visualization

axis.title.x = element_text(face="bold", size=12),axis.title.y = element_text(face="bold", size=12, angle=90),panel.grid.major = element_blank(),panel.grid.minor = element_blank(),axis.text.y=element_text(angle=90, hjust=0.5),legend.title = element_blank(),legend.position = c(0.85,0.85),legend.key.size = unit(1.5, "lines"),legend.key = element_rect()

)

plt

05

1015

2025

3035

40

1 2 3 4 5 6 7 8 9 10 11 12X

Y (

units

)

X−Group

Y−group

My nice plot

Box plots

# Using the ToothGrowth dataset
# Formula syntax
boxplot(len ~ supp, data = ToothGrowth)

29

Page 30: Visualization

OJ VC

510

1520

2530

35

# Put interaction of two variables on x-axis
boxplot(len ~ supp + dose, data = ToothGrowth)

OJ.0.5 OJ.1 OJ.2

510

1520

2530

35

30

Page 31: Visualization

ggplot(ToothGrowth, aes(x=supp, y=len)) +geom_boxplot()

10

20

30

OJ VCsupp

len

# Adding notches
ggplot(ToothGrowth, aes(x = supp, y = len)) +
  geom_boxplot(notch = TRUE)

31

Page 32: Visualization

10

20

30

OJ VCsupp

len

# Adding mean: a white-filled triangle marks the mean of each group
# NOTE(review): newer ggplot2 releases name this argument `fun` instead of
# `fun.y` — confirm against the installed version.
ggplot(ToothGrowth, aes(x = supp, y = len)) +
  geom_boxplot() +
  stat_summary(fun.y = "mean", geom = "point", shape = 24, size = 4, fill = "white")

10

20

30

OJ VCsupp

len

32

Page 33: Visualization

# One box per combination of supp and dose, built with interaction()
ggplot(ToothGrowth, aes(x = interaction(supp, dose), y = len)) +
  geom_boxplot()

10

20

30

OJ.0.5 VC.0.5 OJ.1 VC.1 OJ.2 VC.2interaction(supp, dose)

len

Violin plots are a way of comparing multiple data distributions

# Use the heightweight dataset
# The plot object is assigned and used under one name; the original assigned
# one name and added layers to another.
p <- ggplot(heightweight, aes(x = sex, y = heightIn))
# `adjust` is spelled out; the misspelled `adjuts` is not a geom_violin
# argument.
# NOTE(review): newer ggplot2 releases name `fun.y` as `fun` — confirm
# against the installed version.
p + geom_violin(trim = FALSE, adjust = 2) +
  geom_boxplot(width = .1, fill = "Grey", outlier.colour = NA) +
  theme_bw() +
  stat_summary(fun.y = "mean", geom = "point", shape = 24, size = 4, fill = "white")

33

Page 34: Visualization

50

60

70

f msex

heig

htIn

Plotting curves

curve(x^3 - 5*x, from=-4, to=4)

34

Page 35: Visualization

−4 −2 0 2 4

−40

−20

020

40

x

x^3

− 5

* x

# Plot a user-defined function

# Logistic curve centred at xvar = 10: returns 1 / (1 + exp(-xvar + 10)),
# vectorised over xvar.
myfun <- function(xvar) {
  1 / (1 + exp(-xvar + 10))
}

curve(myfun(x), from = 0, to = 20)
# Add a line:
curve(1 - myfun(x), add = TRUE, col = "red")

35

Page 36: Visualization

0 5 10 15 20

0.0

0.2

0.4

0.6

0.8

1.0

x

myf

un(x

)

# This sets the x range from 0 to 20
ggplot(data.frame(x = c(0, 20)), aes(x = x)) +
  stat_function(fun = myfun, geom = "line")

0.00

0.25

0.50

0.75

1.00

0 5 10 15 20x

y

36

Page 37: Visualization

Miscellaneous plots

Making Density Plot of Two-Dimensional Data

# Base plot object for the faithful data
p <- ggplot(faithful, aes(x = eruptions, y = waiting))
# Points with 2D density contours on top
p + geom_point() + stat_density2d()

50

60

70

80

90

2 3 4 5eruptions

wai

ting

p + stat_density2d(aes(colour=..level..))

37

Page 38: Visualization

50

60

70

80

90

2 3 4 5eruptions

wai

ting

0.005

0.010

0.015

0.020

level

p + stat_density2d(aes(fill=..density..), geom="raster", contour=FALSE)

50

60

70

80

90

2 3 4 5eruptions

wai

ting

0.005

0.010

0.015

0.020

0.025density

38

Page 39: Visualization

# With points, and map density estimate to alpha
p + geom_point() +
  stat_density2d(aes(alpha = ..density..), geom = "tile", contour = FALSE)

50

60

70

80

90

2 3 4 5eruptions

wai

ting

density

0.005

0.010

0.015

0.020

0.025

Plotting Pie Charts

library(RColorBrewer)
slices <- c(10, 12, 4, 16, 8)
lbls <- c("IN", "AK", "ID", "MA", "MO")
pie(slices, labels = lbls, main = "Pie Chart of Countries",
    col = brewer.pal(7, "Set1"))

39

Page 40: Visualization

IN

AK

ID

MA

MO

Pie Chart of Countries

Pie Chart with Percentages

slices <- c(10, 12, 4, 16, 8)
lbls <- c("IN", "AK", "ID", "MA", "MO")
# Share of each slice as a whole-number percentage
pct <- round(slices / sum(slices) * 100)
lbls <- paste(lbls, pct)   # add percents to labels
lbls <- paste0(lbls, "%")  # add % to labels
pie(slices, labels = lbls, col = rainbow(length(lbls)),
    main = "Pie Chart of US States")

40

Page 41: Visualization

IN 20%

AK 24%

ID 8%

MA 32%

MO 16%

Pie Chart of US States

3D Pie chart

library(plotrix)
slices <- c(10, 12, 4, 16, 8)
lbls <- c("IN", "AK", "ID", "MA", "MO")
# brewer.pal() comes from RColorBrewer, which must already be attached
pie3D(slices, labels = lbls, explode = 0.1,
      main = "Pie Chart of Countries ", col = brewer.pal(7, "Set1"))

41

Page 42: Visualization

Pie Chart of Countries

INAK

ID

MA

MO

A dendrogram is the fancy word that we use to name a tree diagram to display the groups formed by hierarchical clustering. # Using Corrgrams package

library(corrgram)
# Correlation matrix of all mtcars columns
R <- cor(mtcars)
# default corrgram
corrgram(R)

42

Page 43: Visualization

mpg

cyl

disp

hp

drat

wt

qsec

vs

am

gear

carb

# corrgram with pie charts (upper panel) and shading (lower panel)
corrgram(R, order = TRUE, lower.panel = panel.shade, upper.panel = panel.pie,
         text.panel = panel.txt, main = "mtcars Data")

gear

am

drat

mpg

vs

qsec

wt

disp

cyl

hp

carb

mtcars Data

43

Page 44: Visualization

The package ellipse provides the function plotcorr() that helps us to visualize correlations. plotcorr() uses ellipse-shaped glyphs for each entry of the correlation matrix. Here’s the default plot using our matrix of R:

# default corrgram
library(ellipse)
plotcorr(R)

mpgcyl

disphp

dratwt

qsecvs

amgearcarb

mpg

cyl

disp

hp drat

wt

qsec

vs am gear

carb

# colored corrgram: ten colours interpolated from firebrick3 through white to navy
plotcorr(R, col = colorRampPalette(c("firebrick3", "white", "navy"))(10))

44

Page 45: Visualization

mpgcyl

disphp

dratwt

qsecvs

amgearcarb

mpg

cyl

disp

hp drat

wt

qsec

vs am gear

carb

Another colored corrgram

plotcorr(R, col = colorRampPalette(c("#E08214", "white", "#8073AC"))(10), type = "lower")

cyldisp

hpdrat

wtqsec

vsam

gearcarb

mpg

cyl

disp

hp drat

wt

qsec

vs am gear

45

Page 46: Visualization

Visualizing Dendrograms

# prepare hierarchical cluster
hc <- hclust(dist(mtcars))
plot(hc, hang = -1)  # labels at the same level

Mas

erat

i Bor

aC

hrys

ler

Impe

rial

Cad

illac

Fle

etw

ood

Linc

oln

Con

tinen

tal

For

d P

ante

ra L

Dus

ter

360

Cam

aro

Z28

Hor

net S

port

abou

tP

ontia

c F

irebi

rdH

orne

t 4 D

rive

Val

iant

Mer

c 45

0SLC

Mer

c 45

0SE

Mer

c 45

0SL

Dod

ge C

halle

nger

AM

C J

avel

inH

onda

Civ

icTo

yota

Cor

olla

Fia

t 128

Fia

t X1−

9F

erra

ri D

ino

Lotu

s E

urop

aM

erc

230

Vol

vo 1

42E

Dat

sun

710

Toyo

ta C

oron

aP

orsc

he 9

14−

2M

erc

240D

Maz

da R

X4

Maz

da R

X4

Wag

Mer

c 28

0M

erc

280C

030

0

Cluster Dendrogram

hclust (*, "complete")dist(mtcars)

Hei

ght

An alternative way to produce dendrograms is to specifically convert hclust objects into dendrogram objects.

# using dendrogram objects
hcd <- as.dendrogram(hc)
# alternative way to get a dendrogram
plot(hcd)

46

Page 47: Visualization

010

020

030

040

0

Mas

erat

i Bor

aC

hrys

ler

Impe

rial

Cad

illac

Fle

etw

ood

Linc

oln

Con

tinen

tal

For

d P

ante

ra L

Dus

ter

360

Cam

aro

Z28

Hor

net S

port

abou

tP

ontia

c F

irebi

rdH

orne

t 4 D

rive

Val

iant

Mer

c 45

0SLC

Mer

c 45

0SE

Mer

c 45

0SL

Dod

ge C

halle

nger

AM

C J

avel

inH

onda

Civ

icTo

yota

Cor

olla

Fia

t 128

Fia

t X1−

9F

erra

ri D

ino

Lotu

s E

urop

aM

erc

230

Vol

vo 1

42E

Dat

sun

710

Toyo

ta C

oron

aP

orsc

he 9

14−

2M

erc

240D

Maz

da R

X4

Maz

da R

X4

Wag

Mer

c 28

0M

erc

280C

Having an object of class dendrogram, we can also plot the branches in a triangular form.

# using dendrogram objects
plot(hcd, type = "triangle")

47

Page 48: Visualization

010

020

030

040

0

Mas

erat

i Bor

aC

hrys

ler

Impe

rial

Cad

illac

Fle

etw

ood

Linc

oln

Con

tinen

tal

For

d P

ante

ra L

Dus

ter

360

Cam

aro

Z28

Hor

net S

port

abou

tP

ontia

c F

irebi

rdH

orne

t 4 D

rive

Val

iant

Mer

c 45

0SLC

Mer

c 45

0SE

Mer

c 45

0SL

Dod

ge C

halle

nger

AM

C J

avel

inH

onda

Civ

icTo

yota

Cor

olla

Fia

t 128

Fia

t X1−

9F

erra

ri D

ino

Lotu

s E

urop

aM

erc

230

Vol

vo 1

42E

Dat

sun

710

Toyo

ta C

oron

aP

orsc

he 9

14−

2M

erc

240D

Maz

da R

X4

Maz

da R

X4

Wag

Mer

c 28

0M

erc

280C

Phylogenetic trees

library(ape)
# plot basic tree
plot(as.phylo(hc), cex = 0.9, label.offset = 1)

48

Page 49: Visualization

Mazda RX4Mazda RX4 Wag

Datsun 710

Hornet 4 DriveHornet Sportabout

Valiant

Duster 360

Merc 240D

Merc 230

Merc 280Merc 280C

Merc 450SEMerc 450SLMerc 450SLC

Cadillac FleetwoodLincoln ContinentalChrysler Imperial

Fiat 128Honda CivicToyota Corolla

Toyota Corona

Dodge ChallengerAMC Javelin

Camaro Z28Pontiac Firebird

Fiat X1−9

Porsche 914−2

Lotus Europa

Ford Pantera L

Ferrari Dino

Maserati Bora

Volvo 142E

# fan
plot(as.phylo(hc), type = "fan")

49

Page 50: Visualization

Mazda RX4

Mazda RX4 WagD

atsu

n 71

0H

ornet 4 Drive H

orne

t Spo

rtab

out

Valiant

Dus

ter 3

60

Merc 240D

Mer

c 23

0

Merc 280

Merc 280C

Merc 450SE

Merc 450SL

Merc 450SLC

Cadillac Fleetwood

Lincoln Contin

ental

Chrysler Imperial

Fiat 128

Honda Civic

Toyota Corolla

Toyota Corona

Dodge ChallengerAMC Javelin

Cam

aro

Z28

Pon

tiac

Fire

bird

Fiat X1−9

Porsche 914−2Lo

tus

Euro

pa

Ford

Pan

tera

L

Ferra

ri Dino

Maserati Bora

Volv

o 14

2E

# add colors randomly: hues, and edge widths, are drawn with runif()
plot(as.phylo(hc), type = "fan",
     tip.color = hsv(runif(15, 0.65, 0.95), 1, 1, 0.7),
     edge.color = hsv(runif(10, 0.65, 0.75), 1, 1, 0.7),
     edge.width = runif(20, 0.5, 3),
     use.edge.length = TRUE, col = "gray80")

50

Page 51: Visualization

Mazda RX4

Mazda RX4 WagD

atsu

n 71

0H

ornet 4 Drive H

orne

t Spo

rtab

out

Valiant

Dus

ter 3

60

Merc 240D

Mer

c 23

0

Merc 280

Merc 280C

Merc 450SE

Merc 450SL

Merc 450SLC

Cadillac Fleetwood

Lincoln Contin

ental

Chrysler Imperial

Fiat 128

Honda Civic

Toyota Corolla

Toyota Corona

Dodge ChallengerAMC Javelin

Cam

aro

Z28

Pon

tiac

Fire

bird

Fiat X1−9

Porsche 914−2Lo

tus

Euro

pa

Ford

Pan

tera

L

Ferra

ri Dino

Maserati Bora

Volv

o 14

2E

Triple heat map plot

library(reshape2)library (grid)library(ggplot2)

# X axis qualitative ggplot data: 20 individuals (ID1..ID20, levels reversed)
# by 4 columns of random letters A-G
datfx <- data.frame(
  indv = factor(paste0("ID", 1:20), levels = rev(paste0("ID", 1:20))),
  matrix(sample(LETTERS[1:7], 80, TRUE), ncol = 4)
)

# converting data to long form for ggplot2 use
datf1x <- melt(datfx, id.vars = "indv")

plotx <- ggplot(datf1x, aes(indv, variable)) +
  geom_tile(aes(fill = value), colour = "white") +
  scale_fill_manual(values = terrain.colors(7)) +
  scale_x_discrete(expand = c(0, 0))

px <- plotx

# Y axis qualitative ggplot data: 20 individuals (ID21..ID40, levels
# reversed) by 5 columns of random letters G-J
datfy <- data.frame(
  indv = factor(paste0("ID", 21:40), levels = rev(paste0("ID", 21:40))),
  matrix(sample(LETTERS[7:10], 100, TRUE), ncol = 5)
)
# converting data to long form for ggplot2 use
datf1y <- melt(datfy, id.vars = "indv")

ploty <- ggplot(datf1y, aes(variable, indv)) +
  geom_tile(aes(fill = value), colour = "white") +
  scale_fill_manual(values = c("cyan4", "midnightblue", "green2", "lightgreen")) +
  scale_x_discrete(expand = c(0, 0))

51

Page 52: Visualization

py <- ploty + theme(legend.position = "left", axis.title = element_blank())

# plot XY quantitative fill: 20 x 20 matrix of normal draws (mean 50, sd 10)
datfxy <- data.frame(
  indv = factor(paste0("ID", 1:20), levels = rev(paste0("ID", 1:20))),
  matrix(rnorm(400, 50, 10), ncol = 20)
)
names(datfxy) <- c("indv", paste0("ID", 21:40))
datfxy <- melt(datfxy, id.vars = "indv")
# NOTE(review): this renames the existing levels in place (it does not
# reorder them) — confirm that relabelling is the intent.
levels(datfxy$variable) <- rev(paste0("ID", 21:40))

pxy <- plotxy <- ggplot(datfxy, aes(indv, variable)) +
  geom_tile(aes(fill = value), colour = "white") +
  scale_fill_gradient(low = "red", high = "yellow") +
  theme(axis.title = element_blank())

# Define layout for the plots (2 rows, 2 columns)
layt <- grid.layout(nrow = 2, ncol = 2,
                    heights = c(6 / 8, 2 / 8), widths = c(2 / 8, 6 / 8),
                    default.units = c("null", "null"))
# View the layout of plots
grid.show.layout(layt)

52

Page 53: Visualization

(1, 1)0.75null

0.25null

(1, 2) 0.75null

0.75null

(2, 1)0.25null

0.25null

(2, 2)

0.75null

0.25null

# Draw plots one by one in their positions
grid.newpage()
pushViewport(viewport(layout = layt))
print(py, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(pxy, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))
print(px, vp = viewport(layout.pos.row = 2, layout.pos.col = 2))

53

Page 54: Visualization

ID40

ID39

ID38

ID37

ID36

ID35

ID34

ID33

ID32

ID31

ID30

ID29

ID28

ID27

ID26

ID25

ID24

ID23

ID22

ID21

X1X2X3X4X5

value

G

H

I

J

ID40

ID39

ID38

ID37

ID36

ID35

ID34

ID33

ID32

ID31

ID30

ID29

ID28

ID27

ID26

ID25

ID24

ID23

ID22

ID21

ID20ID19ID18ID17ID16ID15ID14ID13ID12ID11ID10 ID9 ID8 ID7 ID6 ID5 ID4 ID3 ID2 ID1

30

40

50

60

70

value

X1

X2

X3

X4

ID20ID19ID18ID17ID16ID15ID14ID13ID12ID11ID10 ID9 ID8 ID7 ID6 ID5 ID4 ID3 ID2 ID1indv

varia

ble

value

A

B

C

D

E

F

G

Mosaic plot for categorical data

# Three random categorical variables, 200 observations each
myd <- data.frame(
  fact1 = sample(c("A", "B", "C", "D"), 200, replace = TRUE),
  fact2 = sample(c("HL", "PS", "DS"), 200, replace = TRUE),
  fact3 = sample(c("Male", "Female"), 200, replace = TRUE)
)

# plot
# vcd package is for visualization of categorical data
library(vcd)  # library() stops with an error if vcd is not installed
mytable <- table(myd)
mosaic(mytable, shade = TRUE, legend = TRUE)

54