Visualization

Post on 20-Jul-2016

33 Views

Category:

Documents

4 Downloads

Preview:

Click to see full reader

DESCRIPTION

Exploring data with R

Transcript

Exploring Data with R

Abhik Seal

May 8, 2014

This is an introductory tutorial to get you started with visualizing and exploring data with R. There are some popular books and many online materials; I will provide the links and references at the end of the tutorial.

# ggplot2 provides the ggplot()/qplot() plotting functions used below;
# gcookbook provides the heightweight and climate example data sets.
library(ggplot2)
library(gcookbook)

Scatter Plots and line plots

# Base-graphics scatter plot of stopping distance against speed (cars data).
plot(
  cars$dist ~ cars$speed,                              # y ~ x
  main = "Relationship between car distance & speed",  # plot title
  xlab = "Speed (miles per hour)",                     # x axis title
  ylab = "Distance travelled (miles)",                 # y axis title
  xlim = c(0, 30),                                     # x axis limits 0 to 30
  yaxs = "i",                                          # y axis style: internal
  col = "red",                                         # red plotting symbols
  pch = 19                                             # filled dots
)

0 5 10 15 20 25 30

2040

6080

120

Relationship between car distance & speed

Speed (miles per hour)

Dis

tanc

e tr

avel

led

(mile

s)

Let’s draw vertical error bars of ±5% on a scatter plot of the mtcars data using the arrows() function.

1

plot(mpg ~ disp, data = mtcars)

# Vertical error bars from 95% to 105% of each mpg value. code = 3 draws a
# head at both ends and angle = 90 turns the heads into flat caps.
arrows(
  x0 = mtcars$disp, y0 = mtcars$mpg * 0.95,
  x1 = mtcars$disp, y1 = mtcars$mpg * 1.05,
  angle = 90, code = 3, length = 0.04, lwd = 0.4
)

100 200 300 400

1015

2025

30

disp

mpg

How to draw histograms in the top and right margins of a bivariate scatter plot

# 2 x 2 layout: panel 1 (bottom-left) is the scatter plot, panel 2 (top-left)
# the histogram of speed, panel 3 (bottom-right) the histogram of distance.
# The top-right cell (0) is left empty.
layout(
  matrix(c(2, 0, 1, 3), 2, 2, byrow = TRUE),
  widths = c(3, 1), heights = c(1, 3), respect = TRUE
)

# Panel 1: scatter plot. par() returns the previous margins so they can be
# restored once the figure is complete.
old_par <- par(mar = c(5.1, 4.1, 0.1, 0))
plot(
  cars$dist ~ cars$speed,               # y ~ x
  xlab = "Speed (miles per hour)",      # x axis title
  ylab = "Distance travelled (miles)",  # y axis title
  xlim = c(0, 30),                      # x axis limits 0 to 30
  ylim = c(0, 140),                     # y axis limits 0 to 140
  xaxs = "i",                           # x axis style: internal
  yaxs = "i",                           # y axis style: internal
  col = "red",                          # red plotting symbols
  pch = 19                              # filled dots
)

# Panel 2: histogram of speed in the top margin
par(mar = c(0, 4.1, 3, 0))
hist(cars$speed, ann = FALSE, axes = FALSE, col = "black", border = "white")

# Panel 3: histogram of distance in the right margin, drawn as a horizontal
# bar plot of the pre-computed bin densities
yhist <- hist(cars$dist, plot = FALSE)
par(mar = c(5.1, 0, 0.1, 1))
barplot(
  yhist$density,
  horiz = TRUE, space = 0, axes = FALSE, col = "black", border = "white"
)

# Return to a single-panel device with the original margins so that later
# base-graphics plots are not drawn into this layout.
layout(1)
par(old_par)

0 5 10 15 20 25 30

2040

6080

100

120

Speed (miles per hour)

Dis

tanc

e tr

avel

led

(mile

s)

# Using the ggplot2 library: scatter plot of mpg against weight
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point()

3

10

15

20

25

30

35

2 3 4 5wt

mpg

# Multiple lines in a plot
plot(pressure$temperature, pressure$pressure, type = "l")
points(pressure$temperature, pressure$pressure)

# Second series on the same axes: half the pressure, in red
lines(pressure$temperature, pressure$pressure / 2, col = "red")
points(pressure$temperature, pressure$pressure / 2, col = "red")

4

0 50 150 250 350

020

040

060

080

0

pressure$temperature

pres

sure

$pre

ssur

e

# Line graph of pressure against temperature
ggplot(data = pressure, mapping = aes(x = temperature, y = pressure)) +
  geom_line()

0

200

400

600

800

0 100 200 300temperature

pres

sure

5

# Lines and points together
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_line() +
  geom_point()

0

200

400

600

800

0 100 200 300temperature

pres

sure

# Showing lines along the axes
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_line() +
  geom_point() +
  theme(axis.line = element_line(colour = "black"))

6

0

200

400

600

800

0 100 200 300temperature

pres

sure

# Logarithmic axes: both x and y are put on a log10 scale
ggplot(pressure, aes(x = temperature, y = pressure)) +
  geom_line() +
  geom_point() +
  theme(axis.line = element_line(colour = "black")) +
  scale_x_log10() +
  scale_y_log10()

7

1e−03

1e−01

1e+01

1e+03

100temperature

pres

sure

From library(gcookbook) I am using the heightweight dataset to group data points by a variable. The grouping variable must be categorical—in other words, a factor or character vector.

# Group points by sex using both shape and colour. Other shapes and colours
# can be chosen with scale_shape_manual() and scale_colour_manual().
ggplot(
  heightweight,
  aes(x = ageYear, y = heightIn, shape = sex, colour = sex)
) +
  geom_point()

8

50

55

60

65

70

12 14 16ageYear

heig

htIn sex

f

m

# Change shape of points
ggplot(heightweight, aes(x = ageYear, y = heightIn)) +
  geom_point(shape = 3)

50

55

60

65

70

12 14 16ageYear

heig

htIn

9

# Change point size; sex is categorical and is mapped to the point shape,
# with the two shapes picked manually.
ggplot(heightweight, aes(x = ageYear, y = heightIn, shape = sex)) +
  geom_point(size = 3) +
  scale_shape_manual(values = c(1, 4))

50

55

60

65

70

12 14 16ageYear

heig

htIn sex

f

m

# Represent a third continuous variable using color or size.

# ageYear is mapped to the fill colour; shape 21 is used so that the points
# have a fill, and guide_legend() shows a discrete legend at the given breaks.
ggplot(heightweight, aes(x = weightLb, y = heightIn, fill = ageYear)) +
  geom_point(shape = 21, size = 2.5) +
  scale_fill_gradient(
    low = "black", high = "white", breaks = 12:17,
    guide = guide_legend()
  )

10

50

55

60

65

70

50 75 100 125 150 175weightLb

heig

htIn

ageYear

12

13

14

15

16

17

Adding Fitted Regression Model Lines

# Base plot object, then points plus a fitted linear-model line
sp <- ggplot(heightweight, aes(x = ageYear, y = heightIn))
sp + geom_point() + stat_smooth(method = lm)

50

55

60

65

70

12 14 16ageYear

heig

htIn

11

# Adding annotations to regression plot
model <- lm(heightIn ~ ageYear, heightweight)
summary(model)

# First generate prediction data.
#
# Given a model, predict values of yvar from xvar.
# This supports one predictor and one predicted variable.
#
# model:   a fitted model object that predict() understands.
# xvar:    name (string) of the predictor column.
# yvar:    name (string) to give the predicted column.
# xrange:  if NULL, determine the x range from the model object ("lm", "glm"
#          and "loess" models are handled). If a vector with two numbers, use
#          those as the min and max of the prediction range.
# samples: number of samples across the x range.
# ...:     further arguments to be passed to predict().
#
# Returns a data frame with columns named xvar and yvar.
predictvals <- function(model, xvar, yvar, xrange = NULL, samples = 100, ...) {
  # If xrange isn't passed in, determine it from the model.
  # The x values are stored differently depending on the model type.
  if (is.null(xrange)) {
    if (inherits(model, c("lm", "glm"))) {
      xrange <- range(model$model[[xvar]])
    } else if (inherits(model, "loess")) {
      xrange <- range(model$x)
    } else {
      # Fail with a clear message instead of an obscure seq() error below
      stop(
        "cannot determine 'xrange' for a model of class '",
        paste(class(model), collapse = "/"),
        "'; please supply 'xrange'",
        call. = FALSE
      )
    }
  }

  newdata <- data.frame(x = seq(xrange[1], xrange[2], length.out = samples))
  names(newdata) <- xvar
  newdata[[yvar]] <- predict(model, newdata = newdata, ...)
  newdata
}

# Predicted heights across the observed age range, drawn as a line over the
# scatter plot
pred <- predictvals(model, "ageYear", "heightIn")
sp <- ggplot(heightweight, aes(x = ageYear, y = heightIn)) +
  geom_point() +
  geom_line(data = pred)

# parse = TRUE renders the label as a plotmath expression
sp + annotate("text", label = "r^2 == 0.42", x = 16.5, y = 52, parse = TRUE)

12

r2 = 0.4250

55

60

65

70

12 14 16ageYear

heig

htIn

Scatter plot matrix and correlation matrix using mtcars dataset and first five variables

library(corrplot)

# Scatter plot matrix of the first five mtcars variables
pairs(mtcars[, 1:5])

mpg

4 6 8 50 250

1025

46

8

cyl

disp

100

400

5025

0

hp

10 25 100 400 3.0 4.5

3.0

4.5

drat

13

# Scatter plot with correlations in the upper triangle, smoothing lines in the
# lower triangle, and histograms on the diagonal

# Panel function: print the absolute correlation of x and y in the middle of
# the panel, with a text size that grows with the correlation.
#
# digits:  significant digits used to format the correlation.
# prefix:  string pasted in front of the formatted correlation.
# cex.cor: base text size; if missing it is chosen so the text fits the panel.
# ...:     accepted so the function can be used as a pairs() panel; not used.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  # Switch to unit coordinates for the text; par() returns the previous
  # setting as a named list, which is what par() needs to restore it.
  old_par <- par(usr = c(0, 1, 0, 1))
  on.exit(par(old_par), add = TRUE)

  r <- abs(cor(x, y, use = "complete.obs"))
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) {
    cex.cor <- 0.8 / strwidth(txt)
  }
  text(0.5, 0.5, txt, cex = cex.cor * (1 + r) / 2)
}

# Panel function: draw a histogram of x, with bar heights scaled so that the
# tallest bar is 1 on a y axis running from 0 to 1.5.
#
# ...: further arguments passed to rect().
panel.hist <- function(x, ...) {
  usr <- par("usr")
  # Keep the x limits, replace the y limits; restore the old usr on exit
  old_par <- par(usr = c(usr[1:2], 0, 1.5))
  on.exit(par(old_par), add = TRUE)

  h <- hist(x, plot = FALSE)
  breaks <- h$breaks
  nB <- length(breaks)
  y <- h$counts
  y <- y / max(y)
  rect(breaks[-nB], 0, breaks[-1], y, col = "white", ...)
}

# Correlations above the diagonal, histograms on it, smoothers below it
pairs(
  mtcars[, 1:5],
  upper.panel = panel.cor,
  diag.panel = panel.hist,
  lower.panel = panel.smooth
)

mpg4 6 8

0.85 0.85

50 250

0.78

10250.68

46

8 cyl0.90 0.83 0.70

disp0.79

100

400

0.71

5025

0 hp0.45

10 25 100 400 3.0 4.5

3.0

4.5drat

14

# Correlation matrix of all mtcars variables, drawn with corrplot defaults
mcor <- cor(mtcars)
corrplot(mcor)

−1

−0.8

−0.6

−0.4

−0.2

0

0.2

0.4

0.6

0.8

1

mpg

cyl

disp

hp drat

wt

qsec

vs am gear

carb

mpg

cyl

disp

hp

drat

wt

qsec

vs

am

gear

carb

# Correlation matrix with colored squares and black, rotated labels
corrplot(mcor, method = "shade", shade.col = NA, tl.col = "black", tl.srt = 45)

15

−1

−0.8

−0.6

−0.4

−0.2

0

0.2

0.4

0.6

0.8

1m

pgcy

ldis

php dr

atwt qs

ecvs am ge

arca

rb

mpg

cyl

disp

hp

drat

wt

qsec

vs

am

gear

carb

# Create a three-dimensional (3D) scatter plot.
library(rgl)
plot3d(mtcars$wt, mtcars$disp, mtcars$mpg, type = "s", size = 0.75, lit = FALSE)

# add vertical segments to help give a sense of the spatial positions of the points

# Interleave two vectors element by element: v1[1], v2[1], v1[2], v2[2], ...
# rbind() stacks them as two rows and as.vector() reads the result column by
# column; a shorter v2 (e.g. a single number) is recycled by rbind().
interleave <- function(v1, v2) {
  as.vector(rbind(v1, v2))
}

# Plot the points
plot3d(
  mtcars$wt, mtcars$disp, mtcars$mpg,
  xlab = "Weight", ylab = "Displacement", zlab = "MPG",
  size = .75, type = "s", lit = FALSE
)

# Add the segments: consecutive pairs of coordinates are the two ends of one
# segment, so each point is joined to the same (wt, disp) at the minimum mpg.
segments3d(
  interleave(mtcars$wt, mtcars$wt),
  interleave(mtcars$disp, mtcars$disp),
  interleave(mtcars$mpg, min(mtcars$mpg)),
  alpha = 0.4, col = "blue"
)

Scatter plot with jitter rugs, spikes and density

# Simulated data: y is a noisy linear function of x
x <- rnorm(1000, 50, 30)
y <- 3 * x + rnorm(1000, 0, 20)

library(Hmisc)
plot(x, y)

# scat1d adds tick marks (bar codes, rug plot)
# on any of the four sides of an existing plot,
# corresponding with non-missing values of a vector x.
scat1d(x, col = "red")      # density bars on top of graph
scat1d(y, 4, col = "blue")  # density bars at right

16

−50 0 50 100 150

−20

00

100

200

300

400

x

y

plot(x, y, pch = 20)

# Spike histograms along the x axis and on side 4 (right)
histSpike(x, add = TRUE, col = "green4", lwd = 2)
histSpike(y, 4, add = TRUE, col = "blue", lwd = 2)

# Smooth density curves on the same two sides
histSpike(x, type = "density", col = "red", add = TRUE)  # smooth density at bottom
histSpike(y, 4, type = "density", col = "red", add = TRUE)

17

−50 0 50 100 150

−20

00

100

200

300

400

x

y

Bar graphs and Histograms

# One bar per row of BOD: height is demand, label is Time
barplot(height = BOD$demand, names.arg = BOD$Time)

18

1 2 3 4 5 7

05

1015

# Using the table function: bar heights are the counts of each cyl value
barplot(table(mtcars$cyl))

4 6 8

02

46

810

14

19

# Bar graph of demand against Time with qplot(); Time is numeric here
qplot(
  BOD$Time, BOD$demand,
  geom = "bar", stat = "identity"
)

0

5

10

15

20

2 4 6BOD$Time

BO

D$d

eman

d

# Considering Time as a factor, so that the x axis is discrete
qplot(factor(BOD$Time), BOD$demand, geom = "bar", stat = "identity")

20

0

5

10

15

20

1 2 3 4 5 7factor(BOD$Time)

BO

D$d

eman

d

# cyl is continuous here
qplot(mtcars$cyl)

0

5

10

4 5 6 7 8mtcars$cyl

coun

t

21

# Treat cyl as discrete
qplot(factor(mtcars$cyl))

0

5

10

4 6 8factor(mtcars$cyl)

coun

t

# Bar graph of values. This uses the BOD data frame, with the
# "Time" column for x values and the "demand" column for y values.
ggplot(BOD, aes(x = Time, y = demand)) +
  geom_bar(stat = "identity")

22

0

5

10

15

20

2 4 6Time

dem

and

# Bar graph of counts: one white, black-outlined bar per cyl value
ggplot(mtcars, aes(x = factor(cyl))) +
  geom_bar(fill = "white", color = "black")

0

5

10

4 6 8factor(cyl)

coun

t

23

# Set the width of the bins explicitly with binwidth
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(binwidth = 4, fill = "white", colour = "black")

0

2

4

6

8

10 20 30 40mpg

coun

t

# Change where the bins start: boundary places a bin edge at 20.
# (boundary replaces the deprecated origin parameter of geom_histogram().)
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(
    binwidth = 4, fill = "white", colour = "black", boundary = 20
  )

24

0

2

4

6

20 25 30 35mpg

coun

t

Histograms of multiple groups of data

library(MASS)

# One histogram of height per sex, stacked vertically
ggplot(heightweight, aes(x = heightIn)) +
  geom_histogram(fill = "white", colour = "black") +
  facet_grid(sex ~ .)

25

0

5

10

15

20

0

5

10

15

20

fm

50 55 60 65 70heightIn

coun

t

# Work on a copy so that heightweight itself keeps its original labels
hw <- heightweight

# Using plyr and revalue() to change the names on sex variable
library(plyr)
hw$sex <- revalue(hw$sex, c("f" = "Female", "m" = "Male"))

# Using faceting
ggplot(hw, aes(x = heightIn)) +
  geom_histogram(fill = "white", colour = "black") +
  facet_grid(sex ~ .)

26

0

5

10

15

20

0

5

10

15

20

Fem

aleM

ale

50 55 60 65 70heightIn

coun

t

# Overlaid, semi-transparent density-scaled histograms per sex, with a
# density curve for each group on top
ggplot(hw, aes(x = heightIn, y = ..density.., fill = sex)) +
  geom_histogram(position = "identity", alpha = 0.4) +
  theme_bw() +
  geom_density(alpha = 0.3)

0.00

0.05

0.10

0.15

0.20

0.25

50 55 60 65 70heightIn

dens

ity

sex

Female

Male

27

Negative and Positive Bar plot

# Berkeley series from 1900 onwards
csub <- subset(climate, Source == "Berkeley" & Year >= 1900)
head(csub)

# Flag non-negative anomalies so they can be filled differently
csub$pos <- csub$Anomaly10y >= 0

ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(stat = "identity", color = "black", position = "identity")

0.0

0.5

1920 1950 1980Year

Ano

mal

y10y pos

FALSE

TRUE

Error Bar plot in ggplot2

# Two groups of 12 observations, each group with a constant error size
myd <- data.frame(
  X = c(1:12, 1:12),
  Y = c(
    8, 12, 13, 18, 22, 16, 24, 29, 34, 15, 8, 6,
    9, 10, 12, 18, 26, 28, 28, 30, 20, 10, 9, 9
  ),
  group = rep(c("X-Group", "Y-group"), each = 12),
  error = rep(c(2.5, 3.0), each = 12)
)

# The error bars are added first and run from Y up to Y + error; bars and
# error bars use the same dodge width so that they line up. The second bar
# layer adds the black outline and is kept out of the legend.
plt <- ggplot(data = myd, aes(x = X, y = Y, fill = group, width = 0.8)) +
  geom_errorbar(
    aes(ymin = Y, ymax = Y + error, width = 0.2),
    position = position_dodge(width = 0.8)
  ) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8)) +
  geom_bar(
    stat = "identity", position = position_dodge(width = 0.8),
    colour = "black", show.legend = FALSE
  ) +
  scale_fill_manual(values = c("grey70", "white")) +
  scale_x_discrete("X", limits = c(1:12)) +
  scale_y_continuous(
    "Y (units)",
    expand = c(0, 0), limits = c(0, 40), breaks = seq(0, 40, by = 5)
  ) +
  ggtitle("My nice plot") +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.title.y = element_text(face = "bold", size = 12, angle = 90),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(angle = 90, hjust = 0.5),
    legend.title = element_blank(),
    legend.position = c(0.85, 0.85),
    legend.key.size = unit(1.5, "lines"),
    legend.key = element_rect()
  )

plt

05

1015

2025

3035

40

1 2 3 4 5 6 7 8 9 10 11 12X

Y (

units

)

X−Group

Y−group

My nice plot

Box plots

# Using the ToothGrowth dataset
# Formula syntax
boxplot(len ~ supp, data = ToothGrowth)

29

OJ VC

510

1520

2530

35

# Put interaction of two variables on x-axis
boxplot(len ~ supp + dose, data = ToothGrowth)

OJ.0.5 OJ.1 OJ.2

510

1520

2530

35

30

# The same box plot of len by supp, drawn with ggplot2
ggplot(ToothGrowth, aes(x = supp, y = len)) +
  geom_boxplot()

10

20

30

OJ VCsupp

len

# Adding notches
ggplot(ToothGrowth, aes(x = supp, y = len)) +
  geom_boxplot(notch = TRUE)

31

10

20

30

OJ VCsupp

len

# Adding mean: stat_summary() draws the group mean as a white triangle
ggplot(ToothGrowth, aes(x = supp, y = len)) +
  geom_boxplot() +
  stat_summary(
    fun.y = "mean", geom = "point", shape = 24, size = 4, fill = "white"
  )

10

20

30

OJ VCsupp

len

32

# Put the interaction of supp and dose on the x axis: one box per
# combination of the two variables
ggplot(ToothGrowth, aes(x = interaction(supp, dose), y = len)) +
  geom_boxplot()

10

20

30

OJ.0.5 VC.0.5 OJ.1 VC.1 OJ.2 VC.2interaction(supp, dose)

len

Violin plots are a way of comparing multiple data distributions

# Use the heightweight dataset
sp <- ggplot(heightweight, aes(x = sex, y = heightIn))

# Violin (untrimmed, with a smoother density via adjust), a narrow box plot
# without outlier points inside it, and the group mean as a white triangle.
sp +
  geom_violin(trim = FALSE, adjust = 2) +
  geom_boxplot(width = .1, fill = "Grey", outlier.colour = NA) +
  theme_bw() +
  stat_summary(
    fun.y = "mean", geom = "point", shape = 24, size = 4, fill = "white"
  )

33

50

60

70

f msex

heig

htIn

Plotting curves

# Draw the expression x^3 - 5x over the interval [-4, 4]
curve(
  x^3 - 5 * x,
  from = -4,
  to = 4
)

34

−4 −2 0 2 4

−40

−20

020

40

x

x^3

− 5

* x

# Plot a user-defined function

# Logistic curve centred at 10: returns 1 / (1 + exp(-xvar + 10)), i.e. 0.5
# at xvar = 10. Works element-wise on a numeric vector.
myfun <- function(xvar) {
  1 / (1 + exp(-xvar + 10))
}

curve(myfun(x), from = 0, to = 20)
# Add a line:
curve(1 - myfun(x), add = TRUE, col = "red")

35

0 5 10 15 20

0.0

0.2

0.4

0.6

0.8

1.0

x

myf

un(x

)

# This sets the x range from 0 to 20
ggplot(data.frame(x = c(0, 20)), aes(x = x)) +
  stat_function(fun = myfun, geom = "line")

0.00

0.25

0.50

0.75

1.00

0 5 10 15 20x

y

36

Miscellaneous plots

Making Density Plot of Two-Dimensional Data

# Base plot object for the faithful data, reused by the plots that follow
p <- ggplot(faithful, aes(x = eruptions, y = waiting))

# Points with 2D density contours on top
p + geom_point() + stat_density2d()

50

60

70

80

90

2 3 4 5eruptions

wai

ting

# Contour lines only, coloured by the density level they represent
p +
  stat_density2d(aes(colour = ..level..))

37

50

60

70

80

90

2 3 4 5eruptions

wai

ting

0.005

0.010

0.015

0.020

level

# Density estimate mapped to fill colour and drawn as a raster, not contours
p +
  stat_density2d(
    aes(fill = ..density..),
    geom = "raster",
    contour = FALSE
  )

50

60

70

80

90

2 3 4 5eruptions

wai

ting

0.005

0.010

0.015

0.020

0.025density

38

# With points, and map density estimate to alpha
p +
  geom_point() +
  stat_density2d(aes(alpha = ..density..), geom = "tile", contour = FALSE)

50

60

70

80

90

2 3 4 5eruptions

wai

ting

density

0.005

0.010

0.015

0.020

0.025

Plotting Pie Charts

library(RColorBrewer)

# Slice sizes and their labels, in matching order
slices <- c(10, 12, 4, 16, 8)
lbls <- c("IN", "AK", "ID", "MA", "MO")

pie(
  slices,
  labels = lbls, main = "Pie Chart of Countries",
  col = brewer.pal(7, "Set1")
)

39

IN

AK

ID

MA

MO

Pie Chart of Countries

Pie Chart with Percentages

slices <- c(10, 12, 4, 16, 8)
lbls <- c("IN", "AK", "ID", "MA", "MO")

# Share of each slice as a whole-number percentage
pct <- round(slices / sum(slices) * 100)
lbls <- paste(lbls, pct)   # add percents to labels
lbls <- paste0(lbls, "%")  # add % to labels

pie(
  slices,
  labels = lbls, col = rainbow(length(lbls)),
  main = "Pie Chart of US States"
)

40

IN 20%

AK 24%

ID 8%

MA 32%

MO 16%

Pie Chart of US States

3D Pie chart

library(plotrix)

slices <- c(10, 12, 4, 16, 8)
lbls <- c("IN", "AK", "ID", "MA", "MO")

# explode pulls the slices apart; brewer.pal() comes from RColorBrewer,
# which was loaded for the first pie chart.
pie3D(
  slices,
  labels = lbls, explode = 0.1,
  main = "Pie Chart of Countries ", col = brewer.pal(7, "Set1")
)

41

Pie Chart of Countries

INAK

ID

MA

MO

A dendrogram is the fancy word that we use to name a tree diagram that displays the groups formed by hierarchical clustering.

Using the corrgram package

library(corrgram)

# Correlation matrix of all mtcars variables
R <- cor(mtcars)

# default corrgram
corrgram(R)

42

mpg

cyl

disp

hp

drat

wt

qsec

vs

am

gear

carb

# corrgram with pie charts in the upper triangle and shading in the lower one
corrgram(
  R,
  order = TRUE, lower.panel = panel.shade, upper.panel = panel.pie,
  text.panel = panel.txt, main = "mtcars Data"
)

gear

am

drat

mpg

vs

qsec

wt

disp

cyl

hp

carb

mtcars Data

43

The package ellipse provides the function plotcorr() that helps us to visualize correlations. plotcorr() uses ellipse-shaped glyphs for each entry of the correlation matrix. Here’s the default plot using our correlation matrix R:

# default corrgram
library(ellipse)
plotcorr(R)

mpgcyl

disphp

dratwt

qsecvs

amgearcarb

mpg

cyl

disp

hp drat

wt

qsec

vs am gear

carb

# colored corrgram: 10 colours interpolated from firebrick3 via white to navy
plotcorr(R, col = colorRampPalette(c("firebrick3", "white", "navy"))(10))

44

mpgcyl

disphp

dratwt

qsecvs

amgearcarb

mpg

cyl

disp

hp drat

wt

qsec

vs am gear

carb

Another colored corrgram

# Lower triangle only, with a 10-step orange-white-purple palette
plotcorr(
  R,
  col = colorRampPalette(c("#E08214", "white", "#8073AC"))(10),
  type = "lower"
)

cyldisp

hpdrat

wtqsec

vsam

gearcarb

mpg

cyl

disp

hp drat

wt

qsec

vs am gear

45

Visualizing Dendrograms

# prepare hierarchical cluster
hc <- hclust(dist(mtcars))
plot(hc, hang = -1)  # labels at the same level

Mas

erat

i Bor

aC

hrys

ler

Impe

rial

Cad

illac

Fle

etw

ood

Linc

oln

Con

tinen

tal

For

d P

ante

ra L

Dus

ter

360

Cam

aro

Z28

Hor

net S

port

abou

tP

ontia

c F

irebi

rdH

orne

t 4 D

rive

Val

iant

Mer

c 45

0SLC

Mer

c 45

0SE

Mer

c 45

0SL

Dod

ge C

halle

nger

AM

C J

avel

inH

onda

Civ

icTo

yota

Cor

olla

Fia

t 128

Fia

t X1−

9F

erra

ri D

ino

Lotu

s E

urop

aM

erc

230

Vol

vo 1

42E

Dat

sun

710

Toyo

ta C

oron

aP

orsc

he 9

14−

2M

erc

240D

Maz

da R

X4

Maz

da R

X4

Wag

Mer

c 28

0M

erc

280C

030

0

Cluster Dendrogram

hclust (*, "complete")dist(mtcars)

Hei

ght

An alternative way to produce dendrograms is to explicitly convert hclust objects into dendrogram objects.

# using dendrogram objects
hcd <- as.dendrogram(hc)
# alternative way to get a dendrogram
plot(hcd)

46

010

020

030

040

0

Mas

erat

i Bor

aC

hrys

ler

Impe

rial

Cad

illac

Fle

etw

ood

Linc

oln

Con

tinen

tal

For

d P

ante

ra L

Dus

ter

360

Cam

aro

Z28

Hor

net S

port

abou

tP

ontia

c F

irebi

rdH

orne

t 4 D

rive

Val

iant

Mer

c 45

0SLC

Mer

c 45

0SE

Mer

c 45

0SL

Dod

ge C

halle

nger

AM

C J

avel

inH

onda

Civ

icTo

yota

Cor

olla

Fia

t 128

Fia

t X1−

9F

erra

ri D

ino

Lotu

s E

urop

aM

erc

230

Vol

vo 1

42E

Dat

sun

710

Toyo

ta C

oron

aP

orsc

he 9

14−

2M

erc

240D

Maz

da R

X4

Maz

da R

X4

Wag

Mer

c 28

0M

erc

280C

Having an object of class dendrogram, we can also plot the branches in a triangular form.

# using dendrogram objects
plot(hcd, type = "triangle")

47

010

020

030

040

0

Mas

erat

i Bor

aC

hrys

ler

Impe

rial

Cad

illac

Fle

etw

ood

Linc

oln

Con

tinen

tal

For

d P

ante

ra L

Dus

ter

360

Cam

aro

Z28

Hor

net S

port

abou

tP

ontia

c F

irebi

rdH

orne

t 4 D

rive

Val

iant

Mer

c 45

0SLC

Mer

c 45

0SE

Mer

c 45

0SL

Dod

ge C

halle

nger

AM

C J

avel

inH

onda

Civ

icTo

yota

Cor

olla

Fia

t 128

Fia

t X1−

9F

erra

ri D

ino

Lotu

s E

urop

aM

erc

230

Vol

vo 1

42E

Dat

sun

710

Toyo

ta C

oron

aP

orsc

he 9

14−

2M

erc

240D

Maz

da R

X4

Maz

da R

X4

Wag

Mer

c 28

0M

erc

280C

Phylogenetic trees

library(ape)
# plot basic tree
plot(as.phylo(hc), cex = 0.9, label.offset = 1)

48

Mazda RX4Mazda RX4 Wag

Datsun 710

Hornet 4 DriveHornet Sportabout

Valiant

Duster 360

Merc 240D

Merc 230

Merc 280Merc 280C

Merc 450SEMerc 450SLMerc 450SLC

Cadillac FleetwoodLincoln ContinentalChrysler Imperial

Fiat 128Honda CivicToyota Corolla

Toyota Corona

Dodge ChallengerAMC Javelin

Camaro Z28Pontiac Firebird

Fiat X1−9

Porsche 914−2

Lotus Europa

Ford Pantera L

Ferrari Dino

Maserati Bora

Volvo 142E

# fan
plot(as.phylo(hc), type = "fan")

49

Mazda RX4

Mazda RX4 WagD

atsu

n 71

0H

ornet 4 Drive H

orne

t Spo

rtab

out

Valiant

Dus

ter 3

60

Merc 240D

Mer

c 23

0

Merc 280

Merc 280C

Merc 450SE

Merc 450SL

Merc 450SLC

Cadillac Fleetwood

Lincoln Contin

ental

Chrysler Imperial

Fiat 128

Honda Civic

Toyota Corolla

Toyota Corona

Dodge ChallengerAMC Javelin

Cam

aro

Z28

Pon

tiac

Fire

bird

Fiat X1−9

Porsche 914−2Lo

tus

Euro

pa

Ford

Pan

tera

L

Ferra

ri Dino

Maserati Bora

Volv

o 14

2E

# add colors randomly: random hues for the tip labels and edges, and random
# edge widths
plot(
  as.phylo(hc),
  type = "fan",
  tip.color = hsv(runif(15, 0.65, 0.95), 1, 1, 0.7),
  edge.color = hsv(runif(10, 0.65, 0.75), 1, 1, 0.7),
  edge.width = runif(20, 0.5, 3),
  use.edge.length = TRUE, col = "gray80"
)

50

Mazda RX4

Mazda RX4 WagD

atsu

n 71

0H

ornet 4 Drive H

orne

t Spo

rtab

out

Valiant

Dus

ter 3

60

Merc 240D

Mer

c 23

0

Merc 280

Merc 280C

Merc 450SE

Merc 450SL

Merc 450SLC

Cadillac Fleetwood

Lincoln Contin

ental

Chrysler Imperial

Fiat 128

Honda Civic

Toyota Corolla

Toyota Corona

Dodge ChallengerAMC Javelin

Cam

aro

Z28

Pon

tiac

Fire

bird

Fiat X1−9

Porsche 914−2Lo

tus

Euro

pa

Ford

Pan

tera

L

Ferra

ri Dino

Maserati Bora

Volv

o 14

2E

Triple heat map plot

library(reshape2)
library(grid)
library(ggplot2)

# X-axis data for ggplot: 20 individuals (ID1..ID20) by 4 variables, each
# cell a randomly drawn letter A-G. The factor levels are reversed so that
# ID20 is the first level.
datfx <- data.frame(
  indv = factor(paste0("ID", 1:20), levels = rev(paste0("ID", 1:20))),
  matrix(sample(LETTERS[1:7], 80, replace = TRUE), ncol = 4)
)

# converting data to long form for ggplot2 use
datf1x <- melt(datfx, id.vars = "indv")

plotx <- ggplot(datf1x, aes(indv, variable)) +
  geom_tile(aes(fill = value), colour = "white") +
  scale_fill_manual(values = terrain.colors(7)) +
  scale_x_discrete(expand = c(0, 0))

px <- plotx

# Y-axis data for ggplot: 20 individuals (ID21..ID40) by 5 variables, each
# cell a randomly drawn letter G-J.
datfy <- data.frame(
  indv = factor(paste0("ID", 21:40), levels = rev(paste0("ID", 21:40))),
  matrix(sample(LETTERS[7:10], 100, replace = TRUE), ncol = 5)
)

# converting data to long form for ggplot2 use
datf1y <- melt(datfy, id.vars = "indv")

ploty <- ggplot(datf1y, aes(variable, indv)) +
  geom_tile(aes(fill = value), colour = "white") +
  scale_fill_manual(
    values = c("cyan4", "midnightblue", "green2", "lightgreen")
  ) +
  scale_x_discrete(expand = c(0, 0))

51

py <- ploty + theme(legend.position = "left", axis.title = element_blank())

# plot XY quantitative fill: a 20 x 20 grid of random normal values, with
# ID1..ID20 along x and ID21..ID40 along y
datfxy <- data.frame(
  indv = factor(paste0("ID", 1:20), levels = rev(paste0("ID", 1:20))),
  matrix(rnorm(400, 50, 10), ncol = 20)
)
names(datfxy) <- c("indv", paste0("ID", 21:40))
datfxy <- melt(datfxy, id.vars = "indv")

# Reverse the ORDER of the levels. Assigning to levels() would instead rename
# them and silently attach every value to the wrong ID.
datfxy$variable <- factor(datfxy$variable, levels = rev(paste0("ID", 21:40)))

pxy <- plotxy <- ggplot(datfxy, aes(indv, variable)) +
  geom_tile(aes(fill = value), colour = "white") +
  scale_fill_gradient(low = "red", high = "yellow") +
  theme(axis.title = element_blank())

# Define layout for the plots (2 rows, 2 columns)
layt <- grid.layout(
  nrow = 2, ncol = 2,
  heights = c(6 / 8, 2 / 8), widths = c(2 / 8, 6 / 8),
  default.units = c("null", "null")
)

# View the layout of plots
grid.show.layout(layt)

52

(1, 1)0.75null

0.25null

(1, 2) 0.75null

0.75null

(2, 1)0.25null

0.25null

(2, 2)

0.75null

0.25null

# Draw plots one by one in their positions
grid.newpage()
pushViewport(viewport(layout = layt))
print(py, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))   # left
print(pxy, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))  # centre
print(px, vp = viewport(layout.pos.row = 2, layout.pos.col = 2))   # bottom

53

ID40

ID39

ID38

ID37

ID36

ID35

ID34

ID33

ID32

ID31

ID30

ID29

ID28

ID27

ID26

ID25

ID24

ID23

ID22

ID21

X1X2X3X4X5

value

G

H

I

J

ID40

ID39

ID38

ID37

ID36

ID35

ID34

ID33

ID32

ID31

ID30

ID29

ID28

ID27

ID26

ID25

ID24

ID23

ID22

ID21

ID20ID19ID18ID17ID16ID15ID14ID13ID12ID11ID10 ID9 ID8 ID7 ID6 ID5 ID4 ID3 ID2 ID1

30

40

50

60

70

value

X1

X2

X3

X4

ID20ID19ID18ID17ID16ID15ID14ID13ID12ID11ID10 ID9 ID8 ID7 ID6 ID5 ID4 ID3 ID2 ID1indv

varia

ble

value

A

B

C

D

E

F

G

Mosaic plot for categorical data

# Three randomly drawn categorical variables, 200 observations each
myd <- data.frame(
  fact1 = sample(c("A", "B", "C", "D"), 200, replace = TRUE),
  fact2 = sample(c("HL", "PS", "DS"), 200, replace = TRUE),
  fact3 = sample(c("Male", "Female"), 200, replace = TRUE)
)

# plot
# vcd package is for visualization of categorical data
library(vcd)
mytable <- table(myd)
mosaic(mytable, shade = TRUE, legend = TRUE)

54

top related