Software

Almost all graphics and examples were created using Mondrian, so starting with Mondrian is the easiest way to follow along.

If you are used to R and feel at home using R functionality, the iplots package is also a good option to try. iplots is quite similar to Mondrian.

A third option is ggobi, which also has many interactive features but may lack one plot type or another.

Mondrian

- http://mondrian.theusRus.de

iplots

- http://www.iplots.org

ggobi

- http://www.ggobi.org

R

- http://www.r-project.org

Here is the R code used in the case studies, ready to copy and paste into your favorite R GUI.

A How to Pass an Exam

# Read in data (fill in your actual path!)
# stringsAsFactors = TRUE keeps Semester a factor on R >= 4.0, where
# read.table() no longer converts strings to factors by default; the
# levels() recoding below relies on Semester being a factor.
PTE <- read.table(".../students.txt", header = TRUE, sep = "\t", quote = "",
                  stringsAsFactors = TRUE)

# Relabel the 6th factor level (the ">=8" category, per the original note)
# as "8", then convert the factor to numeric via its labels rather than its
# internal integer codes.
levels(PTE$Semester)[6] <- "8"
PTE$Semester <- as.numeric(as.character(PTE$Semester))

# Dummy for even semesters (1 = even, 0 = odd)
even <- 1 - (PTE$Semester %% 2)

# Linear model: total points on semester plus the even-semester dummy
l1 <- lm(Total.Points ~ Semester + even, data = PTE)
summary(l1)

# Scatterplot with the two parallel fitted lines
plot(PTE$Semester, PTE$Total.Points)
# Line for even == 0: intercept and Semester slope
abline(a = coef(l1)[1], b = coef(l1)[2])
# Line for even == 1: intercept shifted by the dummy coefficient
abline(a = coef(l1)[1] + coef(l1)[3], b = coef(l1)[2])

B Washing – What makes the Difference

# Create data on the fly: one row per combination of the four factors
# (2 x 2 x 2 x 3 = 24 rows), with Fr holding the count for that combination.
# expand.grid() varies its first factor fastest, so Fr is listed in that
# order.
deter <- cbind(
  expand.grid(
    Preference = c("X", "M"),
    Temp       = c("Low", "High"),
    M.user     = c("No", "Yes"),
    WaterSoft  = c("Hard", "Medium", "Soft")
  ),
  Fr = c(68, 42, 42, 30, 37, 52, 24, 43,
         66, 50, 33, 23, 47, 55, 23, 47,
         63, 53, 29, 27, 57, 49, 19, 29)
)

# Define ordered levels for water softness (Soft < Medium < Hard)
deter$WaterSoft <- ordered(deter$WaterSoft,
                           levels = c("Soft", "Medium", "Hard"))

# Create the Poisson (log-linear) model.
# keep.order = TRUE (spelled out instead of the partially matched `keep`)
# keeps the model terms in the order written in the formula.
deter.ll <- glm(
  terms(Fr ~ M.user * Temp * WaterSoft + Preference * M.user * Temp,
        keep.order = TRUE),
  family = poisson,
  data = deter
)

# Model summary without the correlation matrix of the coefficients
summary(deter.ll, correlation = FALSE)

C The Influence of Smoking on Birthweight

# Read in data (fill in your actual path!)
Smoke <- read.table(".../birthweight.txt", header = TRUE, sep = "\t",
                    quote = "")

# Logical indicator: TRUE where the Smoking column equals "now".
# Columns are referenced through the data frame, so no attach() is needed.
Smokes.Now <- Smoke$Smoking == "now"

# Two-sample t-test of birth weight between the two groups, assuming equal
# variances (var.equal spelled out instead of the partially matched `var`)
t.test(Birth.Weight ~ Smokes.Now, data = Smoke, var.equal = TRUE)

E Housing Rent Prices in Munich

# Read in data (fill in your actual path!)
rent <- read.table(".../rent.txt", header = TRUE, sep = "\t", quote = "")

# Create the model: Rent on all other columns (`.`) plus the Size-by-Built
# interaction, with District and Rent.per.sqm removed from the predictors.
# The data frame is passed via data =, so no attach() is needed.
m <- lm(Rent ~ . + Size * Built - District - Rent.per.sqm, data = rent)
summary(m)

F What makes a Tour de France Winner

# Read in data (fill in your actual path!)
TDF <- read.table(".../TDF2005.txt", header = TRUE, sep = "\t", quote = "")

st <- TDF[, 27:47]             # cumulative times (columns 27 to 47)
st <- st[!is.na(st[[21]]), ]   # filter out drop-outs (NA in the 21st column)

# Calculate the correlation of each of stages 1..20 with the 21st column
# and plot them
sc <- vapply(st[, 1:20], cor, numeric(1), y = st[[21]])
plot(sc, col = 2, type = "l", xlab = "Stage", ylab = "Correlation")

# Simulation function: one run of uniformly random stage times with the
# same number of values as `st`. Arguments are ignored (`...`) so it can be
# called once per index of a replication loop. Returns 20 correlations.
sim <- function(...) {
  m <- matrix(runif(prod(dim(st))), 21)   # random times, 21 rows
  cm <- apply(m, 2, cumsum)               # cumulative times down each column
  apply(cm, 1, cor, cm[21, ])[-21]        # correlation of each row with
}                                         # row 21, excluding row 21 itself

# Simulate 500 times; one column per run, one row per stage (20 x 500)
s <- vapply(seq_len(500), sim, numeric(20))

# Overlay the simulated median and the 2.5% / 97.5% quantiles per stage
lines(apply(s, 1, median))
lines(apply(s, 1, quantile, probs = 0.025), lty = 2)
lines(apply(s, 1, quantile, probs = 0.975), lty = 2)

G How to survive Thirty Years’ War

# Read in data (fill in your actual path!)
Augsburg <- read.table(".../augsburg.txt", header = TRUE, sep = "\t",
                       quote = "")

# Regression of Tax.1646 on Tax.1618 without an intercept (`- 1`);
# show the coefficient table of the fit
coef(summary(lm(Tax.1646 ~ Tax.1618 - 1, data = Augsburg)))

H Classification of Italian Olive Oils

# load the rpart library
library(rpart)

# Read in data (fill in your actual path!)
# stringsAsFactors = TRUE keeps text columns such as Region as factors on
# R >= 4.0, matching the read.table() default this script was written for.
olives <- read.table(".../olives.txt", header = TRUE, sep = "\t", quote = "",
                     stringsAsFactors = TRUE)

# create the tree model on columns 1:9 only (exclude Area!)
t1 <- rpart(Region ~ ., data = olives[, 1:9])

# create confusion matrix: predicted class (rows) vs. actual Region (columns)
t2 <- table(predict(t1, type = "class"), olives$Region)

# everything not on the diagonal is an error
sum(t2) - sum(diag(t2))

I E-Voting in the 2004 Florida Election

# Read in data (fill in your actual path!)
e <- read.table(".../election.txt", header = TRUE, sep = "\t", quote = "")

# rebuild Hout's model
hm <- lm(
  bush_change ~ bush2000pc + bush2000pc_sq +
    votes2004 + evote + bush2000pc_evote +
    bush2000pcsq_evote + votes_change +
    dole1996pc + income + hispanic,
  data = e
)

# fitted values plus bush2000pc, multiplied by votes2004
p_votes <- (predict(hm) + e$bush2000pc) * e$votes2004

# create data with e-voting "removed": the e-voting indicator and both of
# its interaction columns are set to zero
e0 <- e
null <- rep(0, length(predict(hm)))
e0$evote <- null
e0$bush2000pc_evote <- null
e0$bush2000pcsq_evote <- null

# predict the new data with Hout's model
p0 <- predict(hm, newdata = e0)
p0_votes <- (p0 + e$bush2000pc) * e$votes2004

# calculate the difference between the two predictions, summed over all rows
sum(p_votes - p0_votes)