Software
Almost all graphics and examples were created using Mondrian, so starting with Mondrian is the easiest way to begin.
If you are used to R and feel at home with its functionality, the iplots package is also a good option to try. iplots is quite similar to Mondrian.
A third option is ggobi, which also has many interactive features but may lack one plot type or another.
Mondrian
iplots
ggobi
R
Here is the R code used in the case studies, ready to copy and paste into your favorite R GUI.
A How to Pass an Exam
# Read in data (fill in your actual path!)
#
# The level recoding below needs Semester to be a factor. Since R 4.0.0
# read.table() no longer turns strings into factors by default, so the
# conversion is requested explicitly.
PTE <- read.table(".../students.txt", header = TRUE, sep = "\t", quote = "",
                  stringsAsFactors = TRUE)

# Make all >=8 to 8 and the factor numeric
#
# The 6th factor level is relabelled "8" (presumably the ">=8" category --
# verify against your data). Going through as.character() converts the
# level labels, not the internal integer codes.
levels(PTE$Semester)[6] <- "8"
PTE$Semester <- as.numeric(as.character(PTE$Semester))

# Dummy for even-numbered semesters (1 = even, 0 = odd)
#
even <- 1 - (PTE$Semester %% 2)

# Common slope in Semester, intercept shifted by the even-semester dummy
l1 <- lm(Total.Points ~ Semester + even, data = PTE)
summary(l1)

# Scatterplot with the two parallel fitted lines
plot(PTE$Semester, PTE$Total.Points)
abline(a = l1$coefficients[1],
       b = l1$coefficients[2])                      # even == 0
abline(a = l1$coefficients[1] + l1$coefficients[3],
       b = l1$coefficients[2])                      # even == 1
B Washing – What makes the Difference
# Create data on the fly
#
# expand.grid() builds the full 2 x 2 x 2 x 3 cross-classification with the
# first factor varying fastest; Fr holds the cell counts in that same order.
deter <- cbind(
  expand.grid(
    Preference = c("X", "M"),
    Temp       = c("Low", "High"),
    M.user     = c("No", "Yes"),
    WaterSoft  = c("Hard", "Medium", "Soft")
  ),
  Fr = c(68, 42, 42, 30, 37, 52, 24, 43,
         66, 50, 33, 23, 47, 55, 23, 47,
         63, 53, 29, 27, 57, 49, 19, 29)
)

# Define ordered levels
#
deter$WaterSoft <- ordered(deter$WaterSoft,
                           levels = c("Soft", "Medium", "Hard"))

# Create model
#
# Poisson log-linear model for the counts. terms(..., keep.order = TRUE)
# keeps the model terms in the order written in the formula instead of
# sorting them by interaction order.
deter.ll <- glm(
  terms(Fr ~ M.user * Temp * WaterSoft + Preference * M.user * Temp,
        keep.order = TRUE),
  family = poisson,
  data = deter
)
#
summary(deter.ll, correlation = FALSE)
C The Influence of Smoking on Birthweight
# Read in data (fill in your actual path!)
#
# Columns are reached through the data frame itself rather than attach(),
# so nothing is added to the search path and no names get masked.
Smoke <- read.table(".../birthweight.txt", header = TRUE, sep = "\t",
                    quote = "")
#
# Indicator: TRUE for mothers whose Smoking value is "now"
Smokes.Now <- Smoke$Smoking == "now"

# Two-sample t-test of birth weight by current smoking status, assuming
# equal variances in both groups
t.test(Birth.Weight ~ Smokes.Now, data = Smoke, var.equal = TRUE)
E Housing Rent Prices in Munich
# Read in data (fill in your actual path!)
#
# No attach() is needed: the model below receives the data frame directly.
rent <- read.table(".../rent.txt", header = TRUE, sep = "\t", quote = "")

# Create the model
#
# Rent on all other columns plus the Size:Built interaction, with District
# and Rent.per.sqm removed from the right-hand side.
m <- lm(Rent ~ . + Size * Built - District - Rent.per.sqm, data = rent)
summary(m)
F What makes a Tour de France Winner
# Read in data (fill in your actual path!)
#
TDF <- read.table(".../TDF2005.txt", header = TRUE, sep = "\t", quote = "")
#
st <- TDF[, 27:47]             # cumulative times (21 columns)
st <- st[!is.na(st[[21]]), ]   # filter out drop-outs (no value in column 21)
#
# calculate the correlation of each of the columns 1..20 with column 21
# and plot them against the stage number
#
sc <- vapply(st[, 1:20], cor, numeric(1), y = st[[21]])
plot(sc, col = 2, type = "l", xlab = "Stage", ylab = "Correlation")
#
# Simulation function: one random "tour".
#
# Draws independent uniform stage times for every rider, accumulates them
# over the stages, and returns the correlation of the cumulative times after
# each stage with the final cumulative times (the final stage itself is
# excluded).
#
# Arguments:
#   ...       ignored; lets the function be called as lapply(1:500, sim)
#   n_stages  number of stages (default 21)
#   n_riders  number of riders; defaults to the number of rows of the
#             global data frame `st`, which is only looked up when this
#             argument is not supplied
# Value: numeric vector of length n_stages - 1.
sim <- function(..., n_stages = 21L, n_riders = nrow(st)) {
  # random times: one row per stage, one column per rider
  stage_times <- matrix(runif(n_stages * n_riders), nrow = n_stages)
  # cumulative times per rider (column-wise running sum)
  cum_times <- apply(stage_times, 2, cumsum)
  final_times <- cum_times[n_stages, ]
  # correlation of every stage row with the final row, excluding the last
  apply(cum_times, 1, cor, final_times)[-n_stages]
}
# Simulate 500 times: each call to sim() yields 20 correlations, collected
# as the columns of a 20 x 500 matrix
s <- vapply(1:500, sim, numeric(20))

# Add the simulated envelope to the current plot: the median per stage
# (solid) and the 2.5% / 97.5% quantiles per stage (dashed)
lines(apply(s, 1, median))
lines(apply(s, 1, quantile, probs = 0.025), lty = 2)
lines(apply(s, 1, quantile, probs = 0.975), lty = 2)
G How to survive Thirty Years’ War
# Read in data (fill in your actual path!)
#
Augsburg <- read.table(".../augsburg.txt", header = TRUE, sep = "\t",
                       quote = "")
#
# Regression of Tax.1646 on Tax.1618 through the origin ("- 1" drops the
# intercept); print only the coefficient table of the model summary
summary(lm(Tax.1646 ~ Tax.1618 - 1, data = Augsburg))$coefficients
H Classification of Italian Olive Oils
# Read in data (fill in your actual path!)
#
# String columns are read as factors so that Region is a categorical
# response; since R 4.0.0 this is no longer the read.table() default.
olives <- read.table(".../olives.txt", header = TRUE, sep = "\t", quote = "",
                     stringsAsFactors = TRUE)
#
# load the rpart library
#
library(rpart)
#
# create the tree model: Region against all other variables among the
# first nine columns (per the original note, this excludes Area)
#
t1 <- rpart(Region ~ ., data = olives[, 1:9], method = "class")
#
# create confusion matrix (rows: predicted class, columns: observed Region)
#
t2 <- table(predict(t1, type = "class"), olives$Region)
#
# everything not on the diagonal is an error
#
sum(t2) - sum(diag(t2))
I E-Voting in the 2004 Florida Election
# Read in data (fill in your actual path!)
#
e <- read.table(".../election.txt", header = TRUE, sep = "\t", quote = "")

# rebuild Hout's model
hm <- lm(bush_change ~ bush2000pc + bush2000pc_sq +
           votes2004 + evote + bush2000pc_evote +
           bush2000pcsq_evote + votes_change +
           dole1996pc + income + hispanic, data = e)

# fitted vote counts: (fitted change + bush2000pc) * votes2004
# NOTE(review): predict(hm) has one value per row used in the fit; this
# lines up with the columns of `e` only if no rows were dropped for
# missing values -- confirm for your data.
p_votes <- (predict(hm) + e$bush2000pc) * e$votes2004
#
# create data with e-voting "removed": the e-voting indicator and both of
# its interaction columns are set to zero in a copy of the data
#
e0 <- e
e0$evote <- 0
e0$bush2000pc_evote <- 0
e0$bush2000pcsq_evote <- 0
#
# predict the new data with Hout's model
#
p0 <- predict(hm, newdata = e0)
p0_votes <- (p0 + e$bush2000pc) * e$votes2004
#
# calculate the difference
#
sum(p_votes - p0_votes)