Software

 

Almost all graphics and examples were created with Mondrian, so starting out with Mondrian will be easiest.

If you are used to R and feel at home with its functionality, the iplots package is also a good option to try; iplots is quite similar to Mondrian (see the short sketch after the list of links below).

A third option is ggobi, which also offers many interactive features, but may lack one or the other plot.


Mondrian

    - http://mondrian.theusRus.de


iplots

    - http://www.iplots.org


ggobi

    - http://www.ggobi.org


R

    - http://www.r-project.org
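

Since iplots is driven directly from R, a first interactive session could look like the following minimal sketch (it assumes the iplots package and a Java runtime are installed, and uses the students data from case study A; fill in your actual path):

library(iplots)

PTE <- read.table(".../students.txt", header=T, sep="\t", quote="")

ihist(PTE$Total.Points)   # interactive histogram, linked to other iplots

ibar(PTE$Semester)        # interactive barchart of the semesters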


Here is the R code used in the case studies, for convenient copy and paste into your favorite R GUI.


A     How to Pass an Exam


# Read in Data (fill in your actual path!)

#

PTE <- read.table(".../students.txt",header=T,sep="\t", quote="")

# Recode semesters >= 8 to 8 and convert the factor to numeric

#

levels(PTE$Semester)[6] <- "8"

PTE$Semester <- as.numeric(as.character(PTE$Semester))

# Dummy variable for even semesters

#

even <- 1 - (PTE$Semester %% 2)

l1 <- lm(Total.Points ~ Semester + even, data=PTE)

summary(l1)

plot(PTE$Semester, PTE$Total.Points)

abline(l1$coeff[1:2])                          # fitted line for odd semesters (even = 0)

abline(l1$coeff[1]+l1$coeff[3], l1$coeff[2])   # fitted line for even semesters (even = 1)


B     Washing – What makes the Difference


# Create data on the fly

#

deter <- cbind(expand.grid(Preference=c("X","M"),

      Temp=c("Low","High"), M.user=c("No","Yes"),

             WaterSoft=c("Hard","Medium","Soft")),

      Fr = c(68,42,42,30,37,52,24,43,

             66,50,33,23,47,55,23,47,

             63,53,29,27,57,49,19,29))

# Define ordered levels for WaterSoft

#

deter$WaterSoft <- ordered(deter$WaterSoft,

                levels=c("Soft","Medium","Hard"))

# Create Model

#

deter.ll <- glm(terms(Fr ~ M.user*Temp*WaterSoft +

                           Preference*M.user*Temp,

                           keep=T),

                family=poisson, data=deter)

#

summary(deter.ll, correlation=F)
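
#
# a quick optional check of the data entry: cross-tabulate the entered
# frequencies (a minimal sketch using xtabs, not needed for the model)
#
xtabs(Fr ~ Preference + Temp + M.user + WaterSoft, data=deter)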


C     The Influence of Smoking on Birthweight


# Read in Data and attach it (fill in your actual path!)

#

Smoke <- read.table(".../birthweight.txt", header=T, sep="\t", quote="")

attach(Smoke)

#

Smokes.Now <- Smoking == "now"

t.test(Birth.Weight ~ Smokes.Now, var=T)   # two-sample t-test assuming equal variances


E     Housing Rent Prices in Munich


# Read in Data and attach it (fill in your actual path!)

#

rent <- read.table(".../rent.txt", header=T, sep="\t", quote="")

attach(rent)

# Create the model: all covariates plus the Size:Built interaction, without District and Rent.per.sqm

#

m <- lm(Rent ~ . + Size*Built - District - Rent.per.sqm, data=rent)

summary(m)


F     What makes a Tour de France Winner


# Read in Data (fill in your actual path!)

#

TDF <- read.table(".../TDF2005.txt", header=T, sep="\t", quote="")

#

st <- TDF[,27:47]           # cumulative times

st <- st[!is.na(st[[21]]),] # filter out drop-outs

#

# calculate correlations for stages 1..20 and plot them

#

sc <- unlist(lapply(st[,1:20], cor, st[[21]]))

plot(sc, col=2, type="l", xlab="Stage", ylab="Correlation")

#

sim <- function(...) {                    # simulation function
  m <- matrix(runif(prod(dim(st))), 21)   # random times
  cm <- apply(m, 2, cumsum)               # cumulative times
  apply(cm, 1, cor, cm[21,])[-21]         # correlations, excl. stage 21
}
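
# for reproducible simulation results the random seed could be fixed
# beforehand (optional; the seed value 2005 is arbitrary)
set.seed(2005)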

s <- matrix(unlist(lapply(1:500, sim)), 20) # simulate 500x

lines(apply(s, 1, median))                 # median of the 500 simulations

lines(apply(s, 1, quantile, 0.025), lty=2) # 2.5% quantile

lines(apply(s, 1, quantile, 0.975), lty=2) # 97.5% quantile


G     How to Survive the Thirty Years’ War


# Read in Data (fill in your actual path!)

#

Augsburg <- read.table(".../augsburg.txt", header=T, sep="\t", quote="")

#

summary(lm(Tax.1646 ~ Tax.1618 - 1, data=Augsburg))$coeff  # regression through the origin


H     Classification of Italian Olive Oils


# Read in Data (fill in your actual path!)

#

olives <- read.table(".../olives.txt",header=T,sep="\t", quote="")

#

# load the rpart-library

#

library(rpart)

#

# create the tree model (exclude Area!)

#

t1 <- rpart(Region ~ ., data = olives[,1:9])

#

# create confusion matrix

#

t2 <- table(predict(t1, type="class"), olives$Region)

#

# everything not on the diagonal is an error

#

sum(t2) - sum(diag(t2))
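
#
# optionally, draw the fitted tree with the standard rpart plotting
# methods (a minimal sketch)
#
plot(t1)

text(t1)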


I     E-Voting in the 2004 Florida Election


# Read in Data (fill in your actual path!)

#

e <- read.table(".../election.txt",header=T,sep="\t", quote="")

# rebuild Hout’s model

hm <- lm(bush_change ~ bush2000pc + bush2000pc_sq +

                votes2004 + evote + bush2000pc_evote +

               bush2000pcsq_evote + votes_change +

              dole1996pc + income + hispanic, data=e)

p_votes <- (predict(hm) + e$bush2000pc) * e$votes2004

#

# create data with e-voting "removed"

#

e0 <- e

null <- rep(0, length(predict(hm)))

e0$evote <- null

e0$bush2000pc_evote <- null

e0$bush2000pcsq_evote <- null

#

# predict the new data with Hout’s model

#

p0 <- predict(hm, newdata=e0)

p0_votes <- (p0 + e$bush2000pc) * e$votes2004

#

# calculate the difference

#

sum(p_votes-p0_votes)