Hinweis: Die thematische Gliederung zu den aufgeführten Befehlen und Beispielen orientiert sich an dem Inhaltsverzeichnis der 17. Auflage der ‘Angewandten Statistik’. Nähere Hinweise zu den verwendeten Formeln sowie Erklärungen zu den Beispielen sind in dem Buch (Ebook) nachzulesen!
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=German_Germany.1252 LC_CTYPE=German_Germany.1252
## [3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C
## [5] LC_TIME=German_Germany.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] compiler_3.6.3 magrittr_1.5 tools_3.6.3 htmltools_0.4.0
## [5] yaml_2.2.1 Rcpp_1.0.4 stringi_1.4.6 rmarkdown_2.1
## [9] knitr_1.28 stringr_1.4.0 xfun_0.12 digest_0.6.25
## [13] rlang_0.4.5 evaluate_0.14
Download von Datensätzen, die in den folgenden Beispielen verwendet werden:
Zuverlässigkeit: reliabilität
Elektrischer Widerstand: resistivity
Nichtparametr. Regression: nonparegdat
# Blutgruppen A, B, AB, 0
Blutgruppen <- c(rep("A", 69), rep("B", 17), rep("AB", 7), rep("0", 62))
vt <- table(Blutgruppen); vt
## Blutgruppen
## 0 A AB B
## 62 69 7 17
Modalwert <- (names(vt[vt == max(vt)])); Modalwert
## [1] "A"
vect <- sample(0:9, size=25, replace=T); vect
## [1] 4 0 8 7 4 6 9 3 5 0 1 2 7 5 6 7 8 8 3 1 5 8 2 0 9
vt <- table(vect); mode <- as.numeric(names(vt[vt == max(vt)])); mode
## [1] 8
absolut <- c(69, 17, 7, 62) # Blutgruppen A, B, AB, 0
names(absolut) <- c("A","B","AB","0"); absolut
## A B AB 0
## 69 17 7 62
anzahl <- sum(absolut); anzahl
## [1] 155
relativ <- absolut / anzahl; round(relativ, 2)
## A B AB 0
## 0.45 0.11 0.05 0.40
prozent <- relativ * 100; round(prozent, 1)
## A B AB 0
## 44.5 11.0 4.5 40.0
Gini <- sum(relativ*(1-relativ)); Gini # Gini-Simpson-Index
## [1] 0.6277627
Funktion barplot2() in library(gplots)
absolut <- c(69, 17, 7, 62) # Blutgruppen A, B, AB, 0
names(absolut) <- c("A","B","AB","0"); absolut
## A B AB 0
## 69 17 7 62
# Torten- und Balkendiagramm
library(gplots)
par(mfrow=c(1,3), lwd=1.5, font.axis=2, bty="n", ps=15, cex.axis=1)
barplot2(absolut, names.arg=c("A","B","AB","0"), las=1,
cex.axis = 1.3, cex.names = 1.3, ylim=c(0,70),
density=c(10,15,18,20), angle=c(45,135,45,135), col="black")
barplot2(as.matrix(absolut), names.arg="Blutgruppe", beside = FALSE,
ylim=c(0,160), yaxp=c(0,160,8), xlim=c(0,1.5), las=1,
cex.axis = 1.3, cex.names = 1.1,
density=c(10,15,18,20),angle=c(45,135,45,135), col="black")
text(1.4,30,"A",bg="white",cex=1.8); text(1.4,78,"B",bg="white",cex=1.8)
text(1.4,90,"AB",bg="white",cex=1.8); text(1.4,120,"0",bg="white",cex=1.8)
pie(absolut, labels=c("A","B","AB","0"), radius = 1.0,
density=c(10,15,18,20),angle=c(45,135,45,135), col="black", cex=1.7)
# Blutgruppen nach dem Geschlecht
absolut <- matrix(c(30, 10, 5, 40, 39, 7, 2, 22), nrow=2, byrow=T)
colnames(absolut) <- c("A","B", "AB","0")
rownames(absolut) <- c("maennlich", "weiblich")
names(dimnames(absolut)) <- c("Geschlecht","Blutgruppe"); absolut
## Blutgruppe
## Geschlecht A B AB 0
## maennlich 30 10 5 40
## weiblich 39 7 2 22
margin.table(absolut, 1)
## Geschlecht
## maennlich weiblich
## 85 70
margin.table(absolut, 2)
## Blutgruppe
## A B AB 0
## 69 17 7 62
round(prop.table(absolut, 1), 3)
## Blutgruppe
## Geschlecht A B AB 0
## maennlich 0.353 0.118 0.059 0.471
## weiblich 0.557 0.100 0.029 0.314
round(prop.table(absolut, 2), 3)
## Blutgruppe
## Geschlecht A B AB 0
## maennlich 0.435 0.588 0.714 0.645
## weiblich 0.565 0.412 0.286 0.355
par(mfrow=c(1,2), lwd=1.5, font.axis=2, bty="n", ps=14, cex.axis=1)
barplot(absolut, density=c(10, 20), col="black", angle=c(45,135),
las=1, legend.text=T, ylim=c(0,85), xlim=c(0,8))
mosaicplot(absolut, col=c("grey80","grey60","grey40","grey20"),
cex=1.0, main=" ")
# Simpson-Paradox (Anteilswerte)
data <- array(c(2,53,1,61, 3,121,5,152, 14,95,7,114, 27,103,12,66,
51,64,40,81, 29,7,101,28, 13,0,64,0), dim=c(2,2,7),
dimnames=list(c("verstorben","lebend"),
c("Raucherin","Nichtraucherin"),
c("18-24","25-35","35-44","45-54","55-64","65-74",">74")))
tsums <- apply(data,c(1,2),sum)
par(mfrow=c(1,1), lwd=1.5, font.axis=2, bty="n", ps=12, cex.axis=2)
mosaicplot(t(tsums), main=" ", col=2:3)
rbind(tsums, round(tsums[1,]/apply(tsums,2,sum)*100, 1))
## Raucherin Nichtraucherin
## verstorben 139.0 230.0
## lebend 443.0 502.0
## 23.9 31.4
for (i in 1:7) { t <- data[,,i]; apply(t,2,sum)
print(cbind(t,round(t[1,]/apply(t,2,sum)*100,1))) }
## Raucherin Nichtraucherin
## verstorben 2 1 3.6
## lebend 53 61 1.6
## Raucherin Nichtraucherin
## verstorben 3 5 2.4
## lebend 121 152 3.2
## Raucherin Nichtraucherin
## verstorben 14 7 12.8
## lebend 95 114 5.8
## Raucherin Nichtraucherin
## verstorben 27 12 20.8
## lebend 103 66 15.4
## Raucherin Nichtraucherin
## verstorben 51 40 44.3
## lebend 64 81 33.1
## Raucherin Nichtraucherin
## verstorben 29 101 80.6
## lebend 7 28 78.3
## Raucherin Nichtraucherin
## verstorben 13 64 100
## lebend 0 0 100
# Assoziationsmaße; Goodman-Kruskal
tau_GK <- function(dat) {
N <- sum(dat);
dat.rows <- nrow(dat); dat.cols <- ncol(dat)
max.col <- sum.col <- L.col <- matrix(,dat.cols)
max.row <- sum.row <- L.row <- matrix(,dat.rows)
for(i in 1:dat.cols) sum.col[i] <- sum(dat[,i]); max.col[i] <- max(dat[,i])
for(i in 1:dat.rows) sum.row[i] <- sum(dat[i,]); max.row[i] <- max(dat[i,])
max.row.margin <- max(apply(dat,1,sum));
max.col.margin <- max(apply(dat,2,sum));
p.a <- N^2 # tau Column|Row
for(i in 1:dat.rows) p.a <- p.a - N * sum(dat[i,]^2/sum.row[i])
p.b <- N^2 - sum(sum.col^2);
tau.CR <- 1 - (p.a / p.b)
p.a <- N^2 # tau Row|Column
for(j in 1:dat.cols) p.a <- p.a - N * sum(dat[,j]^2/sum.col[j])
p.b <- N^2-sum(sum.row^2)
tau.RC <- 1 - (p.a / p.b)
cat("\ntau_Col:Row = ",round(tau.CR, 3),
" and tau_Row:Col = ",round(tau.RC, 3),"\n")
}
x <- matrix(c(10,30,5,0,20,30,5,0,0), byrow=T, nrow=3); x
## [,1] [,2] [,3]
## [1,] 10 30 5
## [2,] 0 20 30
## [3,] 5 0 0
tau_GK(x)
##
## tau_Col:Row = 0.236 and tau_Row:Col = 0.28
# Rangdaten
vor <- c(3, 4, 6, 4, 8, 9, 2, 7, 10, 7, 5, 6, 5 )
nach <- c(4, 4, 1, 5, 3, 3, 1, 3, 4, 5, 6, 9, 1 )
vor; sort(vor)
## [1] 3 4 6 4 8 9 2 7 10 7 5 6 5
## [1] 2 3 4 4 5 5 6 6 7 7 8 9 10
vor; rank(vor)
## [1] 3 4 6 4 8 9 2 7 10 7 5 6 5
## [1] 2.0 3.5 7.5 3.5 11.0 12.0 1.0 9.5 13.0 9.5 5.5 7.5 5.5
# Quartile
vor <- c(3, 4, 6, 4, 8, 9, 2, 7, 10, 7, 5, 6, 5 )
vsort <- sort(vor); n <- length(vsort)
Q1 <- vsort[floor((n+1)*0.25)]; Q1
## [1] 4
Q2 <- vsort[floor((n+1)*0.50)]; Q2
## [1] 6
Q3 <- vsort[floor((n+1)*0.75)]; Q3
## [1] 7
median(vor);
## [1] 6
quantile(vor, c(0.25, 0.50, 0.75))
## 25% 50% 75%
## 4 6 7
MA <- mean(abs(vor-median(vor))); MA # Median-Absolutabweichung
## [1] 1.846154
D <- mad(vor, const=1); D # Median-Deviation
## [1] 2
# Dot-Plot und Box-Plot
vor <- c(3, 4, 6, 4, 8, 9, 2, 7, 10, 7, 5, 6, 5 )
nach <- c(4, 4, 1, 5, 3, 3, 1, 3, 4, 5, 6, 9, 1 )
par(mfrow=c(1,2), lwd=1.5, font.axis=2, bty="n", ps=14, cex.axis=1)
stripchart(list(vor, nach), method="jitter", jitter=0.1, las=1,
vertical=TRUE, group.names=c("vor","nach"),
xlim=c(0.5,2.5), ylim=c(0,10), pch=16, cex=1.3)
boxplot(vor, nach, range = 1.5, names=c("vor","nach"),
las=1, ylim=c(0,10), col=8)
kendall.tau <- function(x, y) { # Kendall Korrelationskoeffizient
n <- length(x)
if(n != length(y)) stop("x und y unterschiedliche L?nge")
x <- rank(x); y <- rank(y) # Rangzahlen !
m <- cbind(x,y); m <- m[order(m[,1]),] # Ordnung nach x
x <- m[,1]; y <- m[,2]
inv <- 0; prov <- 0
for (i in 1:n) {
for (j in i:n) {
if (x[i]<x[j] & y[i]>y[j]) inv <- inv + 1 # Inversion
if (x[i]<x[j] & y[i]<y[j]) prov <- prov + 1 # Proversion
} }
rx <- table(x); Tx <- 0.5*sum(rx*(rx-1)) # Bindungen in x
ry <- table(y); Ty <- 0.5*sum(ry*(ry-1)) # Bindungen in y
r.tau_b = (prov-inv)/sqrt((prov + inv + Tx)*(prov + inv + Ty))
cat("Anzahl Proversionen:",prov,"und Inversionen:",inv,"\n",
"Kendalls Tau_b:",r.tau_b,"\n")
}
x <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) # Weinsorten
y <- c(2, 1, 5, 3, 4, 6, 7, 9, 8, 10) # ohne Bindungen
kendall.tau(x , y)
## Anzahl Proversionen: 41 und Inversionen: 4
## Kendalls Tau_b: 0.8222222
x <- c( 8, 5, 10, 10, 8); x; rank(x) # Rangzahlen, ordnen
## [1] 8 5 10 10 8
## [1] 2.5 1.0 4.5 4.5 2.5
y <- c(15, 12, 6, 16, 12); y; rank(y) # mit Bindungen
## [1] 15 12 6 16 12
## [1] 4.0 2.5 1.0 5.0 2.5
kendall.tau(x , y)
## Anzahl Proversionen: 4 und Inversionen: 3
## Kendalls Tau_b: 0.1178511
cor(x, y , method = "kendall") # spez. Funktion cor()
## [1] 0.1178511
z <- c(1,2,3,4,5,6) # einführendes Beispiel
x <- c(3,1,4,2,6,5)
y <- c(4,2,1,6,3,5)
tauXZ <- round(cor(x, z, method="kendall"), 4)
tauYZ <- round(cor(y, z, method="kendall"), 4)
tauXY <- round(cor(x, y, method="kendall"), 4)
tauXY.Z <- (tauXY - tauXZ*tauYZ) / sqrt((1-tauXZ^2)*(1-tauYZ^2))
cbind(tauXZ, tauYZ, tauXY, tauXY.Z)
## tauXZ tauYZ tauXY tauXY.Z
## [1,] 0.4667 0.2 -0.0667 -0.1846871
# partielle Rangkorrelation
I <- c(1,2,3,4,5,6,7,8,9,10) # Intelligenz
A <- c(1,4,5,6,2,7,3,9,8,10) # Mathematik
B <- c(4,1,3,5,2,6,7,10,9,8) # Musik
tauAI <- round(cor(A, I, method="kendall"), 4)
tauBI <- round(cor(B, I, method="kendall"), 4)
tauAB <- round(cor(A, B, method="kendall"), 4)
tauAB.I <- (tauAB - tauAI*tauBI) / sqrt((1-tauAI^2)*(1-tauBI^2))
cbind(tauAI, tauBI, tauAB, tauAB.I)
## tauAI tauBI tauAB tauAB.I
## [1,] 0.6444 0.6444 0.5556 0.2400153
# Body-Mass-Index
bmi <- c(28.2,23.9,20.3,26.7,25.6,32.5,23.5,19.7,27.8,26.7,20.7,28.4,33.3)
n <- length(bmi)
Summe <- sum(bmi); Summe
## [1] 337.3
Summe/n # arithmetisches Mittel
## [1] 25.94615
mean(bmi) # Funktion mean()
## [1] 25.94615
bmi <- c(22.2,23.9,20.3,26.7,25.6,22.5,23.5,24.7,27.8,26.7,20.7,26.4,40.3)
sort(bmi)
## [1] 20.3 20.7 22.2 22.5 23.5 23.9 24.7 25.6 26.4 26.7 26.7 27.8 40.3
mean(bmi)
## [1] 25.48462
mean(bmi, trim=0.1) # gestutzter Mittelwert
## [1] 24.60909
winsor <- function(x, tr=.1) { # winsorisierter Mittelwert
if(tr < 0 || tr > 0.5) stop("Anteil?")
y <- sort(x)
n <- length(x)
ibot <- floor(tr*n)+1; itop <- length(x)-ibot+1
xbot <- y[ibot]; xtop <- y[itop]
y <- ifelse(y<=xbot, xbot, y)
y <- ifelse(y>=xtop, xtop, y)
mean(y)
}
bmi <- c(22.2,23.9,20.3,26.7,25.6,22.5,23.5,24.7,27.8,26.7,20.7,26.4,40.3)
mean(bmi)
## [1] 25.48462
winsor(bmi)
## [1] 24.55385
bmi <- c(28.2,23.9,20.3,26.7,25.6,32.5,23.5,19.7,27.8,26.7,20.7,28.4,33.3)
m <- mean(bmi)
saq <- (bmi - m)^2
sqrt(sum(saq)/(n-1)) # Standardabweichung
## [1] 4.295466
sd(bmi) # Funktion sd()
## [1] 4.295466
var(bmi) # Funktion var()
## [1] 18.45103
s1 <- c(40,50,72); s2 <- c(30,60,80,90,100); s3 <- c(40,50,60,70)
n <- c(3, 5, 4)
m <- c(mean(s1), mean(s2), mean(s3))
s <- c(sd(s1), sd(s2), sd(s3)); v <- s^2
mv.combi <- function(n, m, v) { # Funktion nach Yiu-Man Chan
r <- length(n)
n.t <- sum(n); m.t <- sum(n*m) / n.t
ts1 <- sum((n-1)*v); ts2 <- 0
for (k in 1:(r-1)) for (l in (k+1):r)
ts2 <- ts2 + n[k]*n[l]*(m[k]-m[l])^2
v.t <- (ts1 + (1/n.t)*ts2) / (n.t-1)
cat("\n","Summe =",n.t," - Mittelwert =",m.t," - Varianz =",v.t)
}
mv.combi(n=c(3, 5, 4), m=c(54, 72, 55), v=c(267.98, 770.06, 166.67))
##
## Summe = 12 - Mittelwert = 61.83333 - Varianz = 454.8979
Funktion errbar() in library(Hmisc)
library(Hmisc)
x <- 1:5
y <- c(4.47, 6.43, 7.52, 3.72, 6.73)
delta <- c(1.71, 0.85, 1.43, 0.6, 1.34)
par(mfrow=c(1,1), lwd=2.0, font.axis=2, bty="l", ps=16, cex.axis=1)
errbar( x, y, y + delta, y - delta , ylim=c(2, 10), xlab=" ", pch=15,
las=1, lwd=2.0, cex=2.0, ylab=expression(bar(x)%+-%s))
abline(h=seq(2,10,1), lty=2, col="grey")
gew.stats <- function(meanis, ni, varis=NULL) {
k <- length(meanis); n <- sum(ni)
mean <- sum(ni*meanis)/n
if (is.null(varis)) {
mgew <- sum(ni*meanis)/n
cat("\n","Gewogener Mittelwert:", round(mgew, 2),"\n")
}
if (!is.null(varis)) {
mgew <- sum(ni*meanis/varis)/sum(ni/varis)
sin <- sqrt(sum(varis*(ni-1)) / (n-k))
vgew <- (sum((ni-1)*varis) + sum(ni*(meanis - mean)^2)) / (n-1)
cat("\n","Gewogener Mittelwert :", round(mgew, 2),"\n",
"Standardabw. innerhalb:", round(sin, 3),"\n",
"Gewogene Varianz :", round(vgew, 3),"\n")
}
}
n <- c(8, 10, 6); m <- c(9, 7, 8); s <- c(2, 1, 2) # gewogener Mittelwert bei ...
gew.stats(m, n) # gleiche Varianzen
##
## Gewogener Mittelwert: 7.92
gew.stats(m, n, s^2) # ungleiche Varianzen
##
## Gewogener Mittelwert : 7.41
## Standardabw. innerhalb: 1.648
## Gewogene Varianz : 3.254
note <- c( 2, 3, 3, 2, 1)
gew <- c(0.3, 0.3, 0.2, 0.1, 0.1)
weighted.mean(note, gew) # gewichteter Mittelwert
## [1] 2.4
# Seventy rule (70iger Regel)
rt <- 1:10 # Wachstumsraten (%)
n_rule <- round(70/rt, 2)
n_exact <- round(log(2)/log(1+(rt/100)), 2)
tab <- cbind(rt, n_rule, n_exact); tab
## rt n_rule n_exact
## [1,] 1 70.00 69.66
## [2,] 2 35.00 35.00
## [3,] 3 23.33 23.45
## [4,] 4 17.50 17.67
## [5,] 5 14.00 14.21
## [6,] 6 11.67 11.90
## [7,] 7 10.00 10.24
## [8,] 8 8.75 9.01
## [9,] 9 7.78 8.04
## [10,] 10 7.00 7.27
# geometrischer Mittelwert
gehalt <- c(1.025, 1.10, 1.22) # Gehaltserhöhungen
lg.gehalt <- log10(gehalt)
10^mean(lg.gehalt) # mittlere Gehaltserhoehung
## [1] 1.112138
stueck <- c(10, 5, 8) # Kosten / Stueckzahl
rez.stueck <- 1/stueck; n <- length(stueck)
n / sum(rez.stueck) # mittlere Stueckzahl
## [1] 7.058824
mat <- matrix(c(11, 13, 12, 27, 25, 29, 43, 47, 42, 63, 57, 60), nrow=4, byrow=T)
mat # Beispiel Dreifachbestimmung
## [,1] [,2] [,3]
## [1,] 11 13 12
## [2,] 27 25 29
## [3,] 43 47 42
## [4,] 63 57 60
Bmean <- apply(mat, 1, mean); Bmean
## [1] 12 27 44 60
Bsaq <- apply(mat, 1, function(x) sum((x - mean(x))^2)); Bsaq
## [1] 2 8 14 18
sMB <- sqrt(sum(Bsaq) / 8); sMB
## [1] 2.291288
Pmean <- apply(mat, 2, mean); Pmean
## [1] 36.00 35.50 35.75
Psaq <- apply(mat, 2, function(x) sum((x - mean(x))^2)); Psaq
## [1] 1484.00 1211.00 1236.75
Pv <- Psaq/3; Pv
## [1] 494.6667 403.6667 412.2500
quotient <- sMB / sqrt(sum(Pv)/3); quotient
## [1] 0.1096246
# elektrischer Widerstand
resist <- read.csv("resistivity.csv", header = TRUE, sep = ";", dec=",", fill = FALSE)
attach(resist); resist[1:10, ]
N <- 6 # Wiederholungen
L <- 2 # Serien
K <- 6 # Tage
# Wiederholbarbeit in der Serie gepoolt
s1 <- sqrt(sum(resist$Stdabw^2)/(K*L)); round(s1, 4)
## [1] 0.0787
# Reproduzierbarbeit von Tag zu Tag und gepoolt
s2.1 <- sd(resist$Mittelwert[Serie==1]); round(s2.1, 4)
## [1] 0.0273
s2.2 <- sd(resist$Mittelwert[Serie==2]); round(s2.2, 4)
## [1] 0.0276
s2 <- sqrt((s2.1^2 + s2.2^2)/L); round(s2, 4)
## [1] 0.0274
# Stabilität / Konstanz von Serie zu Serie
m1 <- mean(resist$Mittelwert[Serie==1])
m2 <- mean(resist$Mittelwert[Serie==2])
s3 <- sd(c(m1, m2)); round(s3, 4)
## [1] 0.0288
# Fehler / Unsicherheit für ein einzelnes Ergebnis
s.R <- sqrt(s3^2 + ((K-1)/K)*s2^2 + ((N-1)/N)*s1^2); round(s.R, 4)
## [1] 0.0814
bmi <- c(20.8, 29.7, 27.6, 28.6, 20.7, 21.0, 23.1, 21.9, 24.8, 25.3, 27.1, 23.3,
19.5, 25.2, 25.8, 21.6, 28.7, 30.6, 23.3, 26.6, 35.3, 17.0, 22.6, 25.9,
29.0, 23.7, 21.7, 26.5, 18.5, 24.5, 29.0, 23.2, 27.9, 18.8, 27.1, 21.5,
26.5, 20.3, 25.5, 32.0, 26.7, 34.9, 24.6, 25.6, 26.7, 22.1, 28.8, 28.1,
28.8, 32.2, 30.3, 24.9, 28.0, 21.1, 22.0, 25.5, 24.0, 26.6, 24.7, 28.8)
par(mfrow=c(1,1), lwd=1.5, font.axis=2, bty="l", ps=11, cex.axis=1)
hist(bmi, breaks=c(16, 18,20,22,24,26,28,30,32,34,36), col="grey", las=1,
cex.axis=1.2, cex.lab=1.2,
xlim=c(15,40), xlab="Body-Mass-Index", ylab="H?ufigkeit", main=" ")
abline(h=seq(0,12,2,), lty=2, col="grey")
h <- hist(bmi, breaks=c(16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36), plot=FALSE)
h$breaks; h$mids; h$counts; cumsum(h$counts)
## [1] 16 18 20 22 24 26 28 30 32 34 36
## [1] 17 19 21 23 25 27 29 31 33 35
## [1] 1 3 10 8 12 11 9 3 1 2
## [1] 1 4 14 22 34 45 54 57 58 60
round(h$counts/length(bmi),3)*100
## [1] 1.7 5.0 16.7 13.3 20.0 18.3 15.0 5.0 1.7 3.3
round(cumsum(h$counts)/length(bmi),3)*100
## [1] 1.7 6.7 23.3 36.7 56.7 75.0 90.0 95.0 96.7 100.0
h <- hist(bmi, breaks=c(16, 18,20,22,24,26,28,30,32,34,36), plot=FALSE)
par(mfrow=c(1,2), lwd=1.5, font.axis=2, bty="n", ps=14, cex.axis=1)
x <- h$breaks; y <- c(0, round(cumsum(h$counts)/length(bmi),3))
plot(x, y, type="b", ylim=c(0,1), xlim=c(15,40), las=1, lwd=2,
xlab="BMI", ylab="rel. Summenh?ufigkeit" )
plot.ecdf(bmi, xlab="BMI", col.vert = "gray20", las=1, xlim=c(15,40), main=" ")
abline(h=0.8, lty=2, col="grey")
abline(v=28.5, lty=2, col="grey")
par(lwd=1.5, font.axis=2, ps=14, cex.axis=1, bty="o")
op <- par(mar=c(0,0,0,0), oma=c(0,0,0,0)+.1)
layout(matrix(c(1,1,1,2), nc=1))
plot(sort(bmi), ppoints(length(bmi)), type="l", lwd=2,
xlab="", ylab="", main="", xaxt="n")
axis(1, labels = TRUE, tick = TRUE)
text(19,0.28,"0.25 (1.Quartil)", cex=1.2)
text(19,0.53,"0.50 (Median)", cex=1.2)
text(19,0.78,"0.75 (3.Quartil)", cex=1.2)
abline(h = c(0,.25,.5,.75,1), lty=2)
abline(v = quantile(bmi), col = "blue", lwd = 2, lty=2)
points(quantile(bmi), c(0,.25,.5,.75,1), lwd= 8, col="blue")
boxplot(bmi, horizontal = TRUE, col = "grey",
lwd=2, cex=1.5, pch=16, axes = FALSE)
quantil.hist <- function(p, breaks, freq) { # Quantile aus klassierten Daten
n <- sum(freq); rel <- freq/n
if(any(p<=0) | any(p>=1)) return("Fehler: 0<p<1")
cum <- cumsum(c(0, rel))
loc <- apply(outer(p, cum, ">="), 1, sum)
return(breaks[loc] + (p - cum[loc])/rel[loc] *
(breaks[loc+1] - breaks[loc])) }
ecdf.hist <- function(x, breaks, freq) { # Anteile aus klassierten Daten
n <- sum(freq); rel <- freq/n
if(x < breaks[1]) return("Fehler: x < min")
else if(x > breaks[length(breaks)])
return("Fehler: x > max")
else {loc <- max(which(breaks <= x))
return(sum(rel[1:(loc-1)])+(x-breaks[loc])/
(breaks[loc+1] - breaks[loc]) * rel[loc])} }
breaks <- c(16, 18, 20, 22, 24, 26, 28, 30, 32, 34)
mids <- c(17, 19, 21, 23, 25, 27, 29, 31, 33, 35)
counts <- c( 1, 3, 10, 8, 12, 11, 9, 3, 1, 2)
barplot(counts, names.arg=mids, las=1, xlab="Body-Mass-Index")
quantil.hist(c(0.25, 0.50, 0.75), breaks, counts) # approximiert
## [1] 22.25000 25.33333 28.00000
quantile(bmi, probs=c(0.25, 0.50, 0.75)) # exakt
## 25% 50% 75%
## 22.475 25.500 28.025
ecdf.hist(x=30, breaks, counts)
## [1] 0.9
pareto <- function (x, main = "", ylab = "Value") {
op <- par(mar = c(5, 4, 4, 5) + 0.1, las = 2)
x <- rev(sort(x))
plot( x, type = 'h', axes = F, lwd = 16,
xlab = "", ylab = ylab, main = main )
axis(2)
points( x, type = 'h', lwd = 12, col = heat.colors(length(x)) )
y <- cumsum(x)/sum(x)
par(new = T)
plot(y, type = "b", lwd = 3, pch = 7, axes = FALSE,
xlab='', ylab='', ylim=c(0,1), main='')
points(y, type = 'h')
axis(4)
par(las=0)
mtext("kumulierter Anteil", side=4, line=3)
print(names(x))
axis(1, at=1:length(x), labels=names(x))
par(op)
}
defect <- c(12, 2, 32, 4, 19, 9, 1)
names(defect) <- c("A", "B", "C", "D", "E", "F", "G")
par(mfrow=c(1,1), lwd=2.5, font.axis=2, bty="u", ps=10, cex.axis=0.8, cex=1.3)
pareto(defect, ylab="Fehler-H?ufigkeit")
## [1] "C" "E" "A" "F" "D" "B" "G"
Funktion pareto.chart() in library(qcc)
library(qcc) # pareto.chart() in library(qcc)
pareto.chart(defect, ylab = "Fehler-H?ufigkeit", main =" ",
xlab="Fehler-Ursache", las=3, col="grey")
##
## Pareto chart analysis for defect
## Frequency Cum.Freq. Percentage Cum.Percent.
## C 32.000000 32.000000 40.506329 40.506329
## E 19.000000 51.000000 24.050633 64.556962
## A 12.000000 63.000000 15.189873 79.746835
## F 9.000000 72.000000 11.392405 91.139241
## D 4.000000 76.000000 5.063291 96.202532
## B 2.000000 78.000000 2.531646 98.734177
## G 1.000000 79.000000 1.265823 100.000000
gini <- function(x, y) { # Funktion zum Gini-Koeffizient
area <- 0 # Trapezregel
for (i in 2:(n+1)) area <- area + 0.5*((x[i]-x[i-1])*(y[i]+y[i-1]))
gini <- 1 - 2*area; round(gini, 3) # Gini-Koeffizient
}
x <- c(2, 8, 10, 15, 20, 45); n <- length(x)
u <- c(0, (1:n)/n) # Abszisse - relativer Index
v <- c(0, (cumsum(x) / sum(x))) # Odinate - kuml. rel. Anteile
gini(u, v)
## [1] 0.427
# Lorenzkurve und Gini-Koeffizient
par(mfrow=c(1,1), lwd=2, font.axis=2, bty="n", ps=14)
x <- c(2, 8, 10, 15, 20, 45); n <- length(x)
u <- c(0, (1:n)/n) # Abszisse - relativer Index
v <- c(0, (cumsum(x) / sum(x))) # Odinate - kumul. rel. Anteile
plot(u, v, type="b", cex=1.0, xlim=c(0,1), ylim=c(0,1), las=1,
xlab="Anteil Firmen (u)", ylab="Marktanteil (v)")
abline(0,1, col="red", lty=2)
text(0.8, 0.20, paste("Gini-Koeffizient =",round(gini(u, v),3),"(G=2F)"))
text(0.35, 0.6, "Konzentrationsfläche F")
lines(c(0.4,0.6),c(0.55,0.4), lwd=2)
lines(c(0,0.5,0.5), c(0.2,0.2,0), lty=2)
polygon(c(u,0), c(v,0), angle = -45, border=NA, density = 10)
gini_num <- function(x, unbiased =FALSE) { # Gini-Koeffizient
N <- length(x)
mu <- mean(x)
n <- if (unbiased) N * (N-1) else N * N
ox <- x[order(x)]
dsum <- drop(crossprod(2 * 1:N - N - 1, ox))
round(dsum / (mu * n), 3)
}
x <- c(2, 8, 10, 15, 20, 45); gini_num(x, unbiased=FALSE)
## [1] 0.427
x <- seq(18, 29, by=1) # Alter und Körpergröße in Kalama
y <- c(76.1, 77.0, 78.1, 78.2, 78.8, 79.7, 79.9, 81.1, 81.2, 81.8, 82.8, 83.5)
par(mfrow=c(1,1), lwd=1.5, font.axis=2, bty="l", ps=14, cex.axis=1)
plot(x, y, pch=16, cex=1.5, las=1,
xlab="Alter [Monate]", ylab="Größe [cm]",
xlim=c(17, 30), ylim=c(75, 85))
x <- c(13, 17, 10, 17, 20, 11, 15)
y <- c(12, 17, 11, 13, 16, 14, 15)
var(x); var(y); # Varianz
## [1] 12.90476
## [1] 4.666667
cov(x, y) # Kovarianz
## [1] 5.5
cor(x, y) # Korrelationskoeffizient
## [1] 0.7087357
x <- c(2.4, 2.4, 2.3, 2.2, 2.1, 1.8, 2.3, 2.3, 2.2, 2.0, 1.9, 1.7, 2.2, 1.8, 3.2,
3.2, 2.7, 2.5, 2.2, 1.9, 1.9, 1.8, 2.7, 2.5, 2.3, 2.0, 2.6, 2.4, 1.8, 1.7)
n <- length(x)
t <- seq(1, n, by=1)
acorr <- acf(x, lag.max=5, plot=FALSE); acorr
##
## Autocorrelations of series 'x', by lag
##
## 0 1 2 3 4 5
## 1.000 0.398 0.017 -0.234 -0.280 -0.359
par(mfrow=c(1,2), lwd=2, font.axis=2, bty="n", ps=14)
plot(t, x, ylim=c(1.5,3.5), xlim=c(0,n), las=1)
acorr <- acf(x, lag.max=5, plot=FALSE)
plot(acorr, main=" ", las=1, ylab="Autokorrelation", xlab="Abstand (lag)")
# fiktive Beispieldaten
test <- read.csv("reliabilität.csv", header = TRUE, sep = ";")
attach(test); test[1:5, 1:8]
k <- 6 # Anzahl der Items
cor(score, r_score) # Test-Retest Reliabilitaet
## [1] 0.8404992
# Split-Half
test1 <- cbind(item1, item2, item3); score1 <- apply(test1, 1, sum)
test2 <- cbind(item4, item5, item6); score2 <- apply(test2, 1, sum)
r_tt <- cor(score1, score2); r_tt # Reliabilitaet
## [1] 0.763246
r_tt1 <- 2*r_tt / (1+r_tt); r_tt1 # Spearman-Brown Korrektur
## [1] 0.8657283
# Guttmann Koeffizient
r_tt2 <- 2 * (1 - (var(score1) + var(score2))/var(score)); r_tt2
## [1] 0.8539877
mat <- cbind(item1, item2, item3, item4, item5, item6)
si <- sum(diag(cov(mat))) # Item Varianzen
st <- var(score) # Gesamtvarianz im Score
alpha <- k/(k-1) * (1 - si/st); alpha # Cronbach's Alpha
## [1] 0.7038037
Funktion alpha() in library(psych)
library(psych)
alpha(mat) # Funktion alpha() in library(psych)
##
## Reliability analysis
## Call: alpha(x = mat)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.7 0.73 0.91 0.32 2.8 0.17 2.4 0.64 0.41
##
## lower alpha upper 95% confidence boundaries
## 0.37 0.7 1.04
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## item1 0.55 0.61 0.88 0.24 1.5 0.27 0.134 0.32
## item2 0.63 0.68 0.86 0.30 2.1 0.21 0.118 0.39
## item3 0.70 0.74 0.89 0.36 2.8 0.18 0.133 0.49
## item4 0.55 0.56 0.83 0.20 1.3 0.26 0.126 0.18
## item5 0.71 0.74 0.89 0.37 2.9 0.17 0.083 0.45
## item6 0.78 0.79 0.90 0.43 3.7 0.13 0.083 0.49
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## item1 8 0.85 0.86 0.82 0.738 2.6 1.06
## item2 8 0.71 0.71 0.69 0.524 2.6 1.06
## item3 8 0.53 0.54 0.48 0.316 2.0 0.93
## item4 8 0.94 0.94 0.95 0.910 2.0 0.76
## item5 8 0.51 0.52 0.48 0.274 2.9 0.99
## item6 8 0.39 0.37 0.32 0.096 2.2 1.16
##
## Non missing response frequency for each item
## 1 2 3 4 miss
## item1 0.12 0.38 0.25 0.25 0
## item2 0.12 0.38 0.25 0.25 0
## item3 0.38 0.25 0.38 0.00 0
## item4 0.25 0.50 0.25 0.00 0
## item5 0.12 0.12 0.50 0.25 0
## item6 0.25 0.50 0.00 0.25 0
L <- c(1, 2, 2, 2, 3, 3, 4, 4); M <- c(2, 4, 1, 3, 4, 3, 4, 3)
r.L <- rank(L, ties.method="average"); r.L # Rangzahlen zu x
## [1] 1.0 3.0 3.0 3.0 5.5 5.5 7.5 7.5
r.M <- rank(M, ties.method="average"); r.M # Rangzahlen zu y
## [1] 2 7 1 4 7 4 7 4
D <- r.L - r.M; n <- length(D)
1- 6*sum(D^2)/(n*(n^2-1)) # Rangkorrelationskoeffizient (Spearman)
## [1] 0.5357143
cor(r.L, r.M) # Korrelationskoeffizient aus Rangzahlen
## [1] 0.4935481
# Asbest und Auftreten von Lungentumoren
asbest <- c(50, 400, 500, 900, 1100, 1600, 1800, 2000, 3000)
lungca <- c(2, 6, 5, 10, 26, 42, 37, 28, 50)
lm(lungca ~ asbest)
##
## Call:
## lm(formula = lungca ~ asbest)
##
## Coefficients:
## (Intercept) asbest
## 0.54047 0.01772
# lineare Regression
reg <- lm(lungca ~ asbest); reg$coefficients
## (Intercept) asbest
## 0.54046893 0.01772121
lungca.hat <- reg$coefficients[2] + reg$coefficients[2]*asbest
resid <- lungca - lungca.hat
par(mfrow=c(1,2), lwd=1.5, font.axis=2, bty="l", ps=14, cex.axis=1)
plot(asbest, lungca, pch=16, cex=1.5, las=1, ylab="Tumorentstehung [%]",
xlab="Asbest-Exposition [Fasern/ml]", ylim=c(0, 50), xlim=c(0,3000))
abline(reg$coefficients[1], reg$coefficients[2], col="red", lwd=2)
plot(asbest, resid, cex=1.5, las=1, xlim=c(0, 3000), pch=16, ylim=c(-10, ++10),
xlab="Asbest-Exposition [Fasern/ml]", ylab="Residuen")
abline(h=0, col="red", lwd=2)
x <- c(13, 17, 10, 17, 20, 11, 15)
y <- c(12, 17, 11, 13, 16, 14, 15)
# orthogonale Regression
Q.x <- sum((x - mean(x))^2); Q.y <- sum((y - mean(y))^2)
Q.xy <- sum((x - mean(x))*(y - mean(y)))
b <- (-(Q.x-Q.y)+sqrt((Q.x-Q.y)^2+4*Q.xy^2)) / (2 * Q.xy); b
## [1] 0.5004332
a <- mean(y) - b*mean(x); a
## [1] 6.636483
r1 <- lm(y ~ x); a1 <- r1$coeff[1]; b1 <- r1$coeff[2]
r2 <- lm(x ~ y); a2 <- r2$coeff[1]; b2 <- r2$coeff[2]
par(mfrow=c(1,1), lwd=1.5, font.axis=2, bty="l",
cex.lab=1.4, ps=12, cex.axis=1)
plot(x, y, pch=16, cex=1.4, ylab="Y", xlab="X", las=1,
ylim=c(10, 20), xlim=c(10, 20))
abline(a, b, col="red", lwd=2)
abline(a1, b1, lty=2)
abline(-a2/b2, 1/b2, lty=2)
Funktion rq() in library(quantreg)
Funktion rlm() in library(MASS)
library(quantreg)
library(MASS)
Jahr <- seq(1950, 1973, by=1) # internationale Telephonate in Belgien 1950 - 1973
Anzahl <- c(0.44,0.47,0.47,0.59,0.66,0.73,0.81,0.88,1.06,1.2,1.35,1.49,1.61,
2.12,11.90,12.40,14.20,15.90,18.20,21.20,4.30,2.40,2.70,2.90)
OLS.regr <- lm(Anzahl ~ Jahr); OLS.regr
##
## Call:
## lm(formula = Anzahl ~ Jahr)
##
## Coefficients:
## (Intercept) Jahr
## -983.8868 0.5041
LAD.regr <- rq(Anzahl ~ Jahr, tau=0.5); LAD.regr
## Call:
## rq(formula = Anzahl ~ Jahr, tau = 0.5)
##
## Coefficients:
## (Intercept) Jahr
## -228.4790909 0.1172727
##
## Degrees of freedom: 24 total; 22 residual
HUBER.regr <- rlm(Anzahl ~ Jahr, psi = psi.huber, k2 = 1.345); HUBER.regr
## Call:
## rlm(formula = Anzahl ~ Jahr, psi = psi.huber, k2 = 1.345)
## Ran 20 iterations without convergence
##
## Coefficients:
## (Intercept) Jahr
## -410.5620825 0.2105068
##
## Degrees of freedom: 24 total; 22 residual
## Scale estimate: 0.985
par(mfrow=c(1,1), lwd=1.5, font.axis=2, bty="l", ps=12,
cex.lab=1.4, cex.axis=1.2)
plot(Jahr, Anzahl, pch=16, cex=1.3, ylab="Anzahl", las=1,
xlab="Jahr", ylim=c(0, 22), xlim=c(1950,1975))
abline(OLS.regr$coefficients[1],
OLS.regr$coefficients[2], col="red", lwd=2, lty=1)
text(1970, 11, "OLS", cex=1.3)
abline(LAD.regr$coefficients[1],
LAD.regr$coefficients[2], col="blue", lwd=2, lty=2)
text(1968, 1.2, "LAD", cex=1.3)
abline(HUBER.regr$coefficients[1],
HUBER.regr$coefficients[2], col="blue", lwd=2, lty=3)
text(1971, 5.6, "HUBER", cex=1.3)
par(mfrow=c(1,3), lwd=2, font.axis=2, bty="l", ps=14,
cex.axis=1.2, cex.lab=1.4)
x <- seq(0, 15, by=0.1)
y1 <- 1 + 1*x + 0.05*x^2; y2 <- 1 + 1*x - 0.05*x^2
y3 <- 10 - 1*x + 0.05*x^2; y4 <- 10 - 1*x - 0.05*x^2
plot(x, y1, type ="l", main="quadratisch", xlab=" ", las=1,
ylab=" ", ylim=c(0,15))
lines(x, y2); lines(x, y3); lines(x, y4)
text(12.1, 12, expression(y[1] == 1 + 1*x + 0.05*x^2), cex=1.3)
text(12, 7, expression(y[2] == 1 + 1*x - 0.05*x^2), cex=1.3)
text( 9.1, 4.5, expression(y[3] == 10 - 1*x + 0.05*x^2), cex=1.3)
text(11.2, 1, expression(y[4] == 10 - 1*x - 0.05*x^2), cex=1.3)
x <- seq(0.1, 12, by=0.1); y1 <- 1 + 10/x; y2 <- 20 - 10/x
plot(x, y1, type="l", main="hyperbolisch", xlab=" ", las=1,
ylab=" ", ylim=c(0,20))
lines(x, y2)
text( 7, 4, expression(y[1] == 1 + frac(10,x)), cex=1.3)
text( 5,16, expression(y[2] == 20 - frac(10,x)), cex=1.3)
x <- seq(0.1, 12, by=0.1); e <- exp(1)
y1 <- 0.1 * e^(0.5*x); y2 <- 10 * e^(-0.5*x)
plot(x, y1, type="l", main="exponentiell", xlab=" ", las=1,
ylab=" ", ylim=c(0,20))
lines(x, y2)
text(7.7, 15, expression(y[1] == 0.1 * plain(e)^{0.5*x}), cex=1.3)
text(3, 8, expression(y[2] == 10 * plain(e)^{-0.5*x}), cex=1.3)
x <- c(1, 2, 3, 4, 5) # nichtlineare Regression
y <- c(4, 1, 3, 5, 6)
nls( y ~ a + b*x + c*x^2, start = list(a = 1, b = 1, c = 1))
## Nonlinear regression model
## model: y ~ a + b * x + c * x^2
## data: parent.frame()
## a b c
## 5.4000 -2.6286 0.5714
## residual sum-of-squares: 3.829
##
## Number of iterations to convergence: 1
## Achieved convergence tolerance: 9.257e-09
# Güte der Anpassung
mod <- nls( y ~ a + b*x + c*x^2, start = list(a = 1, b = 1, c = 1))
formula(mod); coef(mod)
## y ~ a + b * x + c * x^2
## a b c
## 5.4000000 -2.6285714 0.5714286
new <- data.frame(x = c(1,2,3,4,5))
predict(mod, new)
## [1] 3.342857 2.428571 2.657143 4.028571 6.542857
a <- coef(mod) [1]; b <- coef(mod) [2]; c <- coef(mod) [3]
y.hat <- predict(mod, new)
x.line <- seq(1, 5, by=0.1)
y.line <- a + b*x.line + c*x.line^2
par(mfrow=c(1,1), lwd=1.5, font.axis=2, bty="l", ps=12,
cex.lab=1.4, cex.axis=1.2)
plot(x, y, xlab="x - Wert", ylab="y - Wert", las=1,
ylim=c(1,7), cex=2, pch=16)
points(x, y.hat, cex=2, pch=18, col="red")
lines(x.line, y.line, lty=2, lwd=2, col="red")
x <- seq(0, 100,by=.5) # asymptotische Regression
a <- 10; b <- 2 ; c <- -2.8
t0.5 <- log(2)/exp(c); t0.5
## [1] 11.39856
y.asymp <- a + (b-a)*exp(-exp(c)*x)
par(mfrow=c(1,1), lwd=2, font.axis=2, bty="l", ps=14,
cex.lab=1.5, cex.axis=1.2)
plot(x, y.asymp, type="l", las=1, col="red",
ylim=c(0,12), xlab=" ", ylab="y")
abline(h=a, lty=2); abline(h=b, lty=2)
y1 <- a + (b-a)*exp(-exp(c)*t0.5)
lines(c(t0.5,t0.5), c(0,y1), lty=3)
lines(c(0, t0.5), c(y1, y1), lty=3)
text(50, a+0.5, paste("a = ",a), cex=1.3)
text(50, b+0.5, paste("b = ",b), cex=1.3)
text(45, b+(a-b)/2, paste("c = ",c," (t = ",round(t0.5,2),")"), cex=1.3)
x <- seq(0, 100,by=.5) # asymptotische Regression mit Offset
a <- 10; b <- -2.8; c <- 15
t0.5 <- log(2)/exp(b); t0.5
## [1] 11.39856
y.asympoff <- a*(1-exp(-exp(b)*(x-c)))
par(mfrow=c(1,1), lwd=2, font.axis=2, bty="l", ps=14,
cex.lab=1.5, cex.axis=1.2)
plot(x, y.asympoff, type="l", ylim=c(0,12), xlab=" ", las=1, col="red",
ylab="y (asymptotische Regression mit Offset)")
abline(h=a, lty=2)
y1 <- a*(1-exp(-exp(b)*(x-c)))
lines(c(c, c), c(0, a/2+1), lty=3)
lines(c(c, c+t0.5), c(a/2, a/2), lty=3)
text(50, a+0.5, paste("a = ", a), cex=1.3)
text(c, a/2+1.5, paste("c = ",c), cex=1.3)
text(50, a/2, paste("b = ",b," (t = ",round(t0.5,2),")"))
x <- seq(0, 100,by=.5) # logistische Regression
a <- 10; b <- 50; c <- 10
y.logis <- a / (1 + exp((b-x)/c))
par(mfrow=c(1,1), lwd=2, font.axis=2, bty="l", ps=14,
cex.lab=1.5, cex.axis=1.2)
plot(x, y.logis, type="l", ylim=c(0,12), xlab=" ", ylab="y", las=1, col="red")
abline(h=a, lty=2)
y1 <- a - 2
lines(c(b, b), c(0, y1), lty=2)
y2 <- 0.73 * a; y2
## [1] 7.3
x2 <- b - c * log(1/0.73 - 1); x2
## [1] 59.94623
lines(c(b, x2), c(y2, y2), lty=2)
text(80, a+0.5, paste("a = ",a), cex=1.3)
text(50, y1 + 0.5, paste("b = ",b), cex=1.3)
text(75, y2, paste("c = ",c), cex=1.3)
x <- seq(0, 10, by=.05) # Compartment-Modell Regression
dose <- 1.5; elim <- 0.5; absorp <- 2.0; clear <- 0.2
a <- exp(elim); a
## [1] 1.648721
b <- exp(absorp); b
## [1] 7.389056
c <- exp(clear); c
## [1] 1.221403
y.fol <- (dose * elim * absorp / clear * (absorp-elim)) *
(exp(-elim*x)-exp(-absorp*x))
par(mfrow=c(1,1), lwd=2, font.axis=2, bty="l", ps=14,
cex.lab=1.5, cex.axis=1.2)
plot(x, y.fol, type="l", ylim=c(0,6), xlab=" ", ylab="y", las=1, col="red")
# Michaelis-Menton Gleichung
conc <- c(0.02, 0.02, 0.06, 0.06, 0.11, 0.11, 0.22,
0.22, 0.56, 0.56, 1.10, 1.10)
rate <- c(76, 47, 97, 107, 123, 139, 159, 152, 191, 201, 207, 200)
nls(rate ~ SSmicmen(conc, Vm, K))
## Nonlinear regression model
## model: rate ~ SSmicmen(conc, Vm, K)
## data: parent.frame()
## Vm K
## 212.68371 0.06412
## residual sum-of-squares: 1195
##
## Number of iterations to convergence: 0
## Achieved convergence tolerance: 1.937e-06
par(mfrow=c(1,1), lwd=2, font.axis=2, bty="l", ps=14,
cex.lab=1.5, cex.axis=1.2)
fmod <- nls(rate ~ SSmicmen(conc, Vm, K))
plot(conc, rate, xlab="Konzentration [ppm]", las=1,
ylab=expression(Counts / min^2),
xlim=c(0,1.2), ylim=c(40,220), cex=1.5, pch=16)
lines(conc, predict(fmod, list(conc = conc)), lwd=2, col="red")
x <- c(1, 2, 3, 4, 5) # Beispiel zu den Normalgleichungen
y <- c(3, 7,12,26,51)
nls( y ~ a*b^x, start = list(a = 1, b = 1))
## Nonlinear regression model
## model: y ~ a * b^x
## data: parent.frame()
## a b
## 1.602 1.999
## residual sum-of-squares: 1.225
##
## Number of iterations to convergence: 7
## Achieved convergence tolerance: 6.097e-06
# linearisierende Funktionen
x <- seq(0, 10, by=0.1)
a <- 2; b <- 0.2
y1 <- a * exp(b*x) # Exponentialfnktion
plot(x, y1, type="b", ylim=c(0, 10), cex = 0.5,
ylab="y", main="Exponentialfunktion")
abline(h=2, lty=3)
text(0.1, 3, "a")
text(3.5, 6, "a>0, b>0")
a <- 2; b <- -0.4
y1 <- a * exp(b*x)
lines(x, y1, type="b", ylim=c(0, 10), cex =0.5, lty = 2)
text(5, 1, "a>0, b<0")
a <- 0.1; b <- 2.5
y2 <- a * x^b # Potenzfunktion
plot(x, y2, type="b", ylim=c(0, 10), cex =0.5,
ylab="y", main="Potenzfunktion")
abline(a, 1, lty=3)
text(4.5, 9, "a>0, b>1")
text(9, 8, "b=1")
a <- 2; b <- 0.5
y2 <- a * x^b
lines(x, y2, type="b", ylim=c(0, 10), cex =0.5, lty = 2)
text(7, 4, "a>0, b<1")
a <- 8; b <- 0.5
y3 <- a*x / (b + x) # Hyperbelfunktion
plot(x, y3, type="b", ylim=c(0, 10), cex =0.5,
ylab="y", main="Hyperbel")
abline(h=8, lty=3)
text(0.1, 8.8, "a")
text(3, 5.5, "a>0, b>0")
a <- 8; b <- -0.7
y4 <- (1 - exp(b*x))*a # exponent. Sättigung
plot(x, y4, type="b", ylim=c(0, 10), cex =0.5,
ylab="y", main="exponent. Sättigung")
abline(h=8, lty=3)
text(0.1, 8.8, "a")
text(3, 5.5, "a>0, b<0")
a <- 8; b <- -0.7
y5 <- a / (1 + 10*exp(b*x)) # exponent. Sigmoid
plot(x, y5, type="b", ylim=c(0, 10), cex =0.5,
ylab="y", main="exponent. Sigmoid")
abline(h=8, lty=3)
text(0.1, 8.8, "a")
text(3, 1, "a>0, b<0, c>0")
a <- 4; b <- -0.8
y6 <- a / (b + x) # modifizierte Inverse
plot(x, y6, type="b", ylim=c(0, 10), cex =0.5,
ylab="y", main="modifizierte Inverse")
abline(h=a/b, lty=3)
abline(v=-b, lty=3)
text(0.3, 9, "x = -b")
text(2.8, 9, "a>0, b<0")
a <- 4; b <- 0.7
y6 <- a / (b + x)
lines(x, y6, type="b", ylim=c(0, 10), cex =0.5, lty = 2)
text(2.5, 0.5, "a>0, b>0")
a <- 5; b <- -0.5
y7 <- a * exp(b/x) # modifizierte Exponentialfunktion
plot(x, y7, type="b", ylim=c(0, 10), cex =0.5,
ylab="y", main="modif. Exponentialfunktion")
abline(h=5, lty=3)
text(0.3,5.8,"a")
text(2.5, 3, "a>0, b<0")
a <- 5; b <- 0.3
y7 <- a * exp(b/x)
lines(x, y7, type="b", ylim=c(0, 10), cex =0.5, lty = 2)
text(2, 8, "a>0, b>0")
a <- 10; b <- -0.4
y8 <- a * x * exp(b*x) # Maximafunktion
plot(x, y8, type="b", ylim=c(0, 10), cex =0.5,
ylab="y", main="Maximafunktion")
text(5, 4, "a>0, b<0")
dt <- read.csv2("nonparegdat.csv")
dt[1:10, ]
x <- dt$x; y <- dt$y
plot(x, y)
# Regressogramm
regramm <- function(x, y, h) {
cuts <- seq(min(x), max(x)+h, by=h)
bins <- cut(x, cuts)
yval <- aggregate(y~bins, FUN=mean)
return(stepfun(x=cuts[c(-1,-length(cuts))], y=yval$y))
}
par(mfcol=c(1,3), lwd=2, font.axis=2, bty="l", ps=18)
plot(x, y, main="Regressogramm")
lines(regramm(x, y, h=0.5), col="blue", lwd=2)
text(15,1.5,"(A)", cex=1.5)
plot(x, y, main="gleitender Mittelwert") # gleitender Mittelwert
lines(ksmooth(x, y, kernel="box", bandwidth=1), col="blue", lwd=2)
text(15,1.5,"(B)", cex=1.5)
plot(x, y, main="Kernschätzer (normal)") # gleitende Anpassung
lines(ksmooth(x, y, kernel="normal", bandwidth=2), col="blue", lwd=2)
text(15,1.5,"(C)", cex=1.5)
x1 <- 1:7; xs1 <- seq(1.0, 7.5, by=0.1) # erzeugen künstlicher Daten
x2 <- 8:14; xs2 <- seq(7.5, 14.5, by=0.1)
x3 <- 15:21; xs3 <- seq(14.5, 21.0, by=0.1)
x <- c(x1, x2, x3); xs <- seq(1,21, by=0.1)
y1 <- c(5,6.5,7,8,7,5,4);
y2 <- c(3,2.5,1,1,3,2,5);
y3 <- c(6,8,10,9,8,7,7.5)
y <- c(y1, y2, y3)
# 'truncate' Polynom 3. Grades
tpow <- function(x, t) (x - t) ^ 3 * (x > t)
par(mfrow=c(1,3), lwd=2, font.axis=2, bty="l", ps=10)
plot(x, y, main="(A) Spline-Interpolation (kubisch)") # splines
lines(spline(x, y, xout=seq(1,21, by=0.1)))
plot(x, y, main="(B) polynom. Regression in Abschnitten")
abline(v=c(7.5, 14.5), lty=2)
s1 <- lm(y1 ~ x1 + I(x1^2) + I(x1^3)) # 1. Abschnitt
lines(xs1, predict(s1, data.frame(x1=xs1)))
s2 <- lm(y2 ~ x2 + I(x2^2) + I(x2^3)) # 2. Abschnitt
lines(xs2, predict(s2, data.frame(x2=xs2)))
s3 <- lm(y3 ~ x3 + I(x3^2) + I(x3^3)) # 3. Abschnitt
lines(xs3, predict(s3, data.frame(x3=xs3)))
symbols(7.5, 3.2, circles = 0.2, inches=0.25, add=TRUE,
lty=3, fg="darkgrey")
symbols(14.5, 4.6, circles = 0.2, inches=0.45, add=TRUE,
lty=3, fg="darkgrey")
# Spline-Interpolation (kubisch)
plot(x, y, main="(C) Spline Anpassung (kubisch)")
fm <- lm(y ~ x + I(x^2) + I(x^3) + I(tpow(x,7.5)) + I(tpow(x,14.5)))
xs <- seq(1, 21, by=0.1)
lines(xs, predict(fm, data.frame(x=xs)), col="red")
abline(v=c(7.5, 14.5), lty=2)