Hinweis: Die Gliederung zu den Befehlen und Ergebnissen in R orientiert sich an dem Inhaltsverzeichnis der 17. Auflage der ‘Angewandten Statistik’. Nähere Hinweise zu den verwendeten Formeln sowie Erklärungen zu den Beispielen sind in dem Buch (Ebook) nachzulesen!
Hinweis: Die thematische Gliederung zu den aufgeführten Befehlen und Beispielen orientiert sich an dem Inhaltsverzeichnis der 17. Auflage der ‘Angewandten Statistik’. Nähere Hinweise zu den verwendeten Formeln sowie Erklärungen zu den Beispielen sind in dem Buch (Ebook) nachzulesen!
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=German_Germany.1252 LC_CTYPE=German_Germany.1252
## [3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C
## [5] LC_TIME=German_Germany.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] compiler_3.6.3 magrittr_1.5 tools_3.6.3 htmltools_0.4.0
## [5] yaml_2.2.1 Rcpp_1.0.4 stringi_1.4.6 rmarkdown_2.1
## [9] knitr_1.28 stringr_1.4.0 xfun_0.12 digest_0.6.25
## [13] rlang_0.4.5 evaluate_0.14
Download von Datensätzen, die in den folgenden Beispielen verwendet werden:
Infarkt-Daten: infarkt
mean(c(4,6,8,9)) # Mittelwertberechnung
## [1] 6.75
sqrt(5) # Wurzelfunktion
## [1] 2.236068
round(5.23454, digits=3) # Rundung auf 3 Dezimalziffern
## [1] 5.235
# help()
# help(sqrt) # Quadratwurzel
# ?sqrt
# ?mean
example(mean) # arithmetischer Mittelwert
##
## mean> x <- c(0:10, 50)
##
## mean> xm <- mean(x)
##
## mean> c(xm, mean(x, trim = 0.10))
## [1] 8.75 5.50
x <- 1:10 # numerisch
length(x)
## [1] 10
mode(x)
## [1] "numeric"
name <- c("Statistik","Mathematik") # Zeichenketten
length(name)
## [1] 2
mode(name)
## [1] "character"
logic <- c(TRUE, FALSE) # logisch
mode(logic)
## [1] "logical"
wurzel.12 <- sqrt(12) # Variablen
wurzel.12 # Wurzel aus 12
## [1] 3.464102
data(Titanic) # Datenrahmen (Tabellenstruktur)
str(Titanic) #
## 'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
## - attr(*, "dimnames")=List of 4
## ..$ Class : chr [1:4] "1st" "2nd" "3rd" "Crew"
## ..$ Sex : chr [1:2] "Male" "Female"
## ..$ Age : chr [1:2] "Child" "Adult"
## ..$ Survived: chr [1:2] "No" "Yes"
mat <- matrix(c("a","b","c","d"),
nrow=2) # Matrix
mat
## [,1] [,2]
## [1,] "a" "c"
## [2,] "b" "d"
is.matrix(mat)
## [1] TRUE
is.numeric(mat)
## [1] FALSE
library(date) # date handling (package 'date')
dat <- as.date(c("15jul2008","10mar1980"))
dat
## [1] 15Jul2008 10Mar80
# Subtracting two dates yields the difference in days. Dividing by 365.25
# (mean year length including leap days) converts it to years; dividing by
# 360 would overstate the span.
(dat[1] - dat[2])/365.25
## [1] 28.34771
dat1 <- mdy.date(7, 15, 2008)
dat2 <- mdy.date(3, 10, 1980)
str(dat1); str(dat2)
## 'date' int 17728
## 'date' int 7374
# Same computation with dates built from month / day / year
(dat1 - dat2)/365.25
## [1] 28.34771
ls() # Objekte im 'Arbeitsspeicher'
## [1] "colorize" "dat" "dat1" "dat2" "logic" "mat"
## [7] "name" "opar" "Titanic" "wurzel.12" "x" "xm"
rm(list = ls()) # Remove all objects from the workspace
1:10
## [1] 1 2 3 4 5 6 7 8 9 10
20:15
## [1] 20 19 18 17 16 15
seq(1, 5, by=0.5)
## [1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0
seq(1, 5, length=11)
## [1] 1.0 1.4 1.8 2.2 2.6 3.0 3.4 3.8 4.2 4.6 5.0
rep(5, 20)
## [1] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
c(1, 7:9)
## [1] 1 7 8 9
c(1:5, 10.5, "next")
## [1] "1" "2" "3" "4" "5" "10.5" "next"
m <- rep(1, 15); w <- rep(2, 20)
sex <- c(m, w)
faktor <- factor(sex, levels=1:2, labels=c("männlich", "weiblich"))
summary(faktor)
## männlich weiblich
## 15 20
alter <- rnorm(50, mean=40, sd=10)
summary(alter)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 11.96 33.63 38.70 40.49 48.10 63.15
alter.k <- cut(alter, breaks=c(10, 20, 30, 40, 50, 60, 70))
summary(alter.k)
## (10,20] (20,30] (30,40] (40,50] (50,60] (60,70]
## 2 6 21 13 5 3
mat <- matrix(1:9, ncol=3, byrow=T); mat
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
mat[1,] # Zeilen
## [1] 1 2 3
mat[,2] # Spalten
## [1] 2 5 8
mat[2,2] # Zellen
## [1] 5
margin.table(mat, 1) # Zeilensummen
## [1] 6 15 24
margin.table(mat, 2) # Spaltensummen
## [1] 12 15 18
mat <- rbind(mat, c(10, 11, 12)); mat # Zeilen ergänzen (Verbinden)
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
## [4,] 10 11 12
mat <- cbind(mat, c(13, 14, 15, 16)); mat # Spalten ergänzen (verbinden)
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 13
## [2,] 4 5 6 14
## [3,] 7 8 9 15
## [4,] 10 11 12 16
count <- matrix(c(12, 6, 5, 10), # Zeilen- / Spaltenbezeichnungen
ncol=2, byrow=T,
dimnames=list(c("Zeile 1", "Zeile 2"), c("Spalte 1", "Spalte 2")))
count
## Spalte 1 Spalte 2
## Zeile 1 12 6
## Zeile 2 5 10
n <- 10
data <- matrix(c(11, 6,11, 9, 8, 6, 2, 7, 4, 9,10, 4,
6, 4, 9, 6, 5, 4, 1, 2, 10,10, 6, 9,
6, 8, 2, 1, 2, 2, 8, 4, 8, 2, 4,10,
9, 7, 6, 9), nrow=n, byrow=T,
dimnames = list(
c("S1","S2","S3","S4","S5","S6","S7","S8","S9","S10"),
c("V.1", "V.2", "V.3","V.4"))); data
## V.1 V.2 V.3 V.4
## S1 11 6 11 9
## S2 8 6 2 7
## S3 4 9 10 4
## S4 6 4 9 6
## S5 5 4 1 2
## S6 10 10 6 9
## S7 6 8 2 1
## S8 2 2 8 4
## S9 8 2 4 10
## S10 9 7 6 9
one <- rep(1,10); one # Vektor mit Einsen "1"
## [1] 1 1 1 1 1 1 1 1 1 1
summe <- one %*% data; summe # Summen über "inner product" mit Einsen
## V.1 V.2 V.3 V.4
## [1,] 69 58 59 61
mittel <- summe / n; mittel # Mittelwerte
## V.1 V.2 V.3 V.4
## [1,] 6.9 5.8 5.9 6.1
# Abweichungen vom Mittelwert
deviat <- data - one %*% mittel; round(deviat,2)
## V.1 V.2 V.3 V.4
## S1 4.1 0.2 5.1 2.9
## S2 1.1 0.2 -3.9 0.9
## S3 -2.9 3.2 4.1 -2.1
## S4 -0.9 -1.8 3.1 -0.1
## S5 -1.9 -1.8 -4.9 -4.1
## S6 3.1 4.2 0.1 2.9
## S7 -0.9 2.2 -3.9 -5.1
## S8 -4.9 -3.8 2.1 -2.1
## S9 1.1 -3.8 -1.9 3.9
## S10 2.1 1.2 0.1 2.9
# Kovarianzmatrix
covar <- t(deviat) %*% deviat/(n - 1); round(covar,2)
## V.1 V.2 V.3 V.4
## V.1 7.88 2.76 0.32 6.79
## V.2 2.76 7.73 0.87 -0.09
## V.3 0.32 0.87 12.77 3.46
## V.4 6.79 -0.09 3.46 10.32
# Varianzen aus der Diagonalen
varia <- diag(covar); round(varia,2)
## V.1 V.2 V.3 V.4
## 7.88 7.73 12.77 10.32
# Korrelationskoeffizienten
sdi <- diag(1/sqrt(diag(covar)))
correl <- sdi %*% covar %*% sdi; round(correl,2)
## [,1] [,2] [,3] [,4]
## [1,] 1.00 0.35 0.03 0.75
## [2,] 0.35 1.00 0.09 -0.01
## [3,] 0.03 0.09 1.00 0.30
## [4,] 0.75 -0.01 0.30 1.00
alter <- c(19, 22, 24)
geschlecht <- c("maennlich","weiblich","maennlich")
groesse <- c(170, 165, 181)
studenten <- data.frame(alter, geschlecht, groesse)
studenten
studenten[2,] # Auswahl 2. Zeile (fallweise)
studenten[geschlecht=="maennlich",] # Auswahl nach dem Geschlecht
Einlesen von Daten aus einer externen Datei (CSV-Format)
infarkt <- read.csv2("infarkt.csv")
infarkt[1:10, 1:9]
str(infarkt) # Datenstruktur
## 'data.frame': 80 obs. of 14 variables:
## $ Nummer : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Gruppe : Factor w/ 2 levels "Infarkt","Kontrolle": 1 1 1 1 1 1 1 1 1 1 ...
## $ Geschlecht: Factor w/ 2 levels "m","w": 1 1 1 1 1 1 1 1 1 1 ...
## $ Alter : int 39 43 45 50 51 52 53 53 56 56 ...
## $ RelGew : int 128 106 94 98 132 108 165 116 138 140 ...
## $ RRsyst : int 155 145 160 200 190 195 220 140 180 175 ...
## $ RRdias : int 95 95 90 110 110 110 105 85 100 95 ...
## $ Blutz : int 100 140 95 80 90 90 70 85 200 90 ...
## $ Diabet : logi FALSE TRUE FALSE FALSE FALSE FALSE ...
## $ Chol : int 195 205 245 190 260 190 340 195 285 380 ...
## $ Trigl : int 146 138 200 101 202 164 150 93 135 127 ...
## $ Hbdh : int 410 380 260 165 300 290 605 170 277 523 ...
## $ Got : num 28.3 19 15.4 23.2 20.6 18.7 33.1 20.2 18.2 29.7 ...
## $ Zigarr : int 30 0 15 15 5 20 0 5 20 10 ...
# Attach the data frame once so its columns can be referenced by name.
# Caution: attached copies are snapshots; attaching the same data frame again
# only stacks a second copy on the search path and produces masking messages.
attach(infarkt)
# edit(infarkt) # interactive editing of the table
Chol
## [1] 195 205 245 190 260 190 340 195 285 380 220 240 235 215 190 275 205 290 200
## [20] 210 220 265 235 200 350 220 800 230 185 295 380 200 485 210 185 210 395 290
## [39] 190 210 220 200 185 220 215 135 220 180 220 135 150 165 150 195 160 190 85
## [58] 180 160 200 205 230 125 195 100 185 180 205 160 195 190 175 140 145 170 150
## [77] 180 190 175 200
mean(Chol)
## [1] 219.75
Berechnungen / Transformationen / Rekodierungen:
infarkt <- transform(infarkt, lnHbdh=log(Hbdh))
infarkt$lnHbdh
## [1] 6.016157 5.940171 5.560682 5.105945 5.703782 5.669881 6.405228 5.135798
## [9] 5.624018 6.259581 6.595781 5.433722 5.880533 5.652489 5.517453 5.783825
## [17] 5.379897 5.666427 4.955827 5.463832 4.934474 6.602588 6.434547 6.495266
## [25] 5.252273 5.541264 6.198479 5.631212 6.200509 5.918894 6.107023 5.560682
## [33] 4.859812 5.609472 5.572154 5.743003 6.177944 6.077642 6.175867 5.799093
## [41] 4.276666 5.247024 4.584967 5.003946 4.532599 4.204693 4.356709 4.007333
## [49] 4.510860 4.882802 4.330733 4.955827 3.713572 4.553877 4.234107 4.615121
## [57] 5.036953 5.111988 4.672829 5.327876 4.700480 4.644391 3.850148 4.682131
## [65] 4.465908 4.905275 4.941642 5.267858 5.575949 4.488636 4.077537 5.017280
## [73] 4.007333 5.043425 4.969813 5.081404 4.890349 4.812184 4.605170 4.605170
replace(1:10, list=c(2, 4, 6), values=c(20, 40, 60)) #Funktion replace()
## [1] 1 20 3 40 5 60 7 8 9 10
# Recode a single value (e.g. one factor level) of a vector.
#   var : vector or factor to recode
#   alt : value to be replaced
#   neu : replacement value
# Returns a factor (levels rebuilt from the recoded values) when `var` is a
# factor, otherwise the recoded plain vector.
re.code <- function(var, alt, neu) {
  x <- as.vector(var)
  # which() drops NA comparisons, so missing values are left untouched
  x[which(x == alt)] <- neu
  # Test the original input: as.vector() has already stripped the factor
  # class from x, so is.factor(x) could never be TRUE.
  if (is.factor(var)) factor(x) else x
}
re.code(as.factor(c("A", "A", "A", "B", "B", "C", "C")), "B", "X")
## [1] A A A X X C C
## Levels: A C X
infarkt <- transform(infarkt, Gruppe=re.code(Gruppe, "Infarkt", "1"))
infarkt <- transform(infarkt, Gruppe=re.code(Gruppe, "Kontrolle", "0"))
infarkt$Gruppe
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [39] 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [77] 0 0 0 0
## Levels: 0 1
Funktion recode() in library(car)
library(car)
farben <- c("red", "purple", "blue", "blue", "orange", "red", "orange")
farben
## [1] "red" "purple" "blue" "blue" "orange" "red" "orange"
recode(farben, "'red'='rot'; 'blue'='blau'; 'purple'='violett'")
## [1] "rot" "violett" "blau" "blau" "orange" "rot" "orange"
Stutzen der Verteilung (Nachweisgrenze)
daten <- c(5, 9, 11, 8, 9, 3, 1, 13, 9, 12, 5, 12, 6, 3, 17, 5, 8, 7)
cutoff <- 10
ifelse(daten <= cutoff, daten, cutoff)
## [1] 5 9 10 8 9 3 1 10 9 10 5 10 6 3 10 5 8 7
Klasseneinteilung (kategoriell)
infarkt <- read.csv2("infarkt.csv")
infarkt$Alter
## [1] 39 43 45 50 51 52 53 53 56 56 57 57 59 59 61 61 62 62 67 68 68 71 72 74 75
## [26] 76 79 81 57 58 61 63 66 67 69 69 70 72 79 81 28 40 42 46 51 51 52 53 53 54
## [51] 55 55 57 58 59 60 61 61 62 62 62 64 64 65 65 36 42 42 45 48 49 49 51 52 55
## [76] 56 59 61 63 72
Klassen <- cut(infarkt$Alter, breaks=c(0, 50, 70, Inf),
labels=c("-50", ">50-70", ">70"))
Klassen
## [1] -50 -50 -50 -50 >50-70 >50-70 >50-70 >50-70 >50-70 >50-70
## [11] >50-70 >50-70 >50-70 >50-70 >50-70 >50-70 >50-70 >50-70 >50-70 >50-70
## [21] >50-70 >70 >70 >70 >70 >70 >70 >70 >50-70 >50-70
## [31] >50-70 >50-70 >50-70 >50-70 >50-70 >50-70 >50-70 >70 >70 >70
## [41] -50 -50 -50 -50 >50-70 >50-70 >50-70 >50-70 >50-70 >50-70
## [51] >50-70 >50-70 >50-70 >50-70 >50-70 >50-70 >50-70 >50-70 >50-70 >50-70
## [61] >50-70 >50-70 >50-70 >50-70 >50-70 -50 -50 -50 -50 -50
## [71] -50 -50 >50-70 >50-70 >50-70 >50-70 >50-70 >50-70 >50-70 >70
## Levels: -50 >50-70 >70
Wiederholte Beobachtungen:
c1 <- c(1,2,3,4)
c2 <- c(2,5,6,7)
c3 <- c(3,8,9,10)
df <- as.data.frame(rbind(c1,c2,c3))
names(df) <- c("Fall","t1","t2","t3"); df # weites Format von Daten
reshape(df, timevar="Zeit", idvar="Fall", varying=list(2:4),
v.names="Wert", direction="long") # langes Format
x <- c(1, 2, NA, 4); x
## [1] 1 2 NA 4
y <- x + 1; y
## [1] 2 3 NA 5
is.na(x)
## [1] FALSE FALSE TRUE FALSE
which(is.na(x)) # Auffinden fehlender Angaben
## [1] 3
Fehlende Angaben in Funktionsaufrufen!
x <- c(1, 2, NA, 4); x
## [1] 1 2 NA 4
mean(x)
## [1] NA
mean(x, na.rm=TRUE)
## [1] 2.333333
Fehlende Angaben in Datentabellen!
# Report whether the argument contains at least one missing value.
#   dfr : object to inspect (defaults to NA, so a call without argument
#         yields TRUE)
# Returns a single logical value.
any.missing.value <- function(dfr=NA) {
  missing.flags <- is.na(dfr)
  sum(missing.flags) > 0
}
alter <- c(19, 22, 24, 30, NA, 25)
geschlecht <- c("maennlich","weiblich","maennlich",NA,"weiblich","maennlich")
groesse <- c(170, NA, 181, 177, 182, 196)
studenten <- data.frame(alter, geschlecht, groesse)
studenten
y <- studenten[,c("alter","geschlecht","groesse")]
y.na <- !apply(y, 1, any.missing.value); y[y.na,]
colSums(is.na(studenten))
## alter geschlecht groesse
## 1 1 1
na.omit(studenten)
studenten[complete.cases(studenten),]
Zahl1bis20 <- 1:20
Zahl1bis20[6:10]
## [1] 6 7 8 9 10
Zahl1bis20[Zahl1bis20 > 13]
## [1] 14 15 16 17 18 19 20
blut <- c("A","B","AB","0")
blut[3]
## [1] "AB"
infarkt <- read.csv2("infarkt.csv")
attach(infarkt)
infarkt[, 10]
## [1] 195 205 245 190 260 190 340 195 285 380 220 240 235 215 190 275 205 290 200
## [20] 210 220 265 235 200 350 220 800 230 185 295 380 200 485 210 185 210 395 290
## [39] 190 210 220 200 185 220 215 135 220 180 220 135 150 165 150 195 160 190 85
## [58] 180 160 200 205 230 125 195 100 185 180 205 160 195 190 175 140 145 170 150
## [77] 180 190 175 200
Chol[5]
## [1] 260
set <- infarkt[Gruppe=="Infarkt" & Blutz>100, ]
set[, 1:9]
subset(infarkt, Geschlecht=="w" & Alter<45, select = c(Chol, Trigl))
alter <- c(19, 22, 24, 30, NA, 25)
geschlecht <- c("maennlich","weiblich","maennlich",NA,"weiblich","maennlich")
groesse <- c(170, NA, 181, 177, 182, 196)
studenten <- data.frame(alter, geschlecht, groesse)
studenten
studenten[geschlecht=="maennlich",]
Sortieren / Rangzahlen
a <- c(3, 7, 2, 8, 5, 10, 4); a
## [1] 3 7 2 8 5 10 4
sort(a)
## [1] 2 3 4 5 7 8 10
sort(a, decreasing=TRUE)
## [1] 10 8 7 5 4 3 2
a <- c(3, 7, 2, 8, 5, 10, 4)
rank(a)
## [1] 2 5 1 6 4 7 3
b <- c(3, 5, 7, 3, 6, 5)
rank(b)
## [1] 1.5 3.5 6.0 1.5 5.0 3.5
rank(b, ties.method = "min")
## [1] 1 3 6 1 5 3
o <- order(a)
a
## [1] 3 7 2 8 5 10 4
o
## [1] 3 1 7 5 2 4 6
a[o]
## [1] 2 3 4 5 7 8 10
Sortieren in Matrizen und Datentabellen
# Sort the rows of a matrix or data frame by one or more columns.
#   Mat  : matrix or data frame whose rows are reordered
#   Sort : column index/indices (or names) used as sort keys, in order of
#          priority
# Returns Mat with its rows in ascending key order; the result keeps the
# matrix / data-frame shape even when only one row or column remains.
SortMat <- function(Mat, Sort) {
  # drop = FALSE keeps a single key column two-dimensional; unname() ensures
  # the key columns are passed to order() positionally, so a column that
  # happens to be called "decreasing", "na.last" or "method" is not taken
  # for one of order()'s own arguments.
  keys <- unname(as.list(as.data.frame(Mat[, Sort, drop = FALSE])))
  m <- do.call("order", keys)
  Mat[m, , drop = FALSE]
}
alter <- c(19, 22, 24, 30, NA, 25)
geschlecht <- c("maennlich","weiblich","maennlich",NA,"weiblich","maennlich")
groesse <- c(170, NA, 181, 177, 182, 196)
studenten <- data.frame(alter, geschlecht, groesse)
studenten
SortMat(studenten, 3)
studenten[order(studenten[,3]),]
m <- matrix(c(2,5,4, 5,2,3, 2,6,7), byrow=T, nrow=3); m
## [,1] [,2] [,3]
## [1,] 2 5 4
## [2,] 5 2 3
## [3,] 2 6 7
SortMat(m, 2)
## [,1] [,2] [,3]
## [1,] 5 2 3
## [2,] 2 5 4
## [3,] 2 6 7
for (…) und if (…)
a <- rep(NA, 10) # for (...) if (...)
a
## [1] NA NA NA NA NA NA NA NA NA NA
for (i in 1:10) if (i<6) a[i]<-"unten" else a[i]<-"oben"
a
## [1] "unten" "unten" "unten" "unten" "unten" "oben" "oben" "oben" "oben"
## [10] "oben"
ifelse()
zaehler <- c(2, 4,NA, 8, 7, 5, 3, 1) # ifelse()
nenner <- c(4, 3, 2, 1, 0, 3, 2, 0)
quot <- round(ifelse(nenner != 0, zaehler/nenner, NA), 3)
round(quot, 2)
## [1] 0.50 1.33 NA 8.00 NA 1.67 1.50 NA
while (…)
i <- 0; summe <- 0 # while()
while (i < 10) {i <- i+1; summe <- summe + i }
summe
## [1] 55
sum(1:10)
## [1] 55
function()
# Sample standard deviation of a numeric vector.
#   x : numeric vector
# Returns the square root of the sum of squared deviations from the
# arithmetic mean, divided by (number of values - 1).
stdabw <- function(x) {
  n <- length(x)
  centred <- x - sum(x) / n
  sqrt(sum(centred^2) / (n - 1))
}
x <- c (2,3,4,5,6,7)
stdabw(x)
## [1] 1.870829
apply()
# apply functions
X <- matrix(sample(1:20, 20, replace=T), ncol=4, byrow=T)
X
## [,1] [,2] [,3] [,4]
## [1,] 3 17 12 12
## [2,] 5 20 7 5
## [3,] 7 20 5 20
## [4,] 18 10 3 19
## [5,] 19 17 20 17
apply(X, 1, sum) # Zeilensummen
## [1] 44 37 52 50 73
apply(X, 2, sum) # Spaltensummen
## [1] 52 84 47 73
apply(X, 2, which.max) # Pos. max. Wert je Spalte
## [1] 5 2 5 3
apply(X, 1, function(x) {max(x) - min(x)}) # Spannweite je Zeile
## [1] 14 15 15 16 3
apply(X, 1:2, function(x) sqrt(x)) # Quadratwurzel elementweise
## [,1] [,2] [,3] [,4]
## [1,] 1.732051 4.123106 3.464102 3.464102
## [2,] 2.236068 4.472136 2.645751 2.236068
## [3,] 2.645751 4.472136 2.236068 4.472136
## [4,] 4.242641 3.162278 1.732051 4.358899
## [5,] 4.358899 4.123106 4.472136 4.123106
by()
# by functions
infarkt <- read.csv2("infarkt.csv")
by(infarkt[, 10:12], infarkt$Gruppe, summary)
## infarkt$Gruppe: Infarkt
## Chol Trigl Hbdh
## Min. :185.0 Min. : 70.00 Min. :129.0
## 1st Qu.:200.0 1st Qu.: 99.75 1st Qu.:253.5
## Median :220.0 Median :125.00 Median :295.0
## Mean :263.0 Mean :127.53 Mean :352.5
## 3rd Qu.:286.2 3rd Qu.:141.50 3rd Qu.:457.0
## Max. :800.0 Max. :203.00 Max. :737.0
## ------------------------------------------------------------
## infarkt$Gruppe: Kontrolle
## Chol Trigl Hbdh
## Min. : 85.0 Min. : 65.0 Min. : 41.00
## 1st Qu.:157.5 1st Qu.: 99.5 1st Qu.: 84.75
## Median :180.0 Median :123.5 Median :105.50
## Mean :176.5 Mean :130.1 Mean :116.03
## 3rd Qu.:200.0 3rd Qu.:158.8 3rd Qu.:145.25
## Max. :230.0 Max. :239.0 Max. :264.00
tapply()
# tapply functions
x <- 1:12
g <- factor(sample(1:3, 12, replace=T))
rbind(x,g)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## x 1 2 3 4 5 6 7 8 9 10 11 12
## g 3 1 1 3 3 2 2 3 2 1 1 3
round(tapply(x, g, mean), 2)
## 1 2 3
## 6.50 7.33 6.00
# File name spelled exactly as in the other read.csv2() calls of this
# document; a differing case fails on case-sensitive file systems.
infarkt <- read.csv2("infarkt.csv")
# Mean cholesterol per study group
tapply(infarkt$Chol, infarkt$Gruppe, mean)
## Infarkt Kontrolle
## 263.0 176.5
sapply()
# sapply functions
l <- list(groesse=c(160,170,177,182), gewicht=c(70,75,80,90,65,78))
sapply(l, mean)
## groesse gewicht
## 172.25000 76.33333
replicate()
normal <- replicate(8, rnorm(5))
round(normal, 3)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 1.097 -0.549 -0.245 -0.036 -0.511 0.471 0.346 -1.291
## [2,] 0.478 1.005 0.804 -0.038 -1.394 -0.386 0.273 -0.782
## [3,] -0.281 -0.315 0.294 -0.905 -0.675 1.887 0.007 -0.479
## [4,] 1.124 0.030 -0.310 -1.434 -0.045 -1.433 0.042 -1.373
## [5,] 0.578 -0.299 -0.883 1.690 0.625 -0.164 0.585 1.406
vect <- c(1.42, 4.84, -2.55, - 1.24)
vect
## [1] 1.42 4.84 -2.55 -1.24
abs(vect)
## [1] 1.42 4.84 2.55 1.24
round(vect, digits=1)
## [1] 1.4 4.8 -2.5 -1.2
ceiling(vect)
## [1] 2 5 -2 -1
floor(vect)
## [1] 1 4 -3 -2
trunc(vect)
## [1] 1 4 -2 -1
round(vect, digits=1)
## [1] 1.4 4.8 -2.5 -1.2
max(vect)
## [1] 4.84
min(vect)
## [1] -2.55
exp(5)
## [1] 148.4132
round(sin(seq(0, 2*pi, by=(pi/4))), digits=3)
## [1] 0.000 0.707 1.000 0.707 0.000 -0.707 -1.000 -0.707 0.000
sqrt(7)
## [1] 2.645751
Kontingenztafeln
infarkt <- read.csv2("infarkt.csv")
table(infarkt$Gruppe, infarkt$Geschlecht)
##
## m w
## Infarkt 28 12
## Kontrolle 25 15
# Contingency table in long format: one row per Gruppe x Geschlecht cell
tab <- as.data.frame(table(infarkt$Gruppe, infarkt$Geschlecht))
# Column label repaired: the umlaut had been lost to an encoding error ("H?ufigkeit")
names(tab) <- c("Gruppe","Geschlecht","Häufigkeit")
tab
Statistische Maßzahlen:
# File name spelled exactly as in the other read.csv2() calls of this
# document; a differing case fails on case-sensitive file systems.
infarkt <- read.csv2("infarkt.csv")
# Columns are referenced by bare name in the statements that follow
attach(infarkt)
mean(Alter)
## [1] 58.4875
sd(Alter)
## [1] 10.70549
max(Blutz)
## [1] 350
quantile(RRsyst, prob=c(0.10,0.25,0.50,0.75,0.90))
## 10% 25% 50% 75% 90%
## 140.00 148.75 160.00 175.00 190.00
summary(Chol)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 85.0 180.0 200.0 219.8 222.5 800.0
aggregate()
infarkt <- read.csv2("infarkt.csv")
attach(infarkt)
stat <- aggregate(Chol, by=data.frame(Geschlecht), summary, na.rm=T)
print(stat)
## Geschlecht x.Min. x.1st Qu. x.Median x.Mean x.3rd Qu. x.Max.
## 1 m 85.0000 190.0000 205.0000 220.8491 230.0000 800.0000
## 2 w 140.0000 177.5000 190.0000 217.5926 210.0000 485.0000
stat[[2]][,3] # Teilergebnisse: Medianwerte
## [1] 205 190
# File name spelled exactly as in the other read.csv2() calls of this
# document; a differing case fails on case-sensitive file systems.
infarkt <- read.csv2("infarkt.csv")
# Columns are referenced by bare name in the plotting calls that follow
attach(infarkt)
par(mfrow=c(1,3), ps=14, font=2, font.axis=2, font.lab=2,
font.main=2, font.sub=2, lwd=2)
hist(Chol, main="Histogramm", col="grey")
boxplot(Blutz[Gruppe=="Infarkt"], Blutz[Gruppe=="Kontrolle"],
xlab="Gruppe", main="Box-Whisker Plot")
plot(RRsyst, RRdias, main="Punktwolke")
par(mfrow=c(1,2), lwd=2, font.axis=2, bty="l", ps=12)
Pex <- 2.0
ipch <- 1:20
k <- floor(sqrt(20))
dd <- c(-1, 1)/2
rx <- dd + range(ix <- (ipch - 1)%/%k)
ry <- dd + range(iy <- 3 + (k - 1) - (ipch - 1)%%k)
pch <- as.list(ipch)
plot(rx, ry, type = "n", axes = FALSE, xlab = "", ylab = "",
main = "Symbole (pch)")
for (i in 1:20) {
pc <- pch[[i]]
points(ix[i], iy[i], pch = pc, col = "black",
bg = "yellow", cex = Pex)
text(ix[i] - 0.5, iy[i], pc, col = "black", font=3, cex = 1.0)
}
Pex <- 3
ity <- 7
dd <- c(-1, 1)/2
rx <- dd + range(ix <- rep(c(1,2), 7))
ry <- dd + range(iy <- seq(1, 7, by=1))
plot(rx, ry, type = "n", axes = FALSE, xlab = "", ylab = "",
main = "Linien (lty)")
for (i in 1:7) {
pc <- i
lines(c(ix[i], ix[i+1]), c(iy[i], iy[i]), lty=i, lwd=2, cex = Pex)
text(1 - 0.2, iy[i], pc, col = "black", font=3, cex = 1.0)
}
par(mfrow=c(1,1),ps=14, font=2, font.axis=2, font.lab=2,
font.main=2, font.sub=2, lwd=2)
x <- seq(-4, +4, by=0.2)
y <- x^2
plot(x, y, type="l")
abline(v=0)
polygon(x[10:30], y[10:30], density=10)
text(2.5,1, "Segment")
title("Parabel")