1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
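# 1-2.2 Handling NULL/NA values: either delete them or impute with the median or mean.
# Method 1: delete the affected observations directly.
# Method 2: impute skewed distributions with the median and normal distributions with the mean.
# Use histograms and box plots to identify the type of distribution.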
# Draw a histogram and a box plot for every column listed in `seat`, laid out
# on a 2 x 4 plotting grid, so each distribution's shape can be judged by eye.
# NOTE(review): `data` and `seat` are defined outside this section; `seat` is
# presumably a vector of column positions in `data` -- confirm.
par(mfrow = c(2, 4))
# seq_along() (unlike 1:length()) runs zero iterations when `seat` is empty.
for (i in seq_along(seat)) {
  hist(
    data[, seat[i]],
    freq = TRUE,
    main = paste("Histogram of", colnames(data)[seat[i]]),
    xlab = colnames(data)[seat[i]]
  )
  boxplot(
    data[, seat[i]],
    main = colnames(data)[seat[i]],
    xlab = "time",
    outcol = "red" # outlier points are drawn in red
  )
}
# Missing-value replacement (outlier-fence helpers first, then the imputation itself)
# Upper outlier fence of a sample: Q3 + 1.5 * IQR.
#
# Args:
#   df: the values to summarise. quantile() is applied to it directly, so a
#       numeric vector is expected (not a data frame, despite the name).
#       Missing values are ignored.
# Returns:
#   A single unnamed number, Q3 + 1.5 * (Q3 - Q1). The result is NA when no
#   non-missing values remain.
MAX <- function(df) {
  # na.rm = TRUE drops NA/NaN before the quartiles are computed;
  # names = FALSE returns bare numbers instead of "25%"/"75%"-named ones.
  quartiles <- quantile(df, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  spread <- quartiles[2] - quartiles[1]
  quartiles[2] + 1.5 * spread
}
# Lower outlier fence of a sample: Q1 - 1.5 * IQR.
#
# Args:
#   df: the values to summarise. quantile() is applied to it directly, so a
#       numeric vector is expected (not a data frame, despite the name).
#       Missing values are ignored.
# Returns:
#   A single unnamed number, Q1 - 1.5 * (Q3 - Q1). The result is NA when no
#   non-missing values remain.
MIN <- function(df) {
  # na.rm = TRUE drops NA/NaN before the quartiles are computed;
  # names = FALSE returns bare numbers instead of "25%"/"75%"-named ones.
  quartiles <- quantile(df, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  spread <- quartiles[2] - quartiles[1]
  quartiles[1] - 1.5 * spread
}
# Impute missing values in two of the columns listed in `seat`.
# NOTE(review): `data` and `seat` are defined outside this section; the labels
# "BQ" and "EL" are carried over from the original comments -- confirm they
# match columns seat[1] and seat[5].

# Row positions of the missing entries in each column. Only is.na() is needed:
# is.null() on a whole column is a single FALSE and never selects a row.
seat_1 <- which(is.na(data[, seat[1]])) # BQ: impute with the median
seat_2 <- which(is.na(data[, seat[5]])) # EL: impute with the mean

# BQ: fill every missing entry with the median of the observed values.
# The length guard skips the assignment when nothing is missing.
if (length(seat_1) > 0) {
  data[seat_1, seat[1]] <- median(data[, seat[1]], na.rm = TRUE)
}

# EL: fill every missing entry with the mean of the observed values.
# The rows come from seat_2 (the original indexed an undefined `seat_5`).
if (length(seat_2) > 0) {
  data[seat_2, seat[5]] <- mean(data[, seat[5]], na.rm = TRUE)
}
|