library(ranger)
# Load the semicolon-separated payment data and discard its first column.
data.payment <- read.csv(
  "~/Desktop/data-payment.csv",
  sep = ";",
  header = TRUE
)[, -1]
# Treat the categorical predictors and the response `y` as factors.
data.payment[c("SEX", "EDUCATION", "MARRIAGE", "y")] <- lapply(
  data.payment[c("SEX", "EDUCATION", "MARRIAGE", "y")],
  as.factor
)
# Hold out a random test set of `n.test` rows; every other row is used for
# training. Sizes are derived from the data instead of being hard-coded.
# No seed is set here, so the split and the noise below differ between runs;
# call set.seed() beforehand if reproducible results are needed.
n.test <- 1000
indi <- sample(seq_len(nrow(data.payment)), n.test)
data.train <- data.payment[-indi, ]
data.test <- data.payment[indi, ]

# Append `n.noise` pure-noise predictors (columns u.1, ..., u.10), drawn
# independently from a uniform distribution on [0, 1].
# They must be present in BOTH sets: models fitted with `y ~ .` on the
# training data use these columns, so predicting on a test set that lacks
# them fails with a missing-variable error.
n.noise <- 10
data.train <- cbind(
  data.train,
  u = matrix(runif(nrow(data.train) * n.noise), nrow(data.train), n.noise)
)
data.test <- cbind(
  data.test,
  u = matrix(runif(nrow(data.test) * n.noise), nrow(data.test), n.noise)
)
# Random forest on the training set: 500 trees, 11 candidate variables
# tried at each split.
model.rf <- ranger(
  formula = y ~ .,
  data = data.train,
  num.trees = 500,
  mtry = 11
)

# Out-of-bag (OOB) diagnostics stored on the fitted forest.
model.rf[["prediction.error"]]
model.rf[["confusion.matrix"]]
model.rf[["predictions"]]
# Share of training rows whose stored prediction differs from the observed y.
mean(data.train[["y"]] != model.rf[["predictions"]])

# Misclassification rate on the held-out test set.
res.test <- predict(model.rf, data = data.test)
mean(data.test[["y"]] != res.test[["predictions"]])
library(corrplot)
# Plot the pairwise correlation matrix of training columns 6 through 11.
corrplot(corr = cor(data.train[, 6:11]))
# Logistic regression of y on every other column of the training set.
model.logit <- glm(
  formula = y ~ .,
  family = "binomial",
  data = data.train
)
# Test-set classification: the link-scale prediction is mapped to a
# probability with plogis(), and a row is labelled 1 when that probability
# is at least 1/10 (0 otherwise). The labels are stored as a factor.
res.glm <- as.factor(
  as.numeric(
    plogis(predict(model.logit, newdata = data.test)) >= 1 / 10
  )
)
library(e1071)
# Support vector machine of y on every other column of the training set,
# leaving all svm() tuning arguments at their defaults.
model.svm <- svm(
  y ~ .,
  data = data.train
)