# Bank marketing analysis.
#
# Reads bank.csv, recodes the yes/no outcome `y` as 0/1, drops rows that
# contain "unknown" in the predictor columns, splits the data into train and
# test sets, fits two linear models on the training set and reports
# confusion-matrix metrics for each on the test set.

# ---- Session setup ----
cat("\14") # form-feed character (Ctrl+L); clears the console in RStudio
graphics.off() # close all open graphics devices
options(digits = 3) # number of significant digits to display

library(ggplot2)
library(mise)
library(rJava)
library(xlsxjars)
library(reshape)
library(psych)
library(xlsx)
library(corrplot)
library(sqldf)
library(glmnet)
library(ISLR)
library(MASS)

# ---- Configuration ----
# Folder that holds bank.csv; change it to match your machine. The file is
# addressed through file.path() so the session's working directory is left
# untouched.
data_dir <- "C:/Users/vterr/oneDrive/Escritorio"
n_train <- 21635 # number of rows sampled into the training set
threshold <- 0.3 # fitted values above this are classified as "yes"
unknown_cols <- 2:13 # columns in which "unknown" marks a missing value

# ---- Helpers ----

# Confusion-matrix counts and metrics for "yes"/"no" labels.
#
# observed:  vector of actual labels ("yes"/"no").
# predicted: vector of predicted labels, same length as `observed`.
#
# Returns a list with
#   totals     one-row data frame of actual / predicted class totals
#   confusion  one-row data frame with tp, tn, fp, fn
#   total      number of evaluated rows
#   accuracy   (tp + tn) / (tp + tn + fp + fn)
#   precision  tp / (tp + fp)
#   npv        tn / (tn + fn)  (negative predictive value)
#
# Layout of the confusion matrix:
#
#                    predicted
#                   yes     no
#   actual   yes    tp      fn
#            no     fp      tn
confusion_metrics <- function(observed, predicted) {
  stopifnot(length(observed) == length(predicted))

  positive <- sum(observed == "yes")
  negative <- sum(observed == "no")
  predicted_positive <- sum(predicted == "yes")
  predicted_negative <- sum(predicted == "no")

  tp <- sum(observed == "yes" & predicted == "yes")
  tn <- sum(observed == "no" & predicted == "no")
  fp <- sum(observed == "no" & predicted == "yes")
  fn <- sum(observed == "yes" & predicted == "no")

  list(
    totals = data.frame(
      positive, negative, predicted_positive, predicted_negative
    ),
    confusion = data.frame(tp, tn, fp, fn),
    total = length(observed),
    accuracy = (tp + tn) / (tp + tn + fp + fn),
    precision = tp / (tp + fp),
    npv = tn / (tn + fn)
  )
}

# Score `model` on `test_data`, print the confusion-matrix summary and return
# (invisibly) the predictions together with the metrics.
#
# model:     a fitted model accepted by predict(..., type = "response").
# test_data: data frame with the predictors and the observed label column `y`.
# threshold: cut-off applied to the fitted values to predict "yes".
evaluate_model <- function(model, test_data, threshold = 0.3) {
  predicted_value <- predict(model, test_data, type = "response")
  predicted_class <- ifelse(predicted_value > threshold, "yes", "no")
  performance_data <- data.frame(
    observed = test_data$y,
    predicted = predicted_class
  )

  metrics <- confusion_metrics(
    performance_data$observed,
    performance_data$predicted
  )

  print(metrics$totals)
  print(metrics$confusion)
  print(c(
    accuracy = metrics$accuracy,
    precision = metrics$precision,
    npv = metrics$npv
  ))

  invisible(c(
    list(
      predicted_value = predicted_value,
      predicted_class = predicted_class,
      performance_data = performance_data
    ),
    metrics
  ))
}

# ---- Load and clean the data ----
data <- read.csv(file.path(data_dir, "bank.csv"), header = TRUE)

# Exploratory analysis: numeric 0/1 copy of the yes/no outcome.
data$y1[data$y == "yes"] <- 1
data$y1[data$y == "no"] <- 0

# Treat "unknown" as missing in the predictor columns. %in% never yields NA,
# so cells that are already missing do not break the replacement.
data[unknown_cols] <- lapply(data[unknown_cols], function(column) {
  column[column %in% "unknown"] <- NA
  column
})
data <- na.omit(data)

# ---- Train / test split ----
# The split is done on row positions, so it does not rely on an id column.
stopifnot(nrow(data) > n_train)
set.seed(2024) # fixed seed so the split (and the metrics) are reproducible
train_idx <- sample(nrow(data), n_train)
data_train <- data[train_idx, ]
data_test <- data[-train_idx, ]

# ---- Model 1: full set of predictors ----
reg <- lm(
  y1 ~ age + job + marital + education + default + housing + loan +
    contact + duration + campaign,
  data = data_train
)
print(unique(data$job))
print(summary(reg))

# Prediction and confusion matrix on the held-out rows.
eval_reg <- evaluate_model(reg, data_test, threshold)

# ---- Model 2: backward step, `default` and `contact` removed ----
# Fitted on the training rows only; fitting on the full data set would leak
# the test rows into the model that is evaluated on them.
reg2 <- lm(
  y1 ~ age + job + marital + education + housing + loan + duration + campaign,
  data = data_train
)
print(summary(reg2))

# Prediction and confusion matrix on the held-out rows.
eval_reg2 <- evaluate_model(reg2, data_test, threshold)

# Accuracy of the last evaluated model, kept under its original name.
accuracy <- eval_reg2$accuracy