# R Standard Formulas
# Show the current working directory.
getwd()
# Change the working directory. setwd() requires the target directory as its
# argument; the path below is a placeholder to replace with a real one.
setwd("path/to/working_directory")
# Install a package (placeholder name) and attach it to the session.
install.packages("package_name")
library("package_name")
# require("package_name") would attach the package as well, but it only returns
# FALSE (with a warning) when the package is missing, whereas library() stops
# with an error, so library() is the call kept here.
# Open the help page for a topic, in function form and in shortcut form.
# NOTE(review): `attribute` looks like a placeholder topic name -- confirm.
help(attribute)
?attributes
# Reading different files
# Read a CSV file (placeholder file name) into `data`. header = TRUE takes the
# column names from the first row; stringsAsFactors = FALSE keeps text columns
# as character vectors instead of factors. R's logical constants are spelled
# TRUE / FALSE in upper case.
data <- read.csv(
  file = "file_name.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Exploring the data
ncol(data)      # number of columns
nrow(data)      # number of rows
str(data)       # compact display of the structure of `data`
summary(data)   # summary of every column
# View() opens a data viewer window, so it is only called when a user is
# actually sitting in front of the session.
if (interactive()) {
  View(data)
}
colnames(data)  # column names
# Subsetting data
# Eliminating the columns at positions 1, 2 and 4 to 7.
# drop = FALSE keeps the result a data frame even when only a single column
# is left after the removal.
data <- data[, -c(1, 2, 4:7), drop = FALSE]
# Checking missing values: number of NA entries in every column.
# vapply() guarantees an integer vector as result, whatever the shape of `data`.
vapply(data, function(column) sum(is.na(column)), integer(1))
# Removing the rows that contain a missing value
data <- na.omit(data)
# Converting variables
# Store the numeric version of column `attribute1` in column `attribute`
# (both are placeholder column names). The assignment operator is `<-` written
# without a space: `< -` would compare against the negated value instead.
# NOTE: as.numeric() on a factor returns the internal level codes; convert with
# as.character() first when the labels themselves are the numbers.
data$attribute <- as.numeric(data$attribute1)
# Continuous variable exploration
# Default plot of the column (`variable` is a placeholder column name).
plot(data$variable)
# Histogram of the column.
hist(data$variable)
# Visualizing the variable after applying a base-10 logarithm
hist(log10(data$variable))
# Categorical variable exploration
# Frequency table of one categorical column (`variable` is a placeholder name).
table(data$variable)
# Bivariate categorical variable analysis
# Cross-tabulate a categorical column against Churn. The column name is written
# out in full rather than relying on partial matching of `$`.
table(data$variable, data$Churn)
table(data$MultipleLines, data$Churn)
# Bivariate continuous variable analysis
# group_by() and summarise() are dplyr verbs; no library(dplyr) call is visible
# in this script, so they are reached through the dplyr:: namespace.
BiVarDataCont <- dplyr::group_by(data, Churn)
# Mean of each continuous column within every Churn group.
dplyr::summarise(BiVarDataCont, avg_value = mean(tenure))
dplyr::summarise(BiVarDataCont, avg_value = mean(MonthlyCharges))
# Base R version of the first summary: mean tenure per Churn level.
aggregate(tenure ~ Churn, data = data, FUN = mean)
# Creating buckets
# Label every row by the range its `attribute` value falls into:
# above 750, above 720 up to and including 750, or everything else.
data$attribute_band <- ifelse(
  test = data$attribute > 750,
  yes = "750+",
  no = ifelse(
    test = data$attribute > 720,
    yes = "720-750",
    no = "<720"
  )
)
# Final variable selection in model
# Remove the columns at positions 5 and 7 to 15. drop = FALSE keeps `data` a
# data frame even when a single column remains, which the correlation step
# that follows relies on.
data <- data[, -c(5, 7:15), drop = FALSE]
# Correlation matrix
# cor() stops with an error on non-numeric input, so only the numeric and
# logical columns are passed to it. When every column already qualifies, the
# result is the same as cor(data).
cor_mat <- cor(
  data[vapply(
    data,
    function(column) is.numeric(column) || is.logical(column),
    logical(1)
  )]
)
# In general, after analyzing the correlated variables (both positive and
# negative correlation), variables that have a correlation index of more than
# 0.7 are removed.
# (Web-page footer text captured with the script, not R code:
#  "No comments:" / "Post a Comment")