Wednesday, 8 May 2019

R Programming: Short Notes


# R Standard Functions ----
# Everyday session helpers: working directory, packages, and help lookup.
# Replace the placeholder strings with a real directory / package name.
getwd()                           # print the current working directory
setwd("path/to/project")          # needs a directory; a bare setwd() errors
install.packages("package_name")  # download and install a package (once)
library("package_name")           # attach a package; errors if it is missing
# require() also attaches a package, but returns FALSE with a warning
# instead of raising an error when the package is not installed.
require("package_name")
help("attributes")                # open the help page for a topic
?attributes                       # shorthand for the help() call above

# Reading a CSV file ----
# header = TRUE: the first row of the file holds the column names.
# stringsAsFactors = FALSE: keep text columns as character, not factor.
# The logical constants must be spelled TRUE / FALSE: `True` and `False`
# are ordinary undefined names and make the call fail with
# "object 'True' not found".
data <- read.csv(
  file = "file_name.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Exploring the data ----
ncol(data)      # number of columns
nrow(data)      # number of rows
str(data)       # compact structure: type and first values of each column
summary(data)   # per-column summary statistics
View(data)      # open the data in the spreadsheet-style viewer
colnames(data)  # column names

# Subsetting data ----
# Eliminate columns 1, 2 and 4 through 7 by position. drop = FALSE keeps
# the result a data frame even when only a single column is left;
# without it the result would silently collapse to a plain vector.
data <- data[, -c(1, 2, 4:7), drop = FALSE]

# Checking missing values ----
# Number of NA entries in each column, as a named integer vector.
# vapply() guarantees one integer per column, whereas sapply() changes
# its return type depending on the input (e.g. a list for zero columns).
vapply(data, function(x) sum(is.na(x)), integer(1))

# Removing the rows with missing values ----
# Keeps only the rows in which every column is non-missing.
data <- na.omit(data)

# Converting variables ----
# Convert a column to numeric in place. The assignment arrow must be
# written `<-` with no space: `< -` is parsed as "less than minus ...",
# which compares instead of assigning and leaves the column unchanged.
# Values that cannot be parsed as numbers become NA (with a warning).
# NOTE: for a factor column use as.numeric(as.character(x)); calling
# as.numeric() directly on a factor returns the internal level codes.
data$attribute <- as.numeric(data$attribute)

# Continuous variable exploration
# Default plot of the column; for a numeric vector this draws the values
# against their index position (assumes `variable` is numeric).
plot(data$variable)
# Histogram of the raw values, to inspect the shape of the distribution.
hist(data$variable)

# Visualizing the variable after applying a base-10 log transform
# NOTE: log10() yields -Inf for 0 and NaN (with a warning) for negative
# values, so this view is meant for strictly positive data.
hist(log10(data$variable))

# Categorical variable exploration ----
# Frequency count of each distinct value of a single variable.
# (The original repeated this exact call twice; once is sufficient.)
table(data$variable)

# Bivariate categorical variable analysis ----
# Two-way contingency tables: rows are the values of the first variable,
# columns are the values of Churn.
# The column name is spelled out in full: an abbreviated name such as
# `data$variab` only resolves through `$` partial matching, which breaks
# as soon as a second column shares the same prefix.
table(data$variable, data$Churn)
table(data$MultipleLines, data$Churn)

# Bivariate continuous variable analysis ----
# Mean of each continuous variable within each Churn group.
# group_by() and summarise() come from the dplyr package; the dplyr::
# prefix makes these calls work without a prior library(dplyr).
BiVarDataCont <- dplyr::group_by(data, Churn)
dplyr::summarise(BiVarDataCont, avg_value = mean(tenure))
dplyr::summarise(BiVarDataCont, avg_value = mean(MonthlyCharges))
# Base R equivalent of the tenure summary, using the formula interface:
# mean of tenure for each value of Churn.
aggregate(tenure ~ Churn, data = data, FUN = mean)

# Creating buckets ----
# Label every row by the range its `attribute` value falls into:
#   above 750            -> "750+"
#   above 720, up to 750 -> "720-750"
#   everything else      -> "<720"  (a value of exactly 720 lands here)
# local() keeps the intermediate vectors out of the workspace.
data$attribute_band <- local({
  score <- data$attribute
  lower_bands <- ifelse(score > 720, "720-750", "<720")
  ifelse(score > 750, "750+", lower_bands)
})

# Final variable selection for the model ----
# Drop column 5 and columns 7 through 15 by position (the same set as
# the original 5, 7, 8, 9, 10:15). drop = FALSE keeps the result a data
# frame even if only a single column remains.
data <- data[, -c(5, 7:15), drop = FALSE]

# Correlation matrix ----
# cor() accepts numeric input only, so restrict the data frame to its
# numeric columns first; a leftover character or factor column would
# otherwise stop the call with "'x' must be numeric".
cor_mat <- cor(Filter(is.numeric, data))
# In general, after analyzing the correlated variables (both positive
# and negative correlation), variables with a correlation coefficient
# of more than 0.7 are removed.

No comments:

Post a Comment