Commit 3d601a85 authored by Carl Corder
Browse files

Update transform_data.r

parent d8cfffee
......@@ -3,12 +3,13 @@ library("writexl")
library("dplyr")
# Phase 2 Excel
# Paths of the input workbook (read sheet by sheet below) and of the output
# workbook. The trailing URLs are presumably share links to copies of these
# files -- TODO confirm.
input <- "C:/Users/its-student/Desktop/Phase2In.xlsx" # https://unl.box.com/s/4dfo4iv2n8awiqt20himu24kyhtmgavk
output <- "C:/Users/its-student/Desktop/Phase2Out.xlsx" # https://unl.box.com/s/vyfmeb62bc1umiuuuuj0tkvo2zvp28uq
# NOTE(review): input/output are assigned a second time with the same paths,
# only without the URL comments. This looks like the old and new versions of
# the same two lines shown together -- confirm which pair should remain.
input <- "C:/Users/its-student/Desktop/Phase2In.xlsx"
output <- "C:/Users/its-student/Desktop/Phase2Out.xlsx"
# read in sheets
# Each named worksheet of the input workbook is loaded into its own object.
data <- read_excel(input, sheet = "Data")
commission <- read_excel(input, sheet = "Commission") # by group and policy duration
demographic <- read_excel(input, sheet = "Demographic")
expense <- read_excel(input, sheet = "Expense") # bin by annualized net premium
rtn <- read_excel(input, sheet = "RTN")
sic <- read_excel(input, sheet = "SIC")
......@@ -23,6 +24,9 @@ data <- merge(x = data, y = sic, by.x = "SIC", by.y = "SIC_CODE", all.x = TRUE)
# Keep only the rows that carry an industry; rows whose INDUSTRY is NA
# (the SIC has no industry, e.g. SIC = 1790) are dropped.
data <- filter(data, !is.na(INDUSTRY))
# Left outer join of the demographic columns (age, gender, salary) on
# GROUP_ID: every row of data is kept, matched or not.
data <- merge(data, demographic, by = "GROUP_ID", all.x = TRUE)
# append percent commission
data <- merge(x = data,
y = commission[, c("GROUP_ID", "POLICY_DURATION", "PERCENT_COMMISSION")],
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment