Commit 80d473c6 authored by Carl Corder
Browse files

clean and remove unneeded columns

parent bce83d69
......@@ -15,19 +15,26 @@ rtn <- read_excel(input, sheet = "RTN") # join by max lives, vol
# Lookup sheets read from the `input` workbook.
sic <- read_excel(input, sheet = "SIC") # join by sic code
tax <- read_excel(input, sheet = "Tax") # join by state
# Drop the reserve columns not related to STD. IBNR is deliberately kept:
# it is filtered on below.
data <- data %>% select(-c(ICOS, WAIVER_IBNR, GAAP_RESV, WAIVER_RESERVE))
# NOTE(review): this filter overlaps with the second filter() further down.
# Because it requires PAID_CLAIMS > 0, the later PAID_CLAIMS >= 0 condition
# cannot keep zero-claim rows while both statements run. This listing looks
# like a rendered diff whose +/- markers were lost, so this may be the
# superseded version of the filter -- confirm against the real file.
# Keep rows with strictly positive max lives, estimated annualized net
# premium, premium, paid claims and IBNR.
data <- data %>% filter(MAX_LIVES > 0,
EST_ANNUALIZED_NET_PREM > 0,
PREM > 0,
PAID_CLAIMS > 0,
IBNR > 0)
# Build INC_DATE as the first day of the incurred month. The unquoted 01 is
# a numeric literal, so it is pasted as "1" (year-month-1).
data <- data %>% mutate(INC_DATE = as.Date(paste(INC_YEAR, INC_MONTH, 01, sep = "-")))
# Drop the incurred year/month columns now represented by INC_DATE.
data <- data %>% select(-c(INC_MONTH, INC_YEAR, INC_MONTHYEAR))
# Keep rows with positive lives, premiums and IBNR, and non-negative policy
# duration and paid claims (zero paid claims are allowed by this filter).
data <- data %>% filter(MAX_LIVES > 0, POLICY_DURATION >= 0,
EST_ANNUALIZED_NET_PREM > 0, PREM > 0,
PAID_CLAIMS >= 0, IBNR > 0)
# Left outer join onto the SIC lookup (data$SIC == sic$SIC_CODE);
# all.x = TRUE keeps every row of `data`, including unmatched ones.
data <- merge(x = data, y = sic, by.x = "SIC", by.y = "SIC_CODE", all.x = TRUE)
# Drop the SIC code and its description once the join is done.
data <- data %>% select(-c(SIC, SIC_DESC))
# Remove rows whose INDUSTRY is NA after the join, e.g. a SIC with no
# industry such as SIC = 1790.
data <- data %>% filter(!is.na(INDUSTRY))
......@@ -116,6 +123,18 @@ data <- data %>% mutate(ACTUAL_CLAIMS = PAID_CLAIMS + IBNR)
# Actual-to-expected ratio: actual claims divided by expected claims.
# NOTE(review): rows with EXPECTED_CLAIMS == 0 would yield Inf/NaN here --
# confirm whether upstream steps rule that out.
data <- data %>%
  mutate(ACTUAL_TO_EXPECTED = ACTUAL_CLAIMS / EXPECTED_CLAIMS)

# Formatting: store the policy effective date as a Date.
# The column is referenced directly (data masking) rather than via `data$`,
# so the step always operates on the piped frame. `format` is passed by name
# because only the character method of as.Date() takes it as the second
# positional argument; the POSIXct method would read it as `tz`.
# NOTE(review): presumably this column arrives as character or POSIXct from
# read_excel -- confirm.
data <- data %>%
  mutate(
    POLICY_EFFECTIVE_DATE = as.Date(POLICY_EFFECTIVE_DATE, format = "%Y-%m-%d")
  )

# Re-order columns for the output sheet; any column not listed is dropped.
data <- data %>%
  select(
    "GROUP_ID", "DIST_ID", "REP_ID",
    "REG_OFFICE", "STATE", "INDUSTRY", "SUB_INDUSTRY",
    "AVG_SALARY", "AVG_AGE", "PCT_FEMALE",
    "POLICY_EFFECTIVE_DATE", "INC_DATE", "POLICY_DURATION",
    "COVG_CODE", "TRUE_GROUP_VOL", "ACTIVE_TERMED", "LTD_INDICATOR",
    "MAX_LIVES", "PREM", "NEEDED_PREMIUM", "RTN",
    "EST_ANNUALIZED_NET_PREM",
    "PAID_COMMISSION", "PAID_CLAIMS", "IBNR",
    "ACTUAL_CLAIMS", "EXPECTED_CLAIMS", "ACTUAL_TO_EXPECTED",
    "PERCENT_COMMISSION", "PREMIUM_TAX", "INTERNAL_EXPENSES",
    "PERCENT_PEPM", "TLR"
  )

# Write the cleaned data to the Excel file at `output`, with a header row.
write_xlsx(data, path = output, col_names = TRUE)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment