Commit 7f7a8c9c authored by Carl Corder
Browse files

add latitude and longitude by state

parent 83ea3167
......@@ -27,14 +27,12 @@ data <- data %>% select(-c(INC_MONTH, INC_MONTHYEAR))
# drop broker and LFG sales rep hash key
#data <- data %>% select(-c(DIST_ID, REP_ID))
# keep rows with strictly positive lives, premiums (annualized and booked)
# and IBNR, and with non-negative policy duration and paid claims
data <- data %>%
  filter(MAX_LIVES > 0 & POLICY_DURATION >= 0) %>%
  filter(EST_ANNUALIZED_NET_PREM > 0 & PREM > 0) %>%
  filter(PAID_CLAIMS >= 0 & IBNR > 0)
# format policy effective date ("YYYY-MM-DD" text -> Date) and incurred year
# (-> numeric) in a single mutate. Columns are referenced by bare name, not
# through `data$...`, so each expression is evaluated on the rows dplyr is
# currently operating on; `data$col` always yields the full-length column and
# fails with a size mismatch as soon as `data` is grouped.
# NOTE(review): if INC_YEAR were a factor, as.numeric() would return the level
# codes rather than the years - confirm it is read as character or integer.
data <- data %>%
  mutate(
    POLICY_EFFECTIVE_DATE = as.Date(POLICY_EFFECTIVE_DATE, format = "%Y-%m-%d"),
    INC_YEAR = as.numeric(INC_YEAR)
  )
# group claims on annual basis
data <- data %>% group_by(GROUP_ID, DIST_ID, REP_ID,
COVG_CODE, TRUE_GROUP_VOL, POLICY_EFFECTIVE_DATE,
......@@ -43,6 +41,11 @@ data <- data %>% group_by(GROUP_ID, DIST_ID, REP_ID,
summarise(PREM = sum(PREM), EST_ANNUALIZED_NET_PREM = mean(EST_ANNUALIZED_NET_PREM),
PAID_COMMISSION = sum(PAID_COMMISSION), PAID_CLAIMS = sum(PAID_CLAIMS), IBNR = sum(IBNR))
# keep rows with strictly positive lives, premiums (annualized and booked)
# and IBNR, and with non-negative policy duration and paid claims
data <- data %>%
  filter(MAX_LIVES > 0 & POLICY_DURATION >= 0) %>%
  filter(EST_ANNUALIZED_NET_PREM > 0 & PREM > 0) %>%
  filter(PAID_CLAIMS >= 0 & IBNR > 0)
# attach the industry lookup: left outer join of `sic` on SIC = SIC_CODE,
# so every row of `data` is kept even when its code has no match
data <- merge(data, sic, by.x = "SIC", by.y = "SIC_CODE", all.x = TRUE)
......@@ -56,7 +59,7 @@ data <- data %>% filter(!is.na(INDUSTRY))
# left outer join of the group demographics on GROUP_ID; every row of `data`
# is kept and unmatched groups get NA demographics
data <- merge(x = data, y = demographic, by = "GROUP_ID", all.x = TRUE)
# remove groups with missing demographics (a row is kept only when age,
# salary and percent-female are all present); one filter is enough - the
# `!(is.na(a) | is.na(b) | is.na(c))` form is the same condition
data <- data %>% filter(!is.na(AVG_AGE), !is.na(AVG_SALARY), !is.na(PCT_FEMALE))
# append percent commission
data <- merge(x = data,
......@@ -73,6 +76,24 @@ data <- merge(x = data, y = tax, by.x = "STATE", by.y = "STATE", all.x = TRUE)
# drop rows whose state has no premium-tax mapping (e.g. 91, FO)
data <- filter(data, !is.na(PREMIUM_TAX))
# state to lat-lon map: two-letter state code -> c(latitude, longitude),
# given in whole degrees
latlon <- list("AL" = c(33,-87), "AK" = c(61,-152), "AZ" = c(34,-111), "AR" = c(35,-92),
               "CA" = c(36,-120), "CO" = c(39,-105), "CT" = c(42,-73), "DE" = c(39,-76),
               "DC" = c(39,-77), "FL" = c(28,-82), "GA" = c(33,-84), "HI" = c(21,-157),
               "ID" = c(44,-114), "IL" = c(40,-89), "IN" = c(40,-86), "IA" = c(42,-93),
               "KS" = c(39,-97), "KY" = c(38,-85), "LA" = c(31,-92), "ME" = c(45,-69),
               "MD" = c(39,-77), "MA" = c(42,-72), "MI" = c(43,-85), "MN" = c(46,-94),
               "MS" = c(33,-90), "MO" = c(38,-92), "MT" = c(47,-110), "NE" = c(41,-98),
               "NV" = c(38,-117), "NH" = c(43,-72), "NJ" = c(40,-75), "NM" = c(35,-106),
               "NY" = c(42,-75), "NC" = c(36,-80), "ND" = c(48,-100), "OH" = c(40,-83),
               "OK" = c(36,-97), "OR" = c(45,-122), "PA" = c(41,-77), "RI" = c(42,-72),
               "SC" = c(34,-81), "SD" = c(44,-99), "TN" = c(36,-87), "TX" = c(31,-98),
               "UT" = c(40,-112), "VT" = c(44,-73), "VA" = c(38,-78), "WA" = c(47,-121),
               "WV" = c(38,-81), "WI" = c(44,-90), "WY" = c(43,-107))
# stack the map into a numeric matrix: one row per state code (row names),
# column 1 = latitude, column 2 = longitude
latlon_mat <- do.call(rbind, latlon)
# create latitude and longitude columns with a single vectorised lookup
# instead of a per-row `latlon[[STATE]]`:
#   * a STATE that is not in the map yields NA for LAT/LON rather than
#     aborting the pipeline on a NULL lookup
#   * as.character() makes a factor STATE match by label, not by its
#     integer level code
#   * ungroup() + plain mutate() leaves `data` without any grouping, so
#     later aggregating steps are not silently evaluated one row at a time
data <- data %>%
  ungroup() %>%
  mutate(
    LAT = unname(latlon_mat[match(as.character(STATE), rownames(latlon_mat)), 1]),
    LON = unname(latlon_mat[match(as.character(STATE), rownames(latlon_mat)), 2])
  )
# situs state to region map
regions <- list("east" = c("AL","CT","DC","DE","GA","MA","MD","ME","MS","NC","NH","NJ","NY","PA","RI","SC","TN","VA","VT"),
"central" = c("FL","IA","IL","IN","KY","MI","MN","MO","ND","NE","OH","SD","WI","WV"),
......@@ -144,11 +165,11 @@ data <- data %>% mutate(ACTUAL_TO_EXPECTED = ACTUAL_CLAIMS / EXPECTED_CLAIMS)
# re-order columns: select() keeps only the columns named here, in this
# order. Each column is listed exactly once - a repeated name is ignored
# after its first occurrence, which would otherwise push LAT and LON behind
# SUB_INDUSTRY instead of directly after STATE.
data <- data %>% select("GROUP_ID", "DIST_ID", "REP_ID",
                        "REG_OFFICE", "STATE", "LAT", "LON", "REGION", "INDUSTRY", "SUB_INDUSTRY",
                        "AVG_SALARY", "AVG_AGE", "PCT_FEMALE",
                        "POLICY_EFFECTIVE_DATE", "POLICY_DURATION",
                        "COVG_CODE", "TRUE_GROUP_VOL", "ACTIVE_TERMED", "LTD_INDICATOR",
                        "MAX_LIVES", "PREM", "NEEDED_PREMIUM", "RTN", "EST_ANNUALIZED_NET_PREM", "INC_YEAR",
                        "PAID_COMMISSION", "PAID_CLAIMS", "IBNR", "ACTUAL_CLAIMS", "EXPECTED_CLAIMS", "ACTUAL_TO_EXPECTED",
                        "PERCENT_COMMISSION", "PREMIUM_TAX", "INTERNAL_EXPENSES", "PERCENT_PEPM", "TLR")
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment