diff --git a/scripts/phase2/transform_data_pca.r b/scripts/phase2/transform_data_pca.r index 54cd5bb62979bf12c0a62b05460d203dc8301bdd..6a08dfb6adaf24fd5a683245a547bad32e89b12a 100644 --- a/scripts/phase2/transform_data_pca.r +++ b/scripts/phase2/transform_data_pca.r @@ -149,17 +149,15 @@ data <- data %>% mutate(LTD_INDICATOR = case_when(LTD_INDICATOR == "With LTD" data <- data %>% mutate(ACTIVE_TERMED = case_when(ACTIVE_TERMED == "Active" ~ 1, ACTIVE_TERMED == "Terminated" ~ 0)) -# keep numerics for PCA -data <- data %>% select(#"REGION", "INDUSTRY", - #"COVG_CODE", - "AVG_SALARY", "AVG_AGE", "PCT_FEMALE", +# numerics only for PCA +data <- data %>% select("AVG_SALARY", "AVG_AGE", "PCT_FEMALE", "TRUE_GROUP_VOL", "LTD_INDICATOR", "ACTIVE_TERMED", "MAX_LIVES", "POLICY_DURATION", "PREM", "EST_ANNUALIZED_NET_PREM", "RTN", "PAID_COMMISSION", "PAID_CLAIMS", "IBNR", "PERCENT_COMMISSION", "PREMIUM_TAX", "INTERNAL_EXPENSES", "PERCENT_PEPM") -# use FactoMineR for PCA +# use FactoMineR to compute PCA data.pca <- PCA(data, scale.unit = TRUE, ncp = 5, graph = TRUE) # create scree plot