diff --git a/scripts/phase2/transform_data.r b/scripts/phase2/transform_data.r
index b3d9e72f07fb4f6efbfcd6acbcb4287a9f70478f..96fd367203d28c1db27e6b474d347109ba16ded3 100644
--- a/scripts/phase2/transform_data.r
+++ b/scripts/phase2/transform_data.r
@@ -8,12 +8,12 @@ output <- "C:/Users/its-student/Desktop/Phase2Out.xlsx"
 
 # read in sheets
 data        <- read_excel(input, sheet = "Data")
-commission  <- read_excel(input, sheet = "Commission") # by group and policy duration
-demographic <- read_excel(input, sheet = "Demographic")
-expense     <- read_excel(input, sheet = "Expense") # bin by annualized net premium
-rtn         <- read_excel(input, sheet = "RTN")
-sic         <- read_excel(input, sheet = "SIC")
-tax         <- read_excel(input, sheet = "Tax")
+commission  <- read_excel(input, sheet = "Commission")  # join by group id & policy duration
+demographic <- read_excel(input, sheet = "Demographic") # join by group id
+expense     <- read_excel(input, sheet = "Expense")     # join by annualized net premium (binned)
+rtn         <- read_excel(input, sheet = "RTN")         # join by max lives, voluntary & policy duration
+sic         <- read_excel(input, sheet = "SIC")         # join by sic code
+tax         <- read_excel(input, sheet = "Tax")         # join by state
 
 # drop reserves not related to STD (IBNR)
 data <- data %>% select(-c(ICOS, WAIVER_IBNR, GAAP_RESV, WAIVER_RESERVE))
@@ -31,7 +31,7 @@ data <- merge(x = data, y = sic, by.x = "SIC", by.y = "SIC_CODE", all.x = TRUE)
 # remove rows where the sic has no industry (e.g. SIC = 1790)
 data <- data %>% filter(!is.na(INDUSTRY))
 
-# left outer-join on demographics (age, gender, salary)
+# left outer join with demographics on GROUP_ID (age, gender & salary)
 data <- merge(x = data, y = demographic, by.x = "GROUP_ID", by.y = "GROUP_ID", all.x = TRUE)
 
 # append percent commission