# readSED+QuestionMapping.R

# source("PSES-SED-readData.R") --> source("PSES_readQuestionMapping.R")

# Packages, global display options, and the name of the input workbook.
# The `if (TRUE) { ... }` wrapper only groups the setup into one foldable
# section; its body always runs.
if (TRUE) { # libraries----
  library(readxl)
  library(data.table)
  library(magrittr)
  library(lubridate)
  library(stringr)
  #library(tibble)

  library(ggplot2)
  # Print each column's class under its name when a data.table is shown.
  options(datatable.print.class = TRUE)
  # lubridate: treat Monday as the first day of the week.
  options(lubridate.week.start = 1)

  theme_set(theme_bw())
  #theme_set(theme_minimal())
  # library(ggthemes); #library(RColorBrewer)
  # theme_set(theme_economist())
  # theme_set(theme_economist_white())

  # Workbook read by the rest of the script. The 2011-2018 extract used to be
  # assigned first and immediately overwritten; it is kept here only as a
  # commented alternative.
  # strFile <- "dtSED-2011-2018.xls"
  strFile <- "dtSED-2008-2018.xls"
}

########################################################### #
#  dtQmapping <- read_excel(strFile, sheet=4) ---- 
########################################################### #
# Question mapping table, read from the fourth sheet of the workbook named by
# `strFile`. The seven columns are renamed to "Question" followed by `cols`.
dtQmapping <- data.table(read_excel(strFile, sheet = 4))

cols <- c("QUESTION", "n2017", "n2017a", "n2014", "n2011", "n2008")
setnames(dtQmapping, c("Question", cols))

# Replace the literal "N/A" placeholder with a real NA, one column at a time.
# `get(col)` reads the column whose name is stored in `col`, and `(col) :=`
# assigns back to that same column. (The previous form compared the column
# *name* to "N/A" and assigned to a column literally called `c`, which added a
# spurious all-NA column and left the placeholders untouched.)
for (col in cols) {
  dtQmapping[get(col) == "N/A", (col) := NA]
}
dtQmapping

# Convert the mapping columns to integers and then to zero-padded "Qnn" labels.
dtQmapping[, (cols) := lapply(.SD, as.integer), .SDcols = cols]
dtQmapping[, (cols) := lapply(.SD, function(x) sprintf("Q%02i", x)), .SDcols = cols]
# sprintf() renders a missing number as "QNA"; turn those back into NA.
dtQmapping[, (cols) := lapply(.SD, function(x) ifelse(x == "QNA", NA, x)), .SDcols = cols]


########################################################### #
#  dtSED <- read_excel(strFile, sheet=2) ---- 
########################################################### #

# Survey results are taken from the second sheet of the workbook; the file
# name is removed from the workspace as soon as the sheet has been loaded.
dtSED <- data.table(read_excel(strFile, sheet = 2))
rm(strFile)
names(dtSED)


#setnames(dtSED, "Average Score  (0-100)", "Score")
#setnames(dtSED, "Survey year", "Year")


# Keep only the rows whose Year value begins with "2" (this drops the empty
# lines), then store Year as an integer.
#dtSED <- dtSED[Year %>% str_sub(1,1) == "2", c(1:8, 12:13,15,19)] #ignore empty lines
dtSED <- dtSED[str_sub(Year, 1, 1) == "2"]
dtSED[, Year := as.integer(Year)]
#dtSED <- dtSED[Organization != "Headquarters"]

# Level ID ----

# The three blocks below are disabled (`if (FALSE)`) and never run. They are
# kept as a record of alternative ways of tagging each Organization.

# Variant 1: prefix the Organization name with a numeric tag ("0." to "4.").
if (FALSE) {
  dtSED[ str_detect(Organization, "^Public"), Organization:=paste0("0.", Organization)]
  dtSED[ str_detect(Organization, "Border"), Organization:=paste0("1.", Organization)]
  dtSED[ str_detect(Organization, "Headquarters"), Organization:=paste0("2.", Organization)]
  dtSED[ str_detect(Organization, "^Infor"), Organization:=paste0("3.", Organization)]
  dtSED[ str_detect(Organization, "^SCIENCE"), Organization:=paste0("4.", Organization)]
}
# Variant 2: store the four level IDs in a list column, trying several
# spellings of the nested-list wrapper.
if (FALSE) { # Asked at stackoverflow
  dtSED$LEVELID <- list()
  #for (i in 1:4) dtSED$LEVELID[[i]] <- 0
  dtSED[ str_detect(Organization, "Border"), LEVELID:=list(list(83,0,0,0))]
  dtSED[ str_detect(Organization, "Headquarters"), LEVELID:=.(.(.(83,200,0,0)))]
  dtSED[ str_detect(Organization, "^Infor"), LEVELID:=.(.(list(83,200,304,0)))]
  dtSED[ str_detect(Organization, "^SCIENCE"), LEVELID:=list(list(list(83,200,304,418)))]  
}
# Variant 3: store the four level IDs as a numeric vector inside a list column.
if (FALSE) { # Asked at stackoverflow
  #  dtSED$ID <- NULL
  #  for (i in 1:4) dtSED$ID[[i]] <- 0
  dtSED[ str_detect(Organization, "Border"), ID:= .(.(c(83,0,0,0)))]
  dtSED[ str_detect(Organization, "Headquarters"), ID:=list(list(c(83,200,0,0)))]
  dtSED[ str_detect(Organization, "^Infor"), ID:=.(.(c(83,200,304,0)))]
  dtSED[ str_detect(Organization, "^SCIENCE"), ID:=.(.(c(83,200,304,418)))] 
  
  dtSED[all(ID == c(83,0,0,0))]
  dtSED$ID
  
}

# Numeric LEVEL for every row: 0 unless the Organization name matches one of
# the patterns below. The rules run in order, so when a name matches several
# patterns the last matching rule wins.
if (TRUE) {
  # Added by reference with `:=`, like the updates that follow, instead of
  # going through `$<-`.
  dtSED[, LEVEL := 0]
  dtSED[str_detect(Organization, "Border"), LEVEL := 1]
  dtSED[str_detect(Organization, "Headquarters"), LEVEL := 2]
  dtSED[str_detect(Organization, "^Infor"), LEVEL := 3]
  dtSED[str_detect(Organization, "^SCIENCE"), LEVEL := 4]
}

# IDs for levels 1 to 4, in that order (see the per-level notes below).
myID <- c(83, 200, 304, 418)
# myLEVEL1ID=83 # CBSA
# myLEVEL2ID=200 # Headquarters
# myLEVEL3ID=304 # Information, Science and Technology Branch
# myLEVEL4ID=418 # Science and Engineering


# Define dtQuestions22, which contains themes and mapping----

# One row per distinct `n2018` value, built from columns 12 to 18 of dtSED.
dtQuestions22 <- unique(dtSED[, 12:18], by = "n2018")
dtQuestions22
# Columns 2 to 5 are turned into zero-padded "Qnn" labels. sprintf() renders
# a missing number as "QNA", which is mapped back to NA in the same pass.
cols <- 2:5
dtQuestions22[, (cols) := lapply(.SD, function(x) {
  strLabel <- sprintf("Q%02i", x)
  ifelse(strLabel == "QNA", NA, strLabel)
}), .SDcols = cols]
rm(cols)
# Drop the rows that have no Theme.
dtQuestions22 <- dtQuestions22[!is.na(Theme)]

#dtThemes <- dtSED[ ,.(n2018,Theme)] %>% unique();   #redundant
#dtThemes[Q2018 > 40, Theme:= "My Organization - Overall"]; dtThemes


# Disabled export of the question table as a tab-separated file.
# NOTE(review): this writes `dtQuestions`, which is not created in the code
# visible here - confirm the intended table before enabling.
if (FALSE) {
  fwrite(dtQuestions, "PSES-22-questions.csv", sep = "\t")
}
# Keep the full question text in `strQuestionLong`, then shorten `strQuestion`:
# remove the "Question " prefix and truncate to 50 characters, ending in "..".
if (TRUE) { # abbreviate Q.
  dtQuestions22[, strQuestionLong := strQuestion]
  dtQuestions22[, strQuestion := str_trunc(gsub("Question ", "", strQuestion),
                                           50, ellipsis = "..")]
}
dtQuestions22[, 1:6]

#strCaptionCredits <- "2018 Public Service Employee Survey Results (Source: https://open.canada.ca)"
#strCaptionCredits <- "Data  Source: https://open.canada.ca"
# strCaptionCredits <- "2018 Public Service Employee Survey Results"
#strCaptionCredits <- "Licence: Open Government Licence - Canada"
#strCaptionCredits <- "Generated by PSES iReport App (https://o-canada.shinyapps.io/PSES-App)\nLicence: Open Government Licence - Canada"
#dtQuestions2 [,1:5]

# Disabled: hand both tables back to a caller.
# NOTE(review): `return()` only works inside a function; at the top level of
# a script it raises an error - confirm how this was meant to be used.
if (FALSE) {
  return (list(dtSED, dtQuestions22))
}

# Disabled: fill missing LEVEL values with 0 and turn LEVEL / Organization
# into factors.
# NOTE(review): `order()` returns row positions, not the distinct values, so
# it is unlikely to be a valid `levels=` argument as written - verify before
# enabling.
if (FALSE) {
  dtSED[is.na(LEVEL)]
  dtSED[is.na(LEVEL), LEVEL:=0]
  dtSED[ , LEVEL := factor(LEVEL, levels=order(LEVEL,decreasing = TRUE))]
  dtSED[ , Organization := factor(Organization, levels=order(LEVEL,decreasing = TRUE))]
}


#' Plot the score of one survey question over time, one line per organization
#'
#' Draws `Score` against `Year` with one coloured line per `Organization`,
#' labels every point with its `Total responses` value, adds a red dot-dash
#' reference line at a score of 50, and prints the plot.
#'
#' @param .dt A data frame (or data.table) containing the columns `Year`,
#'   `Score`, `Organization` and `Total responses`.
#' @param .strQuestion Question text shown as the first line of the subtitle.
#' @return The ggplot object, invisibly. The plot is printed as a side effect.
psesSED.plotQuestionByTime_lines <- function(.dt, .strQuestion) {
  # Fail early with a readable message instead of a late "object not found"
  # raised while the plot is being rendered.
  colsNeeded <- c("Year", "Score", "Organization", "Total responses")
  colsMissing <- setdiff(colsNeeded, names(.dt))
  if (length(colsMissing) > 0) {
    stop("`.dt` is missing required column(s): ",
         paste(colsMissing, collapse = ", "), call. = FALSE)
  }

  g2 <- ggplot(.dt) +
    theme_bw() +
    # The reference level is a constant, so it is passed as a parameter:
    # inside aes() it would be evaluated per data row and draw one identical
    # line for every row.
    geom_hline(yintercept = 50, size = 1, col = "red", linetype = 4) +
    geom_line(aes(Year, Score, col = Organization)) +
    # Each point is labelled with its number of responses; the column name
    # contains a space, hence the backticks.
    geom_label(aes(Year, Score,
                   fill = Organization, label = `Total responses`)) +
    # The fill colours repeat the line colours, so their legend is hidden.
    guides(fill = "none") +
    labs(
      subtitle = paste0(.strQuestion,
                        "\n(Number of responses is indicated in the box)"),
      y = "Score = Positive responses / All responses (%)",
      x = NULL,
      caption = paste0("Public Service Employee Survey Results",
                       "\nGenerated by iTrack (https://itrack.shinyapps.io/PSES)")
    ) +
    scale_x_continuous(limits = c(2007, 2019),
                       breaks = c(2008, 2011, 2014, 2016, 2018))

  print(g2)
  invisible(g2)
}