-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDeveloper Census 2020 Report.Rmd
3075 lines (2236 loc) · 120 KB
/
Developer Census 2020 Report.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
---
title: |
  | Hacklab Ghana
  | Developer Census 2020
subtitle: "Published by Hacklab Research"
output:
  html_document:
    toc: true
    toc_float: true
    toc_depth: 2
    toc_collapsed: true
    css: css_theme/theme.css # where the css theme is defined
    includes:
      after_body: css_theme/footer_no-icons.html # use footer.html if want fontawesome icons for twitter & email
---
```{css toc-content, echo = FALSE}
/* add some space before the TOC */
/* for no apparent reason, this is working here but not in the .css... */
#TOC {
margin-top: 5rem;
}
```
```{r setup, include = FALSE}
# Global chunk options for the whole report: code, messages and warnings are
# hidden in the rendered output and figures are produced as SVGs.
# (`message = FALSE` used to be passed twice; it is now listed once.)
knitr::opts_chunk$set(
  echo = FALSE,    # we do not want to display the code
  message = FALSE, # ... nor the messages emitted while running it
  warning = FALSE, # ... nor the warnings
  dev = "svg"      # figures are SVGs
)
# Environment:
library(xaringanExtra) # to be able to have tabs. To install: install 'devtools' package, then run: devtools::install_github("gadenbuie/xaringanExtra")
xaringanExtra::use_panelset() # allow the creation of tabs in Distill (see https://github.com/rstudio/distill/issues/11)
library(tidyverse) # includes dplyr, tidyr, ggplot2, rio, stringr, ...
library(leaflet) # for the interactive map
library(tmaptools) # for geocoding
library(wordcloud) # for the wordcloud at Q33
library(tidyr) # for Q30 and Q31
library(ggiraph) # for responsive bubble plots
library(plotly) # for interactive bubble plots
library(ggExtra) # for interactive Heatmaps
library(ggiraphExtra) # for interactive Heatmaps
library(moonBook) # for interactive Heatmaps
library(sjmisc) # for interactive Heatmaps
# library(ggchicklet) # to round edges of barplots. install.packages("ggchicklet", repos = "https://cinc.rud.is")
```
```{r}
# Data:
# Load the cleaned answers to all questions and prepare the factor columns for
# plotting inside a single mutate(): fct_relevel() fixes the order of the
# levels, recode() renames, shortens or line-wraps labels and groups free-text
# answers into categories.
# Reordering certain factors for the plots: (use barplot_ordered() for plotting)
Qs <- rio::import("data/clean/clean_all_Qs.rds") %>%
mutate(
# Ordinal answers: levels listed in their natural order.
purch_influence_16 = fct_relevel(purch_influence_16,
"I have little or no influence",
"I have some influence",
"I have a great deal of influence"
),
job_satisfaction_22 = fct_relevel(job_satisfaction_22,
"Very dissatisfied",
"Slightly dissatisfied",
"Neither satisfied nor dissatisfied",
"Slightly satisfied",
"Very satisfied"),
edu_importance_20 = fct_relevel(edu_importance_20,
"Not at all important/not necessary",
"Somewhat important",
"Fairly important",
"Very important",
"Critically important"),
company_size_23 = fct_relevel(company_size_23,
"1",
"Below 10",
"Below 20",
"Below 100",
"Over 100",
"Over 500"
),
# Friendlier label for the single-person company size.
company_size_23 = recode(company_size_23,
"1" = "One Person Company"),
overtime_work_25 = fct_relevel(overtime_work_25,
"Never",
"Rarely: 1-2 days per year or less",
"Occasionally: 1-2 days per quarter but less than monthly",
"Sometimes: 1-2 days per month but less than weekly",
"Often: 1-2 days per week",
"Frequently: 3 or more days per week",
),
# Shorten the overtime labels (the level order set above is kept).
overtime_work_25 = recode(overtime_work_25,
"Often: 1-2 days per week" = "1-2 days per week",
"Sometimes: 1-2 days per month but less than weekly" = "1-2 days per month",
"Rarely: 1-2 days per year or less" = "<1-2 days per year",
"Occasionally: 1-2 days per quarter but less than monthly" = "1-2 days per quarter",
"Frequently: 3 or more days per week" = ">3 days per week"),
# Salary brackets, listed from the highest to the lowest.
monthly_salary_28 = fct_relevel(monthly_salary_28,
"Greater than GHS 25,000",
"GHS 20,000 - GHS 25,000",
"GHS 15,000 - GHS 20,000",
"GHS 10,000 - GHS 15,000",
"GHS 8,000 - GHS 10,000",
"GHS 6,000 - GHS 8,000",
"GHS 5,000 - GHS 6,000",
"GHS 4,000 - GHS 5,000",
"GHS 3,500 - GHS 4,000",
"GHS 3,000 - GHS 3,500",
"GHS 2,500 - GHS 3,000",
"GHS 2,000 - GHS 2,500",
"GHS 1,500 - GHS 2,000",
"Less than GHS 1,500"),
# Replace the underscore-separated codes by readable labels.
prim_study_19 = recode(prim_study_19,
"Computer_Science_Engineering" = "Computer Science/Engineering",
"Information_Technology" = "Information Technology",
"Other_Engineering" = "Other Engineering",
"Health_Science" = "Health Science",
"Social_Science" = "Social Science",
"Web_Development_Web_Design" = "Web-Development/Web-Design",
"General_Science" = "General Science",
"Electrical_Engineering" = "Electrical Engineering",
"Visual_Arts" = "Visual Arts",
"Agricultural_Science" = "Agricultural Science",
"Graphic_Design" = "Graphic Design",
"General_Arts" = "General Arts",
"Mathematics_Statistics" = "Mathematics/Statistics"),
# Fixed display order for the fields of study (uses the labels set just above).
prim_study_19 = fct_relevel(prim_study_19,
"Education",
"Social Science",
"Graphic Design",
"Architecture",
"Visual Arts",
"Business",
"General Arts",
"General Science",
"Health Science",
"Chemistry",
"Other Engineering",
"Physics",
"Agricultural Science",
"Web-Development/Web-Design",
"Mathematics/Statistics",
"Electrical Engineering",
"Information Technology",
"Computer Science/Engineering"),
# Readable labels for the education levels; "College" is relabelled "Bachelor".
highest_edu_18 = recode(highest_edu_18,
"Secondary_High_School" = "Secondary High School",
"Basic_Education" = "Basic Education",
"Higher_National_Diploma" = "Higher National Diploma",
"Professional_Diploma" = "Professional Diploma",
"Teacher_Diploma" = "Teacher Diploma",
"College" = "Bachelor"),
highest_edu_18 = fct_relevel(highest_edu_18,
"Basic Education",
"Secondary High School",
"Teacher Diploma",
"Professional Diploma",
"Higher National Diploma",
"Bachelor",
"Master"),
# Insert line breaks ("\n") into the long answers so they fit next to the plots.
profession_1 = recode(profession_1,
"I am a developer by profession" = "I am a developer by profession",
"I am not primarily a developer, but I write code sometimes as part of my work" = "I am not primarily a developer,\nbut sometimes write code at work",
"I am a student who is learning to code" = "I am a student who is learning to code",
"I code primarily as a hobby" = "I code primarily as a hobby",
"I used to be a developer by profession, but no longer am" = "I used to be a developer by profession,\nbut no longer am",
"None of these" = "None of these"),
job_status_29 = as_factor(job_status_29),
job_status_29 = recode(job_status_29,
"I'm not actively looking, but I am open to new opportunities" = "I'm not actively looking,\nbut I am open to new opportunities",
"I am actively looking for a job" = "I am actively looking for a job",
"I am not interested in new job oportunities" = "I am not interested\nin new job oportunities"),
# NOTE(review): overtime_work_25 was already recoded to shortened labels
# earlier in this mutate(), so the original labels matched here presumably no
# longer exist and this recode looks like a no-op - confirm and remove or
# reorder as intended.
overtime_work_25 = recode(overtime_work_25,
"Often: 1-2 days per week" = "Often: 1-2 days per week",
"Frequently: 3 or more days per week" = "Frequently: 3 or more days per week",
"Sometimes: 1-2 days per month but less than weekly" = "Sometimes: 1-2 days per month\nbut less than weekly",
"Occasionally: 1-2 days per quarter but less than monthly" = "Occasionally: 1-2 days per quarter\nbut less than monthly",
"Rarely: 1-2 days per year or less" = "Rarely: 1-2 days per year or less",
"Never" = "Never"),
# Group the free-text onboarding answers into a handful of categories.
# NOTE(review): the uninformative answers are mapped to the literal string
# "NA", not to a missing value (NA) - confirm this is handled where the
# column is summarised.
improve_onboarding_27 = recode(improve_onboarding_27,
"I don't know" = "NA",
"I’m not sure"= "NA",
"more"= "NA",
"N/A"= "NA",
"No idea"= "NA" ,
"Not working in a company"= "NA" ,
"I am quitting after nss"= "NA" ,
"With some experience"= "NA",
"I think just keep doing what they're doing will suffice."= "I am happy" ,
"It's great. Nothing comes to mind"= "I am happy" ,
"It’s fine"= "I am happy",
"It’s the best"= "I am happy",
"There is no nee for any improvement"= "I am happy",
"None"= "I am happy",
"The best I have encounter, won't change a thing."= "I am happy",
"Communication"= "Better documentation of infrastructure and work material",
"More documentation of code"= "Better documentation of infrastructure and work material",
"Documenting a structure"= "Better documentation of infrastructure and work material",
"Proper documentation of existing softwares"= "Better documentation of infrastructure and work material",
"Proper documentation of infrastructure"= "Better documentation of infrastructure and work material",
"Scripts to set up devices, documentation of help new team members familiarize themselves with the different projects" ="Better documentation of infrastructure and work material",
"set up a tutorial on the stack being used, architecture and how the development cycle goes"= "Better documentation of infrastructure and work material",
"Roles could be properly defined and documented"= "Clearly defined roles",
"By clearly defining the core functions of the various units/departments"= "Clearly defined roles",
"Company needs to allow the organisational structures to work without interference and clear task to be assigned to new hires"= "Clearly defined roles",
"Roles need to be well spelt out and every necessary document handed before start of work."= "Clearly defined roles",
"Set the expectations on the role earlier"= "Clearly defined roles",
"Through orientation"= "Better orientation for new workers",
"Could have someone to take me through the code instead of having to understand it myself"= "Better orientation for new workers",
"Assign mentors to newly assigned employees"= "Better orientation for new workers",
"Have orientation sessions before on-boarding"= "Better orientation for new workers",
"Orientation for new employees"= "Better orientation for new workers",
"Providing mentorship and an enabling environment for learning"= "Better orientation for new workers",
"Pair a new developer with developer skilled in his art within the organization and is willing to help"= "Better orientation for new workers",
"Slower rollout. Everything happens too fast, so it doesn't stick"= "Better orientation for new workers",
"A standardized structure for onboard ding"= "Clearer structure and standardisation",
"Having a standard onboarding process"= "Clearer structure and standardisation",
"By defining a structure in the first place"= "Clearer structure and standardisation",
# NOTE(review): the next entry repeats the one just above.
"By defining a structure in the first place"= "Clearer structure and standardisation",
"Structured Onboarding program"= "Clearer structure and standardisation",
"By establishing an HR Department"= "Clearer structure and standardisation",
"Dedicated staff to handle on boarding process"= "Clearer structure and standardisation",
"By improving the organisational structure to facilitate the work flow"= "Clearer structure and standardisation",
"Improve the organizational structure" = "Clearer structure and standardisation",
"Let IT help desk raise tickets for each unit responsible for delivering into the process" ="Clearer structure and standardisation",
"Through IT skills" ="Better/more training",
"More specific training to understand fully the role as opposed to general orientation." ="Better/more training",
"Training in specific fields should be better encouraged" ="Better/more training",
"External trainings" ="Better/more training",
"Developing better internship systems for prospective workers" ="Better/more training",
"More coding workshops" ="Better/more training",
"By applying more practical tests for the individual" ="Better/more training",
"More employment" ="Employing more people",
"to employee more workers" ="Employing more people",
"Adequate funds for the right role" ="Other",
"A little interview on the chosen choice of language" ="Other",
"Capacity building" ="Other",
"inclusiveness" ="Other",
"Job positions should be made known" ="Other",
"More focus on developers" ="Other",
"More innovation and creativity" ="Other",
"Retention of talents.." ="Other",
"Observation through hard work" ="Other",
"Better background check" ="Other",
"Need new machines" ="Other",
"More interaction with leads" ="Other"
),
# Fixed display order for the onboarding categories.
improve_onboarding_27 = fct_relevel(improve_onboarding_27,
"Other",
"Employing more people",
"Clearly defined roles",
"Better documentation of infrastructure and work material",
"Better/more training",
"Better orientation for new workers",
"Clearer structure and standardisation",
"I am happy"
),
# Only the documentation category actually changes here (it gets a line break).
improve_onboarding_27 = recode(improve_onboarding_27,
"Other" = "Other",
"Employing more people" = "Employing more people",
"Clearly defined roles" = "Clearly defined roles",
"Better documentation of infrastructure and work material" = "Better documentation of\ninfrastructure and work material",
"Better/more training" = "Better/more training",
"Better orientation for new workers" = "Better orientation for new workers",
"Clearer structure and standardisation" = "Clearer structure and standardisation",
"I am happy" = "I am happy"
),
# Group the free-text answers about changing one's education into categories.
# NOTE(review): as above, "NA" here is a literal string, not a missing value.
change_edu_21 = recode(change_edu_21,
"The program of study. I would probably take a computer science course" = "I would study Computer Science",
"Yes" = "NA",
"In the university at the moment" = "NA",
"Business or graphic design"= "Field of Study",
"I would select information technology"= "Field of Study",
"field of study"= "Field of Study",
"Would do computer engineering"= "Field of Study",
"I will change the course I chose."= "Field of Study",
"Material engineering"= "Field of Study",
"My Choice of university of study"= "Field of Study",
"I'll do electronic and electrical engineering instead of computer science"= "Field of Study",
"I would change my programme of study to computer science, or software engineering."= "Field of Study",
"I'd probably have studied general arts in senior high, then turned the Computer Science department ict labs into my playground in University." = "Field of Study",
"My choice of course in high school because i was a visual arts student"= "Courses during High School",
"My final year elective courses"= "Courses during High School" ,
"The business course in secondary school"= "Courses during High School",
"Combination of modules I select for the course"= "Courses during High School",
"Curriculum"= "Courses during High School",
"change my school"= "School/University",
"My Education"= "School/University",
"My tertiary institution"= "School/University",
"Approach to learning"= "Learning Approach/Methods",
"How I learned programming"= "Learning Approach/Methods",
"How the courses are taught"= "Learning Approach/Methods",
"The learning platforms used"= "Learning Approach/Methods",
"Will advocate for modern trends to be taught"= "Learning Approach/Methods",
"Tech entrepreneurship should be a course to understand the business of tech."= "Other",
"The non practical aspects of studies"= "Other",
"Master degree"= "Other",
"learn more independently"= "Learn more independently",
"learn programming earlier"= "Learn programming earlier",
"more practical work"= "More practical work",
"I woud have added something else to my studies"= "Adding additional things to studies"),
# Move "Nothing" and "Other" to the front of the levels.
change_edu_21 = fct_relevel(change_edu_21,
"Nothing",
"Other"),
# Coarser salary variable: the brackets from GHS 3,500 upwards are merged into
# three wider ones, the brackets below GHS 3,500 are kept as they are.
monthly_salary_28_7 = recode(monthly_salary_28,
"Greater than GHS 25,000" = "Greater than GHS 15,000",
"GHS 20,000 - GHS 25,000" = "Greater than GHS 15,000",
"GHS 15,000 - GHS 20,000"= "Greater than GHS 15,000",
"GHS 10,000 - GHS 15,000"= "GHS 8,000 - GHS 15,000",
"GHS 8,000 - GHS 10,000"= "GHS 8,000 - GHS 15,000",
"GHS 6,000 - GHS 8,000"= "GHS 3,500 - GHS 8,000",
"GHS 5,000 - GHS 6,000"= "GHS 3,500 - GHS 8,000",
"GHS 4,000 - GHS 5,000"= "GHS 3,500 - GHS 8,000",
"GHS 3,500 - GHS 4,000"= "GHS 3,500 - GHS 8,000"),
# Merged brackets, listed from the lowest to the highest.
monthly_salary_28_7 = fct_relevel(monthly_salary_28_7,
"Less than GHS 1,500",
"GHS 1,500 - GHS 2,000",
"GHS 2,000 - GHS 2,500",
"GHS 2,500 - GHS 3,000",
"GHS 3,000 - GHS 3,500",
"GHS 3,500 - GHS 8,000",
"GHS 8,000 - GHS 15,000",
"Greater than GHS 15,000"
),
# Only the "This varies ..." answer changes (it gets a line break).
new_tool_7 = recode(new_tool_7,
"Every few months" = "Every few months",
"Once a year" = "Once a year",
"Once every few years" = "Once every few years",
"Once a decade" = "Once a decade",
"This varies depending on work and projects" = "This varies depending on work\nand projects",
"Other" = "Other"
)
)
# Load the skills table and clean it up.
skills <- rio::import("data/clean/skills_final.csv") %>%
  # 'V1' is an artifact of saving as .csv (at least on a Mac?); any_of() drops
  # it when present without failing if the file was written without it.
  select(-any_of("V1")) %>%
  # The column 'level' contains blanks ("") instead of NAs -> transform them
  # to NAs. Only character columns are touched: na_if() needs the replacement
  # value to be compatible with the column type, so comparing non-character
  # columns with "" can fail on recent dplyr versions (and was a no-op before).
  mutate(across(where(is.character), ~ na_if(.x, "")))
# Attach the profession and the gender of each respondent to their skills
# (matching skills$id with Qs$ID), so that the popularity of a skill can be
# broken down by occupation or gender.
skills_with_characteristics <- left_join(
  x = skills,
  y = Qs %>% select(ID, profession_1, gender_35),
  by = c("id" = "ID")
)
# Denominators used to compute percentages.
number_of_respondents <- nrow(Qs) # number of respondents (272) used to compute percentages.
number_of_prof_dev <- Qs %>%
  filter(profession_1 == "I am a developer by profession") %>%
  nrow()
number_of_students <- Qs %>%
  filter(profession_1 == "I am a student who is learning to code") %>%
  nrow()
```
```{r}
# Themes and Functions:
# Styling variables shared by the custom theme and the plotting functions
# defined below.

# Text colours and font
highlight_col <- "#d4145a"
text_col <- "#777777"
header_col <- "#222222"
font_in_viz <- "sans" # TODO: decide whether this font should be changed
text_col_in_viz <- header_col

# Sizes
main_fontsize <- 10 # 'default' base size for the plots
geom_text_size <- 3.5 # size of text in plots

# Colour palettes
HL_colors <- c("#59B0E3", "#E3E19A", "#6FE38E", "#AF8CDE", "#DF4B4F")
colours <- c("#610b70", "#88b101", "#eb1c96", "#e98403", "#45454C", "#000001", "black")
colours_4 <- c("#58AFE2", "#817BB5", "#AB4887", "#D4145A")
colours_6 <- c("#58AFE2", "#7190C7", "#8A71AC", "#A25290", "#BB3375", "#D4145A")
colours_8 <- c("#58AFE2", "#6A99CF", "#7B83BB", "#8D6DA8", "#9F5694", "#B14081", "#C22A6D", "#D4145A")
# HL_colors_sequential = c(text_col, "#610b70","#88b101","#eb1c96","#e98403","#454545")
barplot_theme <- function() {
  # Custom ggplot theme for our barplots, built on theme_minimal().
  # Text uses the shared font/colour variables (font_in_viz, text_col_in_viz,
  # main_fontsize); the grid lines and the x axis are removed.

  # All text elements share family and colour and differ only in size.
  viz_text <- function(size) {
    element_text(family = font_in_viz, size = size, color = text_col_in_viz)
  }

  theme_minimal() +
    theme(
      plot.title = viz_text(main_fontsize + 2),
      # plot.subtitle = element_text(family = font_in_viz, size = main_fontsize),
      axis.title = viz_text(main_fontsize),
      axis.text = viz_text(main_fontsize),
      # plot.caption = element_text(hjust = 0, family = my_font,
      #                             size = main_fontsize, face = "italic"), # default is hjust = 1
      plot.title.position = "plot", # left-align the title
      # plot.caption.position = "plot",
      # no grid lines:
      panel.grid.minor.x = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.major.x = element_blank(),
      # no x axis either (title, text, ticks): the values are printed as labels
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
      # plot.margin = margin(0.1, 0.1, 0.1, 0.1, unit = "cm")
    )
}
# Shared implementation of basic_barplot() and barplot_ordered(): both draw the
# same horizontal "lollipop" chart and only differ in how the categories are
# ordered along the axis.
#
# my_df          data frame to plot
# a_factor       (unquoted) column holding the categories
# its_values     (unquoted) column holding the value of each category
# labels         (unquoted) column holding the text printed next to each lollipop
# title          plot title (wrapped at 100 characters)
# label_spacing  gap between the end of a lollipop and its label, as a fraction
#                of the largest value
# reorder        TRUE: categories are sorted by `its_values` (fct_reorder());
#                FALSE: the existing order of `a_factor` is kept
#
# Returns a ggplot. Relies on highlight_col, geom_text_size and barplot_theme()
# defined in this file.
.lollipop_barplot <- function(my_df, a_factor, its_values, labels, title,
                              label_spacing, reorder) {
  base_mapping <- if (reorder) {
    aes(x = fct_reorder({{ a_factor }}, {{ its_values }}), y = {{ its_values }})
  } else {
    aes(x = {{ a_factor }}, y = {{ its_values }})
  }

  ggplot(data = my_df, mapping = base_mapping) +
    # LOLLIPOP: a dot at the value ...
    geom_point(color = highlight_col, size = 5) +
    # ... and a thin stick from 0 to the value.
    # NOTE(review): newer ggplot2 versions prefer `linewidth` over `size` for
    # lines - confirm the installed version before changing it.
    geom_segment(
      aes(x = {{ a_factor }}, xend = {{ a_factor }}, y = 0, yend = {{ its_values }}),
      color = highlight_col,
      size = 1
    ) +
    # # rounding the edges of the bars (CHICKLET alternative):
    # geom_chicklet(radius = grid::unit(1, 'mm'), fill = highlight_col) +
    # label placed slightly beyond the end of the lollipop
    geom_text(
      aes(label = {{ labels }}, y = {{ its_values }} + (label_spacing * max({{ its_values }}))),
      size = geom_text_size
    ) +
    scale_y_continuous(labels = scales::percent) +
    barplot_theme() +
    coord_flip() + # horizontal lollipops
    labs(title = stringr::str_wrap(title, 100)) + # automated text wrap in title
    xlab("") +
    ylab("")
}

basic_barplot <- function(my_df, a_factor, its_values, labels, title, label_spacing = 0.07){
  # Standard barplot (drawn as lollipops) with the categories sorted by their
  # value. Uses the barplot_theme defined above.
  # Takes a data frame, the (unquoted) factor column, the column with its
  # values and the column with the labels; returns a ggplot.
  # The `{{ }}` forwards the unquoted column names to the helper.
  .lollipop_barplot(
    my_df, {{ a_factor }}, {{ its_values }}, {{ labels }},
    title = title, label_spacing = label_spacing, reorder = TRUE
  )
}
# Function for barplot that does not override order
barplot_ordered <- function(my_df, a_factor, its_values, labels, title, label_spacing = 0.07){
  # Same plot as basic_barplot(), but the categories keep the order of the
  # levels of `a_factor` instead of being sorted by value.
  # Needs an (ordered) factor as input, and its values; returns a ggplot.
  .lollipop_barplot(
    my_df, {{ a_factor }}, {{ its_values }}, {{ labels }},
    title = title, label_spacing = label_spacing, reorder = FALSE
  )
}
compute_perc <- function(my_df, a_factor) {
  # Counts the values of `a_factor` (an unquoted column of `my_df`) and turns
  # the counts into shares. Rows containing NA are dropped before the shares
  # are computed, so the percentages are relative to the respondents who
  # answered the question.
  # Returns one row per value with the columns `n`, `perc` (share between 0
  # and 1) and `perc_label` (the share as text, e.g. "12.5%", for labelling).
  # USE THIS FUNCTION IF THE SUM OF THE ANSWERS IS 272 (including NAs):
  # it does not work for questions where several choices are possible.
  answered <- drop_na(count(my_df, {{ a_factor }}))
  mutate(
    answered,
    perc = n / sum(n),
    perc_label = paste0(as.character(round(perc * 100, 1)), "%")
  )
}
compute_perc_popular_skills <- function(the_skills, skillset, number_of_resp) {
  # Share of respondents who used each tool of `skillset`: rows of
  # `the_skills` with level "Worked with in PAST year" or "Both" are counted
  # per tool and divided by `number_of_resp`.
  # Returns one row per tool (`tool`, `n`, `perc`, `perc_label`), sorted by
  # decreasing share.
  # NOTE: the denominator is all respondents, not only those who entered any
  # skill -> not optimal, but annoying to correct?
  used_counts <- the_skills %>%
    filter(
      tool %in% skillset, # select only the requested tools (e.g. the languages)
      level %in% c("Worked with in PAST year", "Both")
    ) %>%
    count(tool)

  used_counts %>%
    mutate(
      perc = n / number_of_resp,
      perc_label = paste0(as.character(round(perc * 100, 1)), "%")
    ) %>%
    arrange(desc(perc))
}
compute_perc_future_skills <- function(the_skills, skillset, number_of_resp) {
  # Share of respondents who want to use each tool of `skillset` next year:
  # rows of `the_skills` with level "Want to work with NEXT year" are counted
  # per tool and divided by `number_of_resp`.
  # Returns one row per tool (`tool`, `n`, `perc`, `perc_label`), sorted by
  # decreasing share.
  # NOTE: the denominator is all respondents, not only those who entered any
  # skill -> not optimal, but annoying to correct?
  wanted_counts <- the_skills %>%
    filter(
      tool %in% skillset, # select only the requested tools (e.g. the languages)
      level == "Want to work with NEXT year"
    ) %>%
    count(tool)

  wanted_counts %>%
    mutate(
      perc = n / number_of_resp,
      perc_label = paste0(as.character(round(perc * 100, 1)), "%")
    ) %>%
    arrange(desc(perc))
}
bubble_plotly_theme <- function() {
  # Theme for the bubble plots: theme_minimal() without legend and without
  # major grid lines.
  bubble_tweaks <- theme(
    legend.position = "none",
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_blank()
  )
  theme_minimal() + bubble_tweaks
}
bubble_plot <- function(df, x_value, y_value, freq, colour_choice, title) {
  # Bubble plot of `y_value` against `x_value` (unquoted columns of `df`).
  # The size of a bubble follows `freq` (number of respondents) and its fill
  # follows `x_value`, using the colours given in `colour_choice`.
  # `df` must already contain a column `text`: it is mapped to the `text`
  # aesthetic (the text that appears when hovering over a bubble).
  # NOTE(review): `title` is accepted but never used in the body.
  bubbles <- geom_point(
    aes(
      size = ifelse({{ freq }} == 0, NA, {{ freq }}), # combinations with 0 respondents get no bubble size
      fill = {{ x_value }} # bubble colour according to the x value level
    ),
    alpha = 0.75,
    shape = 19, color = NA # circles and no border
  )

  ggplot(data = df, aes(x = {{ x_value }}, y = {{ y_value }}, text = text)) +
    bubbles +
    scale_size(range = c(1.4, 15)) + # size of the bubbles
    labs(x = "", y = "", size = "Number of respondents", fill = "") +
    scale_fill_manual(values = colour_choice) +
    scale_colour_manual(values = "white")
}
# Trying to optimize the heights of the plots:
# the height depends on the number of "bars" in the plot.
# e.g. b3 means all the plots with 3 bars have a fig.height of 1.5
# These variables are passed to the `fig.height` chunk option of the plots
# (e.g. `fig.height = b6`).
b2 <- 1
b3 <- 1.5
b4 <- 1.8
b6 <- 2.5
b8 <- 2.8
b10 <- 3
b18 <- 4
```
# Overview
Hacklab Ghana Developer Census 2020 is the first and most comprehensive survey of people who code in Ghana. In 2020, Hacklab Research fielded a survey covering everything from developers’ favorite technologies to their job preferences. This is the first in a series of annual surveys to be published. This maiden edition saw the participation of 272 developers, who took the 20-minute survey between November and December 2020.
Despite our survey’s reach and capacity for informing valuable conclusions, we acknowledge that our results don’t represent everyone in the Ghana developer community evenly. We have further work to do to make the Hacklab Ghana Developer Census 2020 a more inclusive, diverse platform, and a reflection of the community at large.
We are committed to building on the steps we have taken and to improving the coverage and insights of the survey in the coming years, so that we can strengthen the support and interventions needed to give developers a more enabling ecosystem in which to thrive. Some of this survey's results directly guide those efforts. To address the characteristics of our data, be sure to check out where we summarize results by developer persona (Professional Developer, Student) or gender.
We looked at breakdowns by demographics throughout our analysis and its reflection of the distribution of talents.
Want to dive into the results yourself? The anonymized results of the survey are [available for download](https://github.com/Hacklab-Foundation/Developer-Census-2020) under the [Open Database License (ODbL)](https://opendatacommons.org/licenses/odbl/1-0/). We look forward to seeing what you find!
This maiden edition could not have been successful without the contributions of [Twitter](https://twitter.com/?lang=en) and [CorrelAid](https://correlaid.org/).
### About The Hacklab Foundation
The Hacklab Foundation is an international nonprofit organization headquartered in Ghana with a focus on preparing the youth for future digital jobs through technology education and skills development. We achieve this through bootcamps, hackathons, mentorship and coaching, internships, digital skills training, and job placement.
Since our inception in 2015, we have directly impacted over 10,000 people; organized hackathons, robotics, and coding bootcamps for kids between the ages of 7 and 13; supported 500+ women in tech; placed 300+ youth in jobs; and placed 250+ youth in internships. Through our partnership with IBM, we launched the Ghana National Digital Skills Training Program in November 2018, with a goal to reach 100,000 people by 2021.
#### Statement of Inclusion
The Hacklab Foundation believes that creating an equal platform for everyone, irrespective of race, gender, social class, and physical limitations will allow for a fair chance to compete for the same opportunity. This has been at the core of our initiatives.
## Key Insights
Here are a few of the top takeaways from this year’s results.
**1. Low Female Representation:**
Of the 272 respondents, 17% indicated being women. Only 10% of the 130 professional developers are women. However, this percentage has the potential to increase in the upcoming years as 24% of the 84 students are women. [Learn more](#gender).
**2. Geographical Concentration:**
70% of the respondents are from the Greater Accra Region. [Explore the map](#geography).
**3. Most Used Languages:**
HTML/CSS, JavaScript, Python, and SQL are the most used languages by Ghanaian developers. Learn more about the popularity of other languages and the preferences of professional developers [here](#popu_language).
**4. Most Familiar Frameworks:**
React.js is the most used web framework. Node.js is also a widely used framework. [Learn more](#popu_webframework).
**5. Strong Developers' Communities:**
Of the numerous communities listed by the respondents (122), the three largest communities to which they indicated membership were DevCongress (13.6%), Facebook Developer Circle (9.6%), and the Hacklab Foundation (7.4%). [See all the communities](#dev_communities).
**6. Highest Level of Education:**
The vast majority of the respondents have at least a secondary high school degree, and 70% indicated having a Bachelor's degree. Only 4% indicated having a Master's degree. [Learn more](#highest_education).
**7. Primary Field of Study:**
Most participants study or studied Computer Science or Computer Engineering (55%), followed by Information Technology (11.6%) and Business (5.4%). [Learn more](#study_field).
**8. Overtime & Compensation:**
62% of the respondents indicated receiving a monthly salary lower than GHS 2,000; this percentage drops to 47.5% for respondents who indicated being professional developers. Around 50% of the respondents work overtime on 3 or more days in a week.
Learn more about the [working conditions](#work_cond) and [salaries](#salary) of the respondents.
<hr>
# Developer Profile
## Type of Developer
### What describes you best?
The two largest subgroups among the respondents are professional developers and students. Additionally, there are respondents coding as a part of their work, coding as a hobby, as well as former developers.
For the remainder of this report we use these categories, specifically students and professional developers, to highlight particular differences between participants.
```{r, fig.align='center', out.width = '100%', fig.height = b6}
# Bar chart of the answers to profession_1 across all respondents.
# perc / perc_label are presumably columns produced by compute_perc() -- confirm.
basic_barplot(
  my_df = compute_perc(Qs, profession_1),
  a_factor = profession_1,
  its_values = perc,
  labels = perc_label,
  title = "",
  label_spacing = 0.1
)
```
### Do you code as a hobby?
Most of the respondents code as a hobby. Interestingly, professional developers seem to code as a hobby less often than students, which may be related to their reduced time availability.
::::: {.panelset}
::: {.panel}
##### All Respondents {.panel-name}
```{r, fig.align='center', out.width = '100%', fig.height = b2}
# Hobby-coding answers (hobby_coding_2) for every respondent.
basic_barplot(
  my_df = compute_perc(Qs, hobby_coding_2),
  a_factor = hobby_coding_2,
  its_values = perc,
  labels = perc_label,
  title = ""
)
```
:::
::: {.panel}
##### Professional Developers {.panel-name}
```{r, fig.align='center', out.width = '100%', fig.height = b2}
# Hobby-coding answers restricted to respondents who are developers by
# profession.
basic_barplot(
  my_df = compute_perc(
    filter(Qs, profession_1 == "I am a developer by profession"),
    hobby_coding_2
  ),
  a_factor = hobby_coding_2,
  its_values = perc,
  labels = perc_label,
  title = ""
)
```
:::
::: {.panel}
##### Students {.panel-name}
```{r, fig.align='center', out.width = '100%', fig.height = b2}
# Hobby-coding answers restricted to respondents who are students learning
# to code.
basic_barplot(
  my_df = compute_perc(
    filter(Qs, profession_1 == "I am a student who is learning to code"),
    hobby_coding_2
  ),
  a_factor = hobby_coding_2,
  its_values = perc,
  labels = perc_label,
  title = ""
)
```
:::
::::
## Employment
```{r}
# Employment answers without "I prefer not to say". dplyr::filter() also
# drops rows where employment_3 is NA, because the comparison yields NA.
# The factor is then rebuilt with six of its levels in a hand-picked order.
# NOTE(review): the indices depend on the current level order of
# Qs$employment_3 -- confirm they still select the intended levels if the
# cleaning step changes.
employment_data <- Qs %>%
  filter(employment_3 != "I prefer not to say") %>%
  mutate(
    employment_3 = factor(
      employment_3,
      levels = levels(employment_3)[c(4, 2, 3, 6, 5, 1)]
    )
  )
```
Most of the respondents are full-time employees, and there is also a large share of students. A significant share of the respondents are unemployed and looking for work.
When it comes to gender differences, there is a greater share of students among the female respondents. In addition, women seem to be less often self-employed than men.
::::: {.panelset}
::: {.panel}
##### All Respondents {.panel-name}
```{r, fig.align='center', out.width = '100%', fig.height = b6}
# Employment status (employment_3) for all respondents in employment_data.
basic_barplot(
  my_df = compute_perc(employment_data, employment_3),
  a_factor = employment_3,
  its_values = perc,
  labels = perc_label,
  title = "",
  label_spacing = 0.1
)
```
:::
::: {.panel}
##### Men only {.panel-name}
```{r, fig.align='center', out.width = '100%', fig.height = b6}
# Employment status for respondents whose gender_35 answer is "Man".
barplot_ordered(
  my_df = compute_perc(
    filter(employment_data, gender_35 == "Man"),
    employment_3
  ),
  a_factor = employment_3,
  its_values = perc,
  labels = perc_label,
  title = "",
  label_spacing = 0.12
) +
  # extra room on the value axis so the bar labels are not cut off
  scale_y_continuous(expand = c(0, 0.03))
```
:::
::: {.panel}
##### Women only {.panel-name}
```{r, fig.align='center', out.width = '100%', fig.height = b6}
# Employment status for respondents whose gender_35 answer is "Woman".
barplot_ordered(
  my_df = compute_perc(
    filter(employment_data, gender_35 == "Woman"),
    employment_3
  ),
  a_factor = employment_3,
  its_values = perc,
  labels = perc_label,
  title = "",
  label_spacing = 0.12
) +
  # extra room on the value axis so the bar labels are not cut off
  scale_y_continuous(expand = c(0, 0.03))
```
:::
::::
## Geography <a name="geography"></a>
### Region
Most respondents come from the Greater Accra Region. The extent of this concentration in Accra seems to be larger for professional developers than for students.
::::: {.panelset}
::: {.panel}
##### All Respondents {.panel-name}
```{r, fig.align='center', out.width = '100%', fig.height = b10}
# Respondents located in Ghana, with the ", Ghana" suffix stripped from the
# four region names that carry it. region_data is reused by the following
# chunks of this section.
region_data <- Qs %>%
  filter(region_4 != "Not in Ghana") %>%
  mutate(
    region_4 = recode(
      region_4,
      "Eastern Region, Ghana" = "Eastern Region",
      "Northern Region, Ghana" = "Northern Region",
      "Central Region, Ghana" = "Central Region",
      "Western Region, Ghana" = "Western Region"
    )
  )

# Region of residence for all respondents in Ghana.
basic_barplot(
  my_df = compute_perc(region_data, region_4),
  a_factor = region_4,
  its_values = perc,
  labels = perc_label,
  title = ""
)
```
:::
::: {.panel}
##### Professional Developers {.panel-name}
```{r, fig.align='center', out.width = '100%', fig.height = b10}
# Region of residence for respondents who are developers by profession.
basic_barplot(
  my_df = compute_perc(
    filter(region_data, profession_1 == "I am a developer by profession"),
    region_4
  ),
  a_factor = region_4,
  its_values = perc,
  labels = perc_label,
  title = ""
)
```
:::
::: {.panel}
##### Students {.panel-name}
```{r, fig.align='center', out.width = '100%', fig.height = b10}
# Region of residence for respondents who are students learning to code.
basic_barplot(
  my_df = compute_perc(
    filter(region_data, profession_1 == "I am a student who is learning to code"),
    region_4
  ),
  a_factor = region_4,
  its_values = perc,
  labels = perc_label,
  title = ""
)
```
:::
::::
### City
The map shows the cities of the respondents. Zoom in to get a more detailed picture.
Hover over the individual markers to see the professional status of the respondents.
```{r, fig.align= 'center', out.width= '100%', fig.height = 6.5}
# The geocoding is done separately in data_cleaning/Geocoding.R so that it
# does not have to be repeated every time the report is knitted; only the
# saved result is loaded here.
data_city_geo <- rio::import("data/clean/data_city_geo.rds")

# Violet pin used for the city markers.
# The anchor is set to the bottom centre of the 24 x 32 px image. Without an
# explicit anchor Leaflet centres a sized icon on the coordinate, which puts
# the tip of the pin below the actual location.
violet_icon <- makeIcon(
  iconUrl = "https://raw.githubusercontent.com/pointhi/leaflet-color-markers/master/img/marker-icon-violet.png",
  iconWidth = 24,
  iconHeight = 32,
  iconAnchorX = 12,
  iconAnchorY = 32
)

# Clustered city markers; the hover label shows the profession_1 answer.
leaflet(data = data_city_geo) %>%
  addTiles() %>%
  addMarkers(
    lng = ~ geocode$lon,
    lat = ~ geocode$lat,
    clusterOptions = markerClusterOptions(),
    icon = violet_icon,
    label = ~profession_1
  )
```
## Demographics
### Age
Most respondents are between 20 and 30 years old.
While the students are very young in most cases, the professional developers display a little more variance in their ages. Overall, however, the sample is very young.
```{r}
# Rebuild age_range_6 with its first six levels in reverse order.
# NOTE(review): the 6 is hard-coded -- any level beyond the sixth would turn
# into NA; confirm the factor has exactly six levels.
Qs$age_range_6 <- factor(
  Qs$age_range_6,
  levels = levels(Qs$age_range_6)[6:1]
)
```
::::: {.panelset}
::: {.panel}
##### All Respondents {.panel-name}
```{r, fig.align='center', out.width = '100%', fig.height = b6}
# Age range (age_range_6) for all respondents.
barplot_ordered(
  my_df = compute_perc(Qs, age_range_6),
  a_factor = age_range_6,
  its_values = perc,
  labels = perc_label,
  title = ""
)
```
:::
::: {.panel}
##### Professional Developers {.panel-name}
```{r, fig.align='center', out.width = '100%', fig.height = b6}
# Age range for respondents who are developers by profession.
barplot_ordered(
  my_df = compute_perc(
    filter(Qs, profession_1 == "I am a developer by profession"),
    age_range_6
  ),
  a_factor = age_range_6,
  its_values = perc,
  labels = perc_label,
  title = ""
)
```
:::
::: {.panel}
##### Students {.panel-name}
```{r, fig.align='center', out.width = '100%', fig.height = b6}
# Age range for respondents who are students learning to code.
barplot_ordered(
  my_df = compute_perc(
    filter(Qs, profession_1 == "I am a student who is learning to code"),
    age_range_6
  ),
  a_factor = age_range_6,
  its_values = perc,
  labels = perc_label,
  title = ""
)
```
:::
::::
### Gender <a name="gender"></a>
#### What gender do you identify with?
There is a greater share of women among the students than among the professional developers.
Ghana's developer community may thus become more representative in the upcoming years.
```{r}
# Respondents with a usable gender answer: rows where gender_35 is missing
# or is the literal string "NA" are removed. (The string comparison alone
# already drops missing values in dplyr::filter(); the is.na() test only
# spells that out.)
gender_data <- filter(Qs, !is.na(gender_35), gender_35 != "NA")
```
::::: {.panelset}
::: {.panel}
##### All Respondents {.panel-name}
```{r, fig.align='center', out.width = '100%', fig.height = b2}
# Gender (gender_35) for all respondents in gender_data.
barplot_ordered(
  my_df = compute_perc(gender_data, gender_35),
  a_factor = gender_35,
  its_values = perc,
  labels = perc_label,
  title = ""
)
```
:::
::: {.panel}
##### Professional Developers {.panel-name}
```{r, fig.align='center', out.width = '100%', fig.height = b2}
# Gender for respondents who are developers by profession.
barplot_ordered(
  my_df = compute_perc(
    filter(gender_data, profession_1 == "I am a developer by profession"),
    gender_35
  ),
  a_factor = gender_35,
  its_values = perc,
  labels = perc_label,
  title = ""
)
```
:::
::: {.panel}
##### Students {.panel-name}
```{r, fig.align='center', out.width = '100%', fig.height = b2}
# Gender for respondents who are students learning to code.
barplot_ordered(
  my_df = compute_perc(
    filter(gender_data, profession_1 == "I am a student who is learning to code"),
    gender_35
  ),
  a_factor = gender_35,
  its_values = perc,
  labels = perc_label,
  title = ""
)
```
:::
::::
#### Do you identify as transgender?
```{r}
# Respondents with a usable answer to transgender_36: rows where the value
# is missing or is the literal string "NA" are removed. (The string
# comparison alone already drops missing values in dplyr::filter(); the
# is.na() test only spells that out.)
transgender_data <- filter(Qs, !is.na(transgender_36), transgender_36 != "NA")
```
```{r, fig.align='center', out.width = '100%', fig.height = b2}
# Answers to transgender_36 for all respondents in transgender_data.
basic_barplot(
  my_df = compute_perc(transgender_data, transgender_36),
  a_factor = transgender_36,
  its_values = perc,
  labels = perc_label,
  title = ""
)
```
### Sexual orientation
```{r}
sexual_orientation_data <- Qs %>%