-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path20230307_prepare_PIK_data_for_WDI.R
112 lines (82 loc) · 3.88 KB
/
20230307_prepare_PIK_data_for_WDI.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# Title: 20230307_prepare_PIK_data_for_WDI.r
# Description: Script to prepare emissions data from PIK for ingestion in WDI
# Date: 3/7/2023
# Author: Thijs Benschop
rm(list=ls())
#### Load libraries and data ####
library(dplyr)
library(data.table)
library(ggplot2)
setwd("C:/Users/wb460271/OneDrive - WBG/Documents/GitHub/WDI_GHG_emissions")
rm(list = ls())
# Data downloaded from https://zenodo.org/record/7727475
# Version 2.4.2 of PRIMAP-HIST dataset
list.files("./data_private/PIK")
#data_PIK <- as.data.table(read.csv("./data_private/PIK/Guetschow-et-al-2023-PRIMAP-hist_v2.4.1_final_16-Feb-2023.csv"))
data_PIK <- as.data.table(read.csv("./data_private/PIK/Guetschow-et-al-2023a-PRIMAP-hist_v2.4.2_final_09-Mar-2023.csv"))
dim(data_PIK)
colnames(data_PIK)
# Drop columns only keep years >1960, WDI starts in 1960
data_PIK[, paste0("X", 1750:1959) := NULL]
# Rename columns
colnames(data_PIK)
setnames(data_PIK, "scenario..PRIMAP.hist.", "scenario")
setnames(data_PIK, "category..IPCC2006_PRIMAP.", "category")
setnames(data_PIK, "area..ISO3.", "ISO3")
colnames(data_PIK)
# Compare HISTCR/HISTTP -> use HISTTP
#HISTCR: In this scenario country-reported data (CRF, BUR, UNFCCC) is prioritized over third-party data (CDIAC, FAO, Andrew, EDGAR, BP).
#HISTTP: In this scenario third-party data (CDIAC, FAO, Andrew, EDGAR, BP) is prioritized over country-reported data (CRF, BUR, UNFCCC))
table(data_PIK[, "scenario"])
# use third-party data
data_PIK <- data_PIK[scenario == "HISTCR"]
data_PIK[, "scenario" := NULL]
table(data_PIK[, "source"])
table(data_PIK[, "entity"])
table(data_PIK[, "ISO3"])
table(data_PIK[, "unit"]) # check units -> convert to CO2e?
table(data_PIK[, "entity"])
table(data_PIK[, "category"])
# Check which WDI countries available
# Load country WDI list (217 countries)
WDI_countries <- as.data.table(read.csv("Data/wdi_country_list.csv"))
setnames(WDI_countries, c("long_name", "ISO3"))
PIK_countries <- unique(data_PIK$ISO3)
length(PIK_countries) # 215 countries
table(WDI_countries$ISO3 %in% PIK_countries) # 198 WDI countries available in PIK
WDI_countries[!(ISO3 %in% PIK_countries)] # list of missing countries (not in PIK data)
# are these countries/territories included in other countries?
table(PIK_countries %in% WDI_countries$ISO3)
PIK_countries[!(PIK_countries %in% WDI_countries$ISO3)] # list of missing countries (not in WDI list)
# AIA - Anguilla, ANT - , ATA - Antartica, COK - Cook Islands, NIU - Niue,
# SHN - Saint Helena, TKL - Tokelau, TWN - Taiwan, VAT - Holy See
# others are aggregates
# Drop countries/regions not in WDI
data_PIK <- data_PIK[ISO3 %in% WDI_countries$ISO3]
# Convert data in long format
PIK_long <- melt(data_PIK, id.vars = c("source", "ISO3", "entity", "unit", "category"), #, "scenario"),
variable.name = "year", value.factor = FALSE, variable.factor = FALSE)
PIK_long[, year := as.numeric(substring(year, 2, 5))]
dim(PIK_long)
colnames(PIK_long)
table(PIK_long$unit)
PIK_long[, value := value / 1000] # convert to Mt from gigagram (gigagram = 10^9 g = 10^6 kg = 10^3 t = kt)
# Add "Time", "Country", "SCALE" columns as in DCS template
PIK_long[, Time := paste0("YR", year)]
PIK_long[, Country := ISO3]
PIK_long[, SCALE := 0]
# Prepare data for each indicator
table(PIK_long$entity)
table(PIK_long$category)
# 1) EN.GHG.TOTL.KT.CE
EN.GHG.TOTL.KT.CE <- PIK_long[entity == "KYOTOGHG (AR4GWP100)" & category == "M.0.EL",
.(Time, Country, SCALE, value)]
PIK_long[, Series := "EN.GHG.TOTL.KT.CE"]
# 1) EN.GHG.TOTL.KT.CE
EN.GHG.TOTL.KT.CE <- PIK_long[entity == "KYOTOGHG (AR4GWP100)" & category == "M.0.EL",
.(Time, Country, SCALE, value)]
PIK_long[, Series := "EN.GHG.TOTL.KT.CE"]
# 1) EN.GHG.TOTL.KT.CE
EN.GHG.TOTL.KT.CE <- PIK_long[entity == "KYOTOGHG (AR4GWP100)" & category == "M.0.EL",
.(Time, Country, SCALE, value)]
PIK_long[, Series := "EN.GHG.TOTL.KT.CE"]