Skip to content

Latest commit

 

History

History
414 lines (188 loc) · 6.41 KB

2.) Assignment # 2 - SQL Ad-Hoc - Gino Hobayan.MD

File metadata and controls

414 lines (188 loc) · 6.41 KB

Assignment #2

Ad-Hoc Analysis on covid-19_italy

Google BigQuery link:

https://console.cloud.google.com/bigquery?sq=132225947884:8de6f1dba3de42a8a1d94088c7217297

--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/*

ASSIGNMENT #2 - SQL BOOTCAMP (AUG 2023) 
AD-HOC ANALYSIS EXERCISE FOR THE BIGQUERY PUBLIC DATASET: covid19_italy

BY: GINO FREUD HOBAYAN

*/
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------





/*
GUIDES:

Regular data = SUM
Cumulative data = MAX

Don't use "recovered" column for queries about vaccine effectivity.


`bigquery-public-data.covid19_italy.data_by_province` AS province

`bigquery-public-data.covid19_italy.data_by_region` AS region

`bigquery-public-data.covid19_italy.national_trends` AS nat_trends

*/ 






-- 1. WHAT is the region with the MOST confirmed cases?

SELECT
  region.region_name,
  MAX(region.total_confirmed_cases) AS max_confirmed_cases
FROM
  `bigquery-public-data.covid19_italy.data_by_region` AS region
GROUP BY
  region.region_name
ORDER BY
  max_confirmed_cases DESC

LIMIT 
  1


/*

ANSWER:
Region name: Lombardia
max_confirmed_cases: 4,170,894

*/




















-- 2. In which province is the difference between [the number of confirmed cases] and [the number of recovered patients] the greatest?

-- Using region TABLE
SELECT
  region.region_name,
  region.total_confirmed_cases AS num_of_confirmed_cases,
  region.recovered AS num_of_recovered_patients,

  region.total_confirmed_cases - region.recovered AS difference

FROM
  `bigquery-public-data.covid19_italy.data_by_region` AS region
ORDER BY
  difference DESC


/*

ANSWER: Lombardia

difference                 - 613,919

num_of_confirmed_cases     - 1,681,066
num_of_recovered_patients  - 1,067,147

*/
















-- 3. WHAT are the regions with the most recovered patients?

-- using "recovered" column for this specific query. Although not recommended when measuring vaccine effectivity, since a person can recover without a vaccine.
SELECT
  DISTINCT(region.region_name) AS regions_with_the_most_recovered,
  MAX(region.recovered) AS count_of_total_recovered
FROM
  `bigquery-public-data.covid19_italy.data_by_region` AS region
GROUP BY
  region.region_name
ORDER BY
  count_of_total_recovered DESC

/*

ANSWER:
1.)Lombardia        - 4,123,903
2.)Veneto           - 2,701,259
3.)Campania         - 2,444,627
4.)Lazio            - 2,390,707
5.)Emilia-Romagna   - 2,137,865
6.)Sicilia          - 1,812,771
7.)Piemonte         - 1,696,057
8.)Puglia           - 1,632,818

*/

















-- 4. WHAT is the total number of confirmed cases in italy

-- total_confirmed_cases column is Cumulative, we use MAX not SUM
SELECT
  MAX(nat_trends.total_confirmed_cases) AS total_num_of_confirmed_cases_in_italy   
FROM
  `bigquery-public-data.covid19_italy.national_trends` AS nat_trends

/*

ANSWER:
total_num_of_confirmed_cases_in_italy: 25,929,238  

*/


















-- 5. WHAT is the total number of deaths in italy

SELECT
  MAX(nat_trends.deaths) AS total_num_of_deaths_in_italy    -- deaths column is Cumulative, we use MAX not SUM
FROM
  `bigquery-public-data.covid19_italy.national_trends` AS nat_trends

/*

ANSWER:
total_num_of_deaths_in_italy: 191,167   

*/





/*
-- used this to figure out if a column is cumulative.
SELECT
  date,
  deaths
FROM
  `bigquery-public-data.covid19_italy.national_trends` AS nat_trends
ORDER BY
  date
*/
















-- 6. WHAT is the ratio of [the number of deaths] to [the number of confirmed cases nationwide?]

SELECT
  MAX(nat_trends.deaths) AS num_of_deaths,
  MAX(nat_trends.total_confirmed_cases) AS num_of_confirmed_cases_in_italy,
  MAX(nat_trends.deaths) / MAX(nat_trends.total_confirmed_cases) AS multiply_100_for_the_ratio

FROM
  `bigquery-public-data.covid19_italy.national_trends` AS nat_trends



/*

ANSWER: 
multiply_100_for_the_ratio: 0.00737
num_of_deaths: 191,167
num_of_confirmed_cases_in_italy: 25,929,238


Multiply 0.00737 by 100 = 0.74% RATIO




EXPLANATION:

For every 100 people or confirmed cases, there are around 0.74 deaths

For every 1,000 people or confirmed cases, there are around 7.4 deaths
7.4 people - died
992.6 people - recovered

*/

















-- 7. WHAT is the ratio of [the number of hospitalized patients] to [the number of confirmed cases in Italy?]
SELECT
  MAX(nat_trends.total_hospitalized_patients) AS num_of_hospitalized_patients,
  MAX(nat_trends.total_confirmed_cases) AS num_of_confirmed_cases_in_italy,

  MAX(nat_trends.total_hospitalized_patients) / MAX(nat_trends.total_confirmed_cases) AS multiply_100_for_the_ratio

FROM
  `bigquery-public-data.covid19_italy.national_trends` AS nat_trends

/*

ANSWER: 
multiply_100_for_the_ratio: 0.001485
num_of_hospitalized_patients: 38,507
num_of_confirmed_cases_in_italy: 25,929,238


Multiply 0.001485 by 100 = 0.15% RATIO




EXPLAINED:

For every 100 people or confirmed cases in italy, 0.15% of those are hospitalized patients

For every 1,000 people or confirmed cases in italy, 1.5% of those are hospitalized patients
1.5 - hospitalized patients
998.5 - managing the illness at home


This ratio helps experts understand how serious the illness is and how many people are getting severely affected by it. 
It's like a simple math tool that helps them keep track of what's happening with the illness in Italy.

*/







/*
---------- used this to figure out if it's cumulative. (total_hospitalized_patients IS CUMULATIVE)
SELECT
  date,
  total_hospitalized_patients
FROM
  `bigquery-public-data.covid19_italy.national_trends` AS nat_trends
ORDER BY
  date


--------- used this to figure out if it's cumulative. (total_confirmed_cases IS CUMULATIVE)
SELECT
  date,
  total_confirmed_cases
FROM
  `bigquery-public-data.covid19_italy.national_trends` AS nat_trends
ORDER BY
  date
*/












-- 8. WHAT are the top 5 provinces with the most confirmed cases?

SELECT
  DISTINCT(province.province_name) AS distinct_province_name,
  MAX(province.confirmed_cases) AS count_of_confirmed_cases
FROM
  `bigquery-public-data.covid19_italy.data_by_province` AS province
GROUP BY
  distinct_province_name
ORDER BY
  count_of_confirmed_cases DESC
LIMIT
  5

/*

ANSWER:
1.) Roma          - 1,792,437
2.) Milano        - 1,336,821
3.) Napoli        - 1,303,985
4.) Torino        - 923,618
5.) Padova        - 540,031

*/



-- END