Příprava dat pro určování faktorů ovlivňujících šíření koronaviru na úrovni jednotlivých států.
Použité technologie: DBeaver, SQL, Tableau.
Výsledná data budou panelová, klíče budou stát a den.
Pro určení hlavních faktorů šíření koronaviru je potřeba kromě denních nárůstů nakažených přiřadit i počty provedených testů a počet obyvatel daného státu. Z těchto tří proměnných je potom možné vytvořit vhodnou vysvětlovanou proměnnou. Denní počty nakažených se budou dále vysvětlovat pomocí proměnných několika typů. Každý sloupec v tabulce bude představovat jednu proměnnou. Chceme získat následující sloupce:
Ukázka z výsledných panelových dat.
Výsledná tabulka obsahuje údaje v jednotlivých zemích platných daný den.
Tabulka má 94 142 řádků.
Jedna z možných vizualizací šíření koronaviru v Evropských zemích.
create view v_zeme_v_tests_ale_ne_v_diff as
select ct.country
from covid19_tests ct
except
select cbd.country
from covid19_basic_differences cbd ;
create view v_zeme_v_diff_ale_ne_v_tests as
select cbd.country
from covid19_basic_differences cbd
except
select ct.country
from covid19_tests ct ;
—- Czech republic = Czechia
—- Myanamar = Burma
—- South Korea = Korea, South
—- Taiwan= Taiwan*
—- United States = US
create table t_covid_tests_uprava_zemi as
select * from covid19_tests ct ;
update t_covid_tests_uprava_zemi set country = ‚Czechia‘ where ISO = ‚CZE‘;
update t_covid_tests_uprava_zemi set country = ‚Burma‘ where ISO = ‚MMR‘;
update t_covid_tests_uprava_zemi set country = ‚Korea, South‘ where ISO = ‚KOR‘;
update t_covid_tests_uprava_zemi set country = ‚Taiwan*‘ where ISO = ‚TWN‘;
update t_covid_tests_uprava_zemi set country = ‚US‘ where ISO = ‚USA‘;
create table t_covid_confirmed_tests as
select
cbd.country , cbd.`date` , cbd.confirmed , tctuz.tests_performed
from covid19_basic_differences cbd
left join t_covid_tests_uprava_zemi tctuz
on cbd.country = tctuz.country and cbd.`date` = tctuz.`date`
order by country asc;
create table t_mort5 as
select
country , mortaliy_under5
from economies e
where mortaliy_under5 is not null
group by country
order by country asc;
create table t_gdp_per_capita
select
country ,
GDP ,
round (gdp / population, 2) as gdp_per_capita
from economies e
where `year` = ‚2020‘
order by country asc;
create table t_gdp_per_capita_2019
select
z.country ,
e.GDP ,
round (e.gdp / e.population, 2) as gdp_per_capita
from economies e
join
(select country
from economies e2
where `year`= ‚2020‘ and gdp is null) as z
on z.country = e.country
where e.`year` = ‚2019‘ and e.GDP is not null;
update t_gdp_per_capita as base
inner join t_gdp_per_capita_2019 as a
on base.country = a.country
set base.gdp_per_capita = a.gdp_per_capita
where base.gdp_per_capita is null
and a.gdp_per_capita is not null;
create table t_gini as
select
country ,
`year` ,
gini as GINI
from economies e
where year >= 2010 and gini is not null
group by country
order by country asc;
create table t_pop_density
select
country ,
round (population / surface_area,4) as population_density
from countries c
where population != 0 and surface_area != 0
order by country asc;
create table t_religion_share as
select
base.country,
base.religion,
base.population,
a.total_population,
round ((base.population/a.total_population)*100,2) as perc_share_on_total_population
from
(select country , religion , population
from religions r
where `year` = 2020) as base
join
(select country, sum (population) as total_population
from religions r
where `year` = 2020
group by country) as a
on base.country = a.country;
create table t_life_expectancy_diff as
select
le15.country,
le15.life_expectancy_2015,
le65.life_expectancy_1965,
round (le15.life_expectancy_2015-le65.life_expectancy_1965, 2) as life_expectancy_diff
from
(select country, life_expectancy as life_expectancy_2015
from life_expectancy le
where `year` = 2015) as le15
join
(select country, life_expectancy as life_expectancy_1965
from life_expectancy le
where `year` = 1965) as le65
on le15.country = le65.country
order by country ;
create table t_weather as
select
zeme.country,
base.*
from
(select *
from weather w ) as base
left join
(select country, capital_city
from countries) as zeme
on base.city = zeme.capital_city ;
update t_weather set country = ‚Greece‘ where city = ‚Athens‘;
update t_weather set country = ‚Belgium‘ where city = ‚Brussels‘;
update t_weather set country = ‚Romania‘ where city = ‚Bucharest‘;
update t_weather set country = ‚Finland‘ where city = ‚Helsinki‘;
update t_weather set country = ‚Ukraine‘ where city = ‚Kiev‘;
update t_weather set country = ‚Portugal‘ where city = ‚Lisbon‘;
update t_weather set country = ‚Luxembourg‘ where city = ‚Luxembourg‘;
update t_weather set country = ‚Czechia‘ where city = ‚Prague‘;
update t_weather set country = ‚Italy‘ where city = ‚Rome‘;
update t_weather set country = ‚Austria‘ where city = ‚Vienna‘;
update t_weather set country = ‚Poland‘ where city = ‚Warsaw‘;
update t_weather set country = ‚Russian Federation‘ where city = ‚Moscow‘;
create table t_avg_temp as
select
*,
avg (cast (trim (trim (trailing ‚°c‘ from temp))as float)) as avg_temp
from t_weather tw
where `time` between ’06:00′ and ’18:00′
and country is not null
group by country, `date`;
create table t_srazky as
select
*,
count (rain2) as pocet_zaznamu_srazek, (count (rain2))*3 as Rain_hours
from
(select *,cast (trim (trim (trailing ‚mm‘ from rain))as float) as rain2
from t_weather tw
where country is not null) as base
where rain2 > 0
group by country, `date`;
create table t_max_gusty_wind as
select
*,
max (cast (trim (trim (trailing ‚km/h‘ from gust))as int)) as max_gusty_wind
from t_weather tw
where `time` between ’06:00′ and ’18:00′
and country is not null
group by country,`date` ;
create table t_covid_confirmed_tests_cas as
select
country ,
`date` ,
case
when dayofweek(`date`) IN (1,7) then ‚YES‘
else ‚NO‘
end as Weekend,
case when date_format(`date`, ‚%m %d‘) BETWEEN (’03 20′) AND (’06 20′) then 0
when date_format(`date`, ‚%m %d‘) BETWEEN (’06 21′) AND (’09 21′) then 1
when date_format(`date`, ‚%m %d‘) BETWEEN (’09 22′) AND (’12 20′) then 2
else 3
end as Season,
confirmed ,
tests_performed
from t_covid_confirmed_tests tcct ;
t_covid_confirmed_tests_cas.
update t_mort5 set country = ‚Brunei‘ where country = ‚Brunei Darussalam‘;
update t_mort5 set country = ‚Czechia‘ where country = ‚Czech Republic‘;
update t_mort5 set country = ‚Burma‘ where country = ‚Myanmar‘;
update t_mort5 set country = ‚Russia‘ where country = ‚Russian Federation‘;
update t_mort5 set country = ‚Korea, South‘ where country =’South Korea‘;
update t_mort5 set country = ‚Saint Kitts and Nevis‘ where country = ‚St. Kitts and Nevis‘;
update t_mort5 set country = ‚Saint Lucia‘ where country = ‚St. Lucia‘;
update t_mort5 set country = ‚Saint Vincent and the Grenadines’ where country = ‚St. Vincent and the Grenadines’;
update t_mort5 set country = ‚Congo (Kinshasa)‘ where country = ‚The Democratic Republic of Congo‘;
update t_mort5 set country = ‚Congo (Brazzaville)‘ where country = ‚Congo‘;
update t_mort5 set country = ‚US‘ where country = ‚United States‘;
update t_gdp_per_capita set country = ‚Brunei‘ where country = ‚Brunei Darussalam‘;
update t_gdp_per_capita set country = ‚Czechia‘ where country = ‚Czech Republic‘;
update t_gdp_per_capita set country = ‚Burma‘ where country = ‚Myanmar‘;
update t_gdp_per_capita set country = ‚Russia‘ where country = ‚Russian Federation‘;
update t_gdp_per_capita set country = ‚Korea, South‘ where country =’South Korea‘;
update t_gdp_per_capita set country = ‚Saint Kitts and Nevis‘ where country = ‚St. Kitts and Nevis‘;
update t_gdp_per_capita set country = ‚Saint Lucia‘ where country = ‚St. Lucia‘;
update t_gdp_per_capita set country = ‚Saint Vincent and the Grenadines’ where country = ‚St. Vincent and the Grenadines’;
update t_gdp_per_capita set country = ‚Congo (Kinshasa)‘ where country = ‚The Democratic Republic of Congo‘;
update t_gdp_per_capita set country = ‚Congo (Brazzaville)‘ where country = ‚Congo‘;
update t_gdp_per_capita set country = ‚US‘ where country = ‚United States‘;
update t_gini set country = ‚Czechia‘ where country = ‚Czech Republic‘;
update t_gini set country = ‚Burma‘ where country = ‚Myanmar‘;
update t_gini set country = ‚Russia‘ where country = ‚Russian Federation‘;
update t_gini set country = ‚Korea, South‘ where country =’South Korea‘;
update t_gini set country = ‚Saint Lucia‘ where country = ‚St. Lucia‘;
update t_gini set country = ‚Congo (Kinshasa)‘ where country = ‚The Democratic Republic of Congo‘;
update t_gini set country = ‚Congo (Brazzaville)‘ where country = ‚Congo‘;
update t_gini set country = ‚US‘ where country = ‚United States‘;
update t_pop_density set country = ‚US‘ where country = ‚United States‘;
update t_pop_density set country = ‚Korea, South‘ where country = ‚South Korea‘;
update t_pop_density set country = ‚Burma‘ where country = ‚Myanmar‘;
update t_pop_density set country = ‚Czechia‘ where country = ‚Czech Republic‘;
update t_pop_density set country = ‚Congo (Kinshasa)‘ where country = ‚Congo‘;
update t_religion_share set country = ‚Czechia‘ where country = ‚Czech Republic‘;
update t_religion_share set country = ‚Burma‘ where country = ‚Myanmar‘;
update t_religion_share set country = ‚Russia‘ where country = ‚Russian Federation‘;
update t_religion_share set country = ‚Korea, South‘ where country =’South Korea‘;
update t_religion_share set country = ‚Saint Kitts and Nevis‘ where country = ‚St. Kitts and Nevis‘;
update t_religion_share set country = ‚Saint Lucia‘ where country = ‚St. Lucia‘;
update t_religion_share set country = ‚Saint Vincent and the Grenadines’ where country = ‚St. Vincent and the Grenadines’;
update t_religion_share set country = ‚Congo (Kinshasa)‘ where country = ‚The Democratic Republic of Congo‘;
update t_religion_share set country = ‚Congo (Brazzaville)‘ where country = ‚Congo‘;
update t_religion_share set country = ‚US‘ where country = ‚United States‘;
update t_religion_share set country = ‚Taiwan*‘ where country = ‚Taiwan‘;
update t_life_expectancy_diff set country = ‚Czechia‘ where country = ‚Czech Republic‘;
update t_life_expectancy_diff set country = ‚Burma‘ where country = ‚Myanmar‘;
update t_life_expectancy_diff set country = ‚Russia‘ where country = ‚Russian Federation‘;
update t_life_expectancy_diff set country = ‚Korea, South‘ where country =’South Korea‘;
update t_life_expectancy_diff set country = ‚Congo (Kinshasa)‘ where country = ‚The Democratic Republic of Congo‘;
update t_life_expectancy_diff set country = ‚Congo (Brazzaville)‘ where country = ‚Congo‘;
update t_life_expectancy_diff set country = ‚US‘ where country = ‚United States‘;
update t_life_expectancy_diff set country = ‚Taiwan*‘ where country = ‚Taiwan‘;
update t_avg_temp set country = ‚Russia‘ where country = ‚Russian Federation‘;
update t_srazky set country = ‚Russia‘ where country = ‚Russian Federation‘;
update t_max_gusty_wind set country = ‚Russia‘ where country = ‚Russian Federation‘;
create table t_christianity as
select country , religion, perc_share_on_total_population as Christianity
from t_religion_share trs2 where religion = ‚Christianity‘;
create table t_islam as
select country , religion, perc_share_on_total_population as Islam
from t_religion_share trs2 where religion = ‚Islam‘;
create table t_Unaffiliated_Religions as
select country , religion, perc_share_on_total_population as Unaffiliated_Religions
from t_religion_share trs2 where religion = ‚Unaffiliated Religions‘;
create table t_hinduism as
select country , religion, perc_share_on_total_population as Hinduism
from t_religion_share trs2 where religion = ‚Hinduism‘;
create table t_buddhism as
select country , religion, perc_share_on_total_population as Buddhism
from t_religion_share trs2 where religion = ‚Buddhism‘;
create table t_Folk_Religions as
select country , religion, perc_share_on_total_population as Folk_Religions
from t_religion_share trs2 where religion = ‚Folk Religions‘;
create table t_Other_Religions as
select country , religion, perc_share_on_total_population as Other_Religions
from t_religion_share trs2 where religion = ‚Other Religions‘;
create table t_Judaism as
select country , religion, perc_share_on_total_population as Judaism
from t_religion_share trs2 where religion = ‚Judaism‘;
create table t_population as
select country ,population
from countries;
update t_population set country = ‚US‘ where country = ‚United States‘;
update t_population set country = ‚Korea, South‘ where country = ‚South Korea‘;
update t_population set country = ‚Burma‘ where country = ‚Myanmar‘;
update t_population set country = ‚Czechia‘ where country = ‚Czech Republic‘;
update t_population set country = ‚Congo (Kinshasa)‘ where country = ‚Congo‘;
update t_population set country = „Cote d’Ivoire“ where country = ‚Ivory Coast‘;
update t_population set country = ‚Fiji‘ where country = ‚Fiji Islands‘;
update t_population set country = ‚Holy See‘ where country = ‚Holy See (Vatican City State)‘;
update t_population set country = ‚Libya‘ where country = ‚Libyan Arab Jamahiriya‘;
update t_population set country = ‚Micronesia‘ where country = ‚Micronesia, Federated States of‘;
update t_population set country = ‚Russia‘ where country = ‚Russian Federation‘;
update t_population set country = ‚Timor-Leste‘ where country = ‚East Timor‘;
update t_pop_density set country = „Cote d’Ivoire“ where country = ‚Ivory Coast‘;
update t_pop_density set country = ‚Fiji‘ where country = ‚Fiji Islands‘;
update t_pop_density set country = ‚Holy See‘ where country = ‚Holy See (Vatican City State)‘;
update t_pop_density set country = ‚Libya‘ where country = ‚Libyan Arab Jamahiriya‘;
update t_pop_density set country = ‚Micronesia‘ where country = ‚Micronesia, Federated States of‘;
update t_pop_density set country = ‚Russia‘ where country = ‚Russian Federation‘;
update t_pop_density set country = ‚Timor-Leste‘ where country = ‚East Timor‘;
update t_mort5 set country = „Cote d’Ivoire“ where country = ‚Ivory Coast‘;
update t_gdp_per_capita set country = „Cote d’Ivoire“ where country = ‚Ivory Coast‘;
update t_gini set country = „Cote d’Ivoire“ where country = ‚Ivory Coast‘;
update t_life_expectancy_diff set country = „Cote d’Ivoire“ where country = ‚Ivory Coast‘;
update t_life_expectancy_diff set country = ‚Micronesia‘ where country = ‚Micronesia (country)‘;
update t_life_expectancy_diff set country = ‚Timor-Leste‘ where country = ‚Timor‘;
update t_religion_share set country = „Cote d’Ivoire“ where country = ‚Ivory Coast‘;
create table t_Jiri_Valasek_projekt_SQL_final as
select
tcctc.*,
tp.population ,
tpd.population_density ,
tgpc.gdp_per_capita ,
tg.GINI ,
tm.mortaliy_under5 ,
tled.life_expectancy_diff ,
tc.Christianity ,
ti.Islam ,
tb.Buddhism ,
th.Hinduism ,
tj.Judaism ,
tfr.Folk_Religions ,
tur.Unaffiliated_Religions ,
tor.Other_Religions ,
tat.avg_temp ,
ts.Rain_hours ,
tmgw.max_gusty_wind
from t_covid_confirmed_tests_cas tcctc
left join t_population tp
on tcctc.country = tp.country
left join t_pop_density tpd
on tcctc.country = tpd.country
left join t_gdp_per_capita tgpc
on tcctc.country = tgpc.country
left join t_gini tg
on tcctc.country = tg.country
left join t_mort5 tm
on tcctc.country = tm.country
left join t_life_expectancy_diff tled
on tcctc.country = tled.country
left join t_christianity tc
on tcctc.country = tc.country
left join t_islam ti
on tcctc.country = ti.country
left join t_buddhism tb
on tcctc.country = tb.country
left join t_hinduism th
on tcctc.country = th.country
left join t_judaism tj
on tcctc.country = tj.country
left join t_folk_religions tfr
on tcctc.country = tfr.country
left join t_unaffiliated_religions tur
on tcctc.country = tur.country
left join t_other_religions tor
on tcctc.country = tor.country
left join t_avg_temp tat
on tcctc.country = tat.country and tcctc.`date` = tat.`date`
left join t_srazky ts
on tcctc.country = ts.country and tcctc.`date` = ts.`date`
left join t_max_gusty_wind tmgw
on tcctc.country = tmgw.country and tcctc.`date` = tmgw.`date` ;