COVID19データを分析する - sshshobj’s diary

前回はSASでJSONデータを取り込む記事を紹介しました。

今回は取り込んだCOVID19データを分析するプログラムを紹介します。

下のプログラムは、増加幅を問わず新規感染者数が前日より連続して増加したケースを集計し、集計結果と詳細を出力します。

f:id:essbase:20201128174711p:plain

上の例は11月27日までの集計結果です。

con_casesは新規感染者数の増加が連続した日数(1日から6日まで)を表し、

exact_countで各々のケースの数を集計しています。

total_countは積み上げの集計結果でtotalで160件になります。

各々のケースは下の詳細表で確認できます。連続増加が最も長く計測されたのは7月1日から7月6日までのケースで、11月からは連続して増加する傾向が強いようです。

7月1日は緊急事態宣言が解除された後で全国で外出が目立ちましたし、11月はGo To キャンペーンで同じく外出の増加が計測されたためにこのような結果が計測されています。

f:id:essbase:20201128175308p:plain

詳細は省きますが、hash objectのsumincメソッドを使用して連続感染者の増加がみられたケースをカテゴリ分けし、各々のケースが発生したイベントの回数を集計しています。

プログラム

/* Country code; later steps read the JSON-library member named <cntry>_data */
%let cntry=JPN;

/* Temporary fileref that receives the HTTP response body */
filename resp temp;

/* Download the Our World in Data COVID-19 JSON file into RESP */
proc http
  method = "GET"
  url    = 'https://covid.ourworldindata.org/data/owid-covid-data.json'
  out    = resp;
run;

/* Expose the downloaded JSON as library SPACE through the JSON engine */
libname space JSON fileref=resp;
 
/* Build HAVE from the selected country's JSON member: adds a numeric date,
   its year/month/day parts and the row-to-row change in new_cases.
   The JSON ordinal columns and the original date column are dropped on
   output, and the numeric _date is written out under the name DATE. */
data have (drop=ordinal: date rename=(_date=date));
  /* Listed before SET so these columns lead the variable order */
  retain year month day _date dif_new_cases;
  set space.&cntry._data;
  by date;
  format _date yymmdds10.;

  /* Change in new_cases relative to the previously read row */
  dif_new_cases = dif(new_cases);

  /* Parse the incoming date with the YYMMDD10. informat, then split it */
  _date = input(date, yymmdd10.);
  year  = year(_date);
  month = month(_date);
  day   = day(_date);
run;
 
/* Builds WANT from HAVE. For each run of rows sharing a month, a first pass
   finds the monthly maxima of dif_new_cases, new_cases, new_deaths and
   stringency_index (with the dates they occurred on); a second pass re-reads
   the same rows and writes them out with those maxima attached. */
data want;
 /* Declaring the numerics here, ahead of SET, also fixes their position in
    the variable order. */
 length year month day dif_new_cases max_dif_new_cases max_dif_new_cases_date 
 new_cases max_new_cases max_new_cases_date new_deaths max_new_deaths 
 max_new_deaths_date stringency_index max_stringency_index 
 max_stringency_index_date 8;
 format date max_dif_new_cases_date max_new_cases_date max_new_deaths_date 
 max_stringency_index_date yymmdds10.;
 
 /* Pass 1: scan one month group. The max_* variables are neither read from
    HAVE nor retained, so they start out missing on every step iteration,
    i.e. at the start of every month group. */
 do until(last.month);
 set have;
 by month notsorted;
 /* Ignore the first row of the month when looking for the largest
    day-over-day change (its difference is set to missing for this pass). */
 dif_new_cases=ifn(first.month, ., dif_new_cases);
 
 /* A missing value compares lower than any number, so the first
    non-missing value always replaces the initial missing maximum. */
 if max_dif_new_cases < dif_new_cases then
 do;
 max_dif_new_cases=dif_new_cases;
 max_dif_new_cases_date=date;
 end;
 
 if max_new_cases < new_cases then
 do;
 max_new_cases=new_cases;
 max_new_cases_date=date;
 end;
 
 if max_new_deaths < new_deaths then
 do;
 max_new_deaths=new_deaths;
 max_new_deaths_date=date;
 end;
 
 if max_stringency_index < stringency_index then
 do;
 max_stringency_index=stringency_index;
 max_stringency_index_date=date;
 end;
 end;
 
 /* Pass 2: this second SET reads the same month group again with its own
    read position and writes every row. The values read here replace the
    working copies from pass 1, including dif_new_cases, while the max_*
    values computed above are carried onto each output row. */
 do until(last.month);
 set have;
 by month notsorted;
 output;
 end;
 /* Drops the numeric variables positioned from new_cases through
    new_tests_per_thousand, plus tests_units.
    NOTE(review): the range is resolved by variable position, and the LENGTH
    and FORMAT statements above place several max_* variables and date after
    new_cases, so they appear to fall inside the range too - confirm that
    this is intended. */
drop new_cases-numeric-new_tests_per_thousand tests_units;
run;
 
*Output a ranking of the consecutive-increase runs;
/* Tallies, per running-streak length (con_cases), how many days reached that
   length, then writes one row per length with a cumulative total. */
data consec_event;
 if _N_=1 then do;
 /* Never executes; it only makes the variables of WANT (including the
    year/month/day used as hash data) known to the step. */
 if 0 then set want;
 /* Hash keyed by con_cases, ordered descending. SUMINC names exact_count as
    the increment variable for the per-key summary. */
 dcl hash consecevt(ordered:"d",suminc:"exact_count");
 consecevt.definekey("con_cases");
 consecevt.definedata("con_cases","year","month","day");
 consecevt.definedone();
 end;
 /* The streak counter restarts for every year/month group, so streaks are
    not carried across a month boundary. */
 con_cases=0;
 do until(last.month);
 set want end=lr;
 by year month notsorted;
 /* Extend the streak when the day-over-day change is positive; otherwise
    reset it to zero. */
 con_cases=ifn(sign(dif_new_cases)=1,con_cases+1,0);
 /* On an increasing day, look up the current streak length in the hash,
    inserting it if absent; the key summary is maintained from exact_count.
    NOTE(review): the exact summary value after the first insert depends on
    SUMINC/REF semantics - confirm against the SAS documentation. */
 if sign(dif_new_cases)=1 then consecevt.ref();
 end;
 /* Continue only once the last row of WANT has been read. */
 if lr;
 /* exact_count starts at 1 and is retained, so it serves as the increment
    while rows are being read. */
 retain exact_count 1;
 total_adjust=0;
 /* Walk the streak lengths from longest to shortest. This relies on the keys
    being the integers 1..num_items, which follows from con_cases only ever
    growing by 1 from 0. */
 do con_cases=consecevt.num_items to 1 by -1;
 /* Fetch the summary stored for the current con_cases into exact_count. */
 consecevt.sum(sum:exact_count);
 /* Running total: this length plus all longer lengths already written. */
 total_count=exact_count+total_adjust;
 output;
 total_adjust+exact_count;
 end;
 /* Keeps the variables positioned from con_cases through total_count.
    NOTE(review): by position this range appears to include total_adjust as
    well - confirm. */
 keep con_cases--total_count;
run;
 
*Output the details of the consecutive-increase runs as a time series;
/* Records every day that is part of an increasing streak, converts each
   record into a (start_date, end_date, con_cases) triple, keeps one triple
   per start_date, and writes the result. */
data consec_event_detail;
 if _N_=1 then do;
 /* Never executes; it only makes the variables of WANT known to the step. */
 if 0 then set want;
 /* consecevt: allows several entries per con_cases key, ordered descending.
    out: one entry per start_date key, ordered descending. */
 dcl hash consecevt(multidata:"y",ordered:"d") out(multidata:"n",ordered:"d");
 consecevt.definekey("con_cases");
 consecevt.definedata("con_cases","year","month","day");
 consecevt.definedone();
 *Exclude duplicate keys;
 out.definekey("start_date");
 out.definedata("start_date","end_date","con_cases");
 out.definedone();
 
 /* Iterators over both hashes. */
 dcl hiter ci("consecevt") outi("out");
 end;
 format start_date end_date yymmdds10.;
 /* The streak counter restarts for every year/month group. */
 con_cases=0;
 do until(last.month);
 set want end=lr;
 by year month notsorted;
 /* Extend the streak when the day-over-day change is positive; otherwise
    reset it to zero. */
 con_cases=ifn(sign(dif_new_cases)=1,con_cases+1,0);
 /* Store the day together with its current streak length while a streak is
    in progress. */
 if con_cases then consecevt.add();
 end;
 /* Continue only once the last row of WANT has been read. */
 if lr;
 /* Visit the stored days from the longest streak length downwards. */
 do while(ci.next()=0);
 end_date=mdy(month,day,year);
 /* A streak of con_cases days ending on end_date began con_cases-1 days
    earlier. */
 start_date=end_date-con_cases+1;
 /* out holds one entry per start_date, so after the first insert for a
    start_date later inserts fail; because the longest lengths are visited
    first, the longest streak for each start_date is the one kept. The return
    code is captured in _iorc_, presumably so a rejected insert is not
    reported as an error - confirm. */
 _iorc_=out.add();
 end;
 /* Redundant: the step was already restricted to the last row above. */
 if lr;
 /* Write the de-duplicated streaks in the order given by out. */
 do while(outi.next()=0);
 output;
 end;
 drop dif: max: year month day;
run;