Load the Data

library(covdata)
#> 
#> Attaching package: 'covdata'
#> The following object is masked from 'package:datasets':
#> 
#>     uspop
library(tidyverse)
#> ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
#> ✔ ggplot2 3.3.3     ✔ purrr   0.3.4
#> ✔ tibble  3.1.1     ✔ dplyr   1.0.5
#> ✔ tidyr   1.1.3     ✔ stringr 1.4.0
#> ✔ readr   1.4.0     ✔ forcats 0.5.1
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag()    masks stats::lag()

National-level case and mortality data from the European Centre for Disease Prevention and Control (ECDC).

## Print the weekly national-level table
covnat_weekly
#> # A tibble: 12,720 x 11
#>    date       year_week cname      iso3      pop cases deaths cu_cases cu_deaths
#>    <date>     <chr>     <chr>      <chr>   <dbl> <dbl>  <dbl>    <dbl>     <dbl>
#>  1 2019-12-30 2020-01   Afghanist… AFG    3.89e7     0      0        0         0
#>  2 2020-01-06 2020-02   Afghanist… AFG    3.89e7     0      0        0         0
#>  3 2020-01-13 2020-03   Afghanist… AFG    3.89e7     0      0        0         0
#>  4 2020-01-20 2020-04   Afghanist… AFG    3.89e7     0      0        0         0
#>  5 2020-01-27 2020-05   Afghanist… AFG    3.89e7     0      0        0         0
#>  6 2020-02-03 2020-06   Afghanist… AFG    3.89e7     0      0        0         0
#>  7 2020-02-10 2020-07   Afghanist… AFG    3.89e7     0      0        0         0
#>  8 2020-02-17 2020-08   Afghanist… AFG    3.89e7     0      0        0         0
#>  9 2020-02-24 2020-09   Afghanist… AFG    3.89e7     1      0        1         0
#> 10 2020-03-02 2020-10   Afghanist… AFG    3.89e7     3      0        4         0
#> # … with 12,710 more rows, and 2 more variables: r14_cases <dbl>,
#> #   r14_deaths <dbl>

Plot national cases over time, highlighting specific countries of interest

## Libraries for the graphs
library(ggrepel)

## Convenience "not in" operator: for each element of `x`, TRUE when that
## element does not appear in `y` (the elementwise negation of `%in%`).
`%nin%` <- function(x, y) {
  found <- x %in% y
  !found
}


## Countries to highlight: ISO3 codes of the (at most) 20 countries with
## more than 10 million people (pop > 1e7) that have the highest mean
## cumulative case rate, averaged over the rows where cumulative cases
## exceed 99.
focus_on <- covnat_weekly %>%
  filter(cu_cases > 99, pop > 1e7) %>%
  mutate(rate = cu_cases / pop) %>%
  group_by(iso3) %>%
  summarize(mean_rate = mean(rate, na.rm = TRUE)) %>%
  ## with_ties = FALSE caps the result at 20 codes. The default keeps every
  ## tied row, which could exceed the 21-entry palette (20 highlights plus
  ## one fallback color) that the plot feeds to scale_color_manual().
  slice_max(order_by = mean_rate, n = 20, with_ties = FALSE) %>%
  pull(iso3)

## Colors: twenty highlight colors (several hues are reused) followed by a
## single gray. The plot hands this vector to scale_color_manual() unnamed,
## so entries are matched to the color groups by position.
cgroup_cols <- c(
  "#195F90FF", "#D76500FF", "#238023FF", "#AB1F20FF",
  "#7747A3FF", "#70453CFF", "#D73EA8FF", "#666666FF",
  "#96971BFF", "#1298A6FF", "#6F9BD6FF", "#FF952DFF",
  "#195F90FF", "#D76500FF", "#238023FF", "#70453CFF",
  "#D73EA8FF", "#666666FF", "#96971BFF", "#1298A6FF",
  "gray70"
)

## Cumulative cases per capita for countries with more than 10 million
## people, with time measured from each country's first row above 99
## cumulative cases. Only the `focus_on` countries are colored and labeled;
## every other country shares the fallback "ZZOTHER" color group.
covnat_weekly %>%
  filter(cu_cases > 99, pop > 1e7) %>%
  mutate(rate = cu_cases / pop) %>%
  ## Measure elapsed time within each country. Without the grouping,
  ## min(date) is the earliest date in the whole filtered table, so the
  ## x axis would not be "days since the 100th case" for any country that
  ## crossed the threshold later.
  group_by(iso3) %>%
  mutate(days_elapsed = date - min(date)) %>%
  ungroup() %>%
  mutate(
    ## Label text exists only on rows at the latest date in the data
    end_label = ifelse(date == max(date), cname, NA),
    end_label = recode(end_label,
                       `United States` = "USA",
                       `Bolivia, Plurinational State of` = "Bolivia",
                       `Russian Federation` = "Russia",
                       `Dominican Republic` = "Dominican Rep.",
                       `Iran, Islamic Republic of` = "Iran",
                       `Korea, Republic of` = "South Korea",
                       `United Kingdom` = "UK"),
    cname = recode(cname,
                   `United States` = "USA",
                   `Iran, Islamic Republic of` = "Iran",
                   `Korea, Republic of` = "South Korea",
                   `United Kingdom` = "UK"),
    ## Blank out labels and collapse colors for non-highlighted countries
    end_label = case_when(iso3 %in% focus_on ~ end_label,
                          TRUE ~ NA_character_),
    cgroup = case_when(iso3 %in% focus_on ~ iso3,
                       TRUE ~ "ZZOTHER")
  ) %>%
  ggplot(mapping = aes(x = days_elapsed, y = rate,
                       color = cgroup, label = end_label,
                       group = cname)) +
  geom_line(size = 0.5) +
  geom_text_repel(nudge_x = 0.75,
                  segment.color = NA) +
  ## "none" is the non-deprecated way to suppress the color legend
  guides(color = "none") +
  scale_color_manual(values = cgroup_cols) +
  labs(x = "Days Since 100th Confirmed Case",
       y = "Cumulative reported Cases per capita",
       title = "Cumulative Reported Cases of COVID-19 per capita",
       ## The filter above is pop > 1e7, i.e. more than 10 million people
       subtitle = paste("Selected Countries with >10m population. ECDC data as of",
                        format(max(covnat_weekly$date), "%A, %B %e, %Y")),
       caption = "Kieran Healy @kjhealy / Data: https://www.ecdc.europa.eu/") +
  theme_minimal()
#> Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.
#> Warning: Removed 4988 rows containing missing values (geom_text_repel).

Deaths per Capita


## Deaths per million population (the `deaths` column scaled by `pop`)
## over calendar time, one panel per highlighted country.
covnat_weekly %>%
  filter(iso3 %in% focus_on) %>%
  mutate(deaths_per_million = (deaths / pop) * 1e6) %>%
  ggplot(mapping = aes(x = date, y = deaths_per_million)) +
  geom_line(size = 0.5) +
  ## Disabled: whole-number y-axis labels
  ## scale_y_continuous(labels = scales::comma_format(accuracy = 1)) +
  facet_wrap(vars(cname)) +
  labs(
    x = "Date",
    y = "Deaths per Million Population",
    title = "Deaths from COVID-19, Selected Countries",
    subtitle = paste(
      "ECDC weekly data as of",
      format(max(covnat_weekly$date), "%A, %B %e, %Y")
    ),
    caption = "Kieran Healy @kjhealy / Data: https://www.ecdc.europa.eu/"
  ) +
  theme_minimal()

Note the effects of reporting issues in various countries.