#!/usr/bin/env python3
# gigo-20200324.py
# This script _tries to_ independently recalculate,
# from first principles, the graphs in the March 2020 paper
# "Social distancing strategies for curbing the COVID-19 epidemic".
# This script is _not_ an endorsement of the paper's model,
# and is _not_ an endorsement of the paper's policy conclusions.
# The paper's critical <=60% assumption regarding distancing
# comes from a reference that actually says >=60%; this in turn
# undermines all of the quantitative conclusions in the paper.
# The paper also fails to model other NPIs and future vaccinations.
# See below for various comments and questions regarding the model.
# Using this script to recalculate Figure 3B in paper:
plottingscript = """
./gigo-20200324.py > gigo.out
( echo 'set xrange [0:1200]'
echo 'set yrange [0:50]'
echo 'set xtics ("2020" 0,"2021" 366,"2022" 731,"2023" 1096)'
echo 'set terminal pdfcairo font "NotoMono" size 12,3'
echo 'plot "gigo.out" using 1:(500000*$17) notitle with lines lt rgb "red" linewidth 3, \'
echo '"gigo.out" using 1:(10000*($7+$11+$15+$17)) notitle with lines lt rgb "black" linewidth 3, \'
echo '"gigo.out" using 1:(50*(1-$3)) notitle with lines lt rgb "green" linewidth 3, \'
echo '"gigo.out" using 1:(37.5*$23) notitle with lines lt rgb "blue" linewidth 3'
) | gnuplot > gigo.pdf
"""
# The output seems to closely match the paper for 2020 and 2021,
# but some discrepancies build up in the March 2022 spike.
# (Of course one hopes for widespread vaccination already in 2021.)
# Clearly the paper's model has a mismatch with this script,
# or a mismatch with the paper's (unpublished?) software, or both.
# One candidate for a mismatch: the initial pulse.
# The long-term spike heights seem sensitive to the pulse details;
# the paper doesn't note this and doesn't clearly specify the pulse.
# This also brings the paper's 37.5 safety conclusion into question
# _even if_ the paper's stated assumptions are correct.
import math
# Paper's model splits population into nine disjoint subpopulations:
S = 1 # susceptible to disease but not exposed
E = 0 # exposed to disease
I = 0 # infectious, not hospitalized, not in critical care
# (paper pointlessly splits I into IR+IH+IC)
RR = 0 # previously had mild infection
HH = 0 # hospitalized with moderate infection
RH = 0 # previously was hospitalized with moderate infection
HC = 0 # hospitalized with critical infection
CC = 0 # in critical care with critical infection
RC = 0 # previously in critical care
# Paper assumes that hospitalized/critical people don't infect.
# Model question: How realistic is this? Don't they infect doctors?
# Model question: Is it realistic to model infectiousness as yes/no?
# Paper assumes that previously infected people are immune.
# Model question: How realistic is this?
# Paper takes a yes/no model of distancing.
distancing = 0 # will always be 0 or 1
# Time scale: Let's count days from beginning of 1 January 2020.
establishment = 70 # paper starts model on 11 March 2020
day = establishment
# Paper stops in July 2022 (912 days).
while day < 1199.999999:
# Paper assumes that R0 is between 2.0 and 2.5.
R0 = 2.0 # best case considered in paper
# Model question: What is the actual value?
# Paper assumes that summer reduces R0 between 0% and 30%.
summerimprovement = 0.3 # best case considered in paper
# Paper assumes a smooth cosine curve for summer R0 reduction.
wintershift = 26.6 # peak of winter before end of December; paper's 3.8 weeks
angle = 2*math.pi*(day+wintershift)/365.25
R0 *= 1-summerimprovement*0.5*(1-math.cos(angle))
# Model question: What is the actual summer improvement?
# Paper assumes that distancing reduces R0 by _at most_ 60%.
distancingimpact = 0.6
# Paper tries to justify this by citing reference 3 for the claim that
# China's "intense" distancing reduced R0 by 60%. However, reference 3
# actually says something very different: namely, if R0 started around
# 2.5 then evidently China must have reduced R0 by _at least_ 60%.
# This error in paper also undermines paper's 25%-70% conclusion.
# Model question: What is the actual effect of distancing?
# One of the distancing policies considered in paper:
if day >= establishment+14: # no distancing before 2 weeks
if I >= 37.5/10000: distancing = 1
if I <= 10.0/10000: distancing = 0
# Paper seems to claim that 37.5 is safe for critical-care capacity,
# but paper does not give adequate calculations to justify this.
# Futhermore, this policy obviously cannot be exactly implemented:
# we don't have instant perfect I measurements. Would be more useful
# to model implementable policies such as simple month-on-month-off,
# or (if distancing impact is larger) two-weeks-on-six-weeks-off.
# Paper advocates widespread surveillance to try to implement policy,
# but does not analyze impact of delays and errors in surveillance.
# Paper also claims that widespread surveillance is "necessary" for
# intermittent distancing to be "effective"; but paper's model
# includes scenarios where a simple month-on-month-off policy would
# be effective, and such a policy does not require surveillance.
# Having more data about population-infection rates would be useful
# for many reasons, but paper's argument for this is wrong.
R0 *= 1-distancing*distancingimpact
# Paper's model also assumes that other NPIs fail to reduce R0.
# Model question: What is impact of widespread hand-washing?
# Model question: What is impact of widespread wearing of masks?
# Paper's model also ignores, e.g., possibility of widespread
# vaccination in mid-2021, which would rapidly decrease S
# (assuming the vaccination is reasonably effective).
# After repeating the standard recommendation to increase
# critical-care capacity, paper says that this "allows" using such
# capacity to infect more people now (by increasing the 37.5 above).
# This is unethical: it will result in many unnecessary deaths,
# under the reasonable assumption of future widespread vaccination.
# Part of the problem here is the lack of vaccination in the paper's
# model. Another part of the problem is that the paper sets (1) a
# primary goal of not overrunning the critical-care system and (2) a
# secondary goal of minimizing distancing time, instead of treating
# both of these as tools to support the actual goal of positive health
# outcomes. The paper's model doesn't even notice how many people die.
# Paper assumes that infectiousness lasts 5 days on average.
Ispeed = 1/5.0 # "gamma" in paper
# Model question: What is the actual period of infectiousness?
Sspeed = R0*Ispeed
# For example, if R0 is 2 and Ispeed is 1/5:
# infectious people become non-infectious at speed 1/5,
# while they expose other people at speed 2/5 when S=1.
# As S drops, exposure drops proportionally.
Schange = -Sspeed*I*S
Echange = Sspeed*I*S
# Paper assumes that incubation lasts 5 days on average.
Espeed = 1.0/5 # "v" in paper; not clear in paper but see reference 4
# Model question: What is the actual incubation time?
Echange -= Espeed*E
Ichange = Espeed*E
# Paper does not clearly state model of initial burst of infections.
# As noted above, paper's 37.5 safety claim seems to depend on this.
# Reference 4 says "small pulse in the force of infection",
# namely "an increase of 0.01/week for one half week".
if day < establishment+3:
Schange -= 0.01/7
Echange += 0.01/7
# Paper's model has three possibilities for infection progressing.
prmild = 0.956 # "p_R" in paper
prmoderate = 0.0308 # "p_H" in paper
prcritical = 0.0132 # "p_C" in paper
# Model question: What are the actual probabilities?
# These probabilities seem to be based on current testing,
# but almost all testing has been for people who feel sick,
# so it seems _possible_ that actual probabilities are much lower.
# Jack Ma offered 500000 test kits to the United States;
# why hasn't the country tested 10000 random people each week,
# to provide robust infection data in support of policy decisions?
# (This is _not_ an endorsement of the extremely risky notion
# that without such robust data we should avoid intervention.)
Ichange -= Ispeed*I
RRchange = prmild*Ispeed*I
HHchange = prmoderate*Ispeed*I
HCchange = prcritical*Ispeed*I
# Paper assumes that hospitalization lasts 8 days on average.
hospitalspeed = 1.0/8 # "delta_H" in paper
# Model question: What is the actual hospitalization time?
HHchange -= hospitalspeed*HH
RHchange = hospitalspeed*HH
HCchange -= hospitalspeed*HC
CCchange = hospitalspeed*HC
# Paper assumes that critical care lasts 10 days on average.
criticalcarespeed = 1.0/10
# Model question: What is the actual critical-care time?
CCchange -= criticalcarespeed*CC
RCchange = criticalcarespeed*CC
# 20-minute discretization of continuous model:
resolution = 1/72.0
day += resolution
S += Schange*resolution
E += Echange*resolution
I += Ichange*resolution
RR += RRchange*resolution
HH += HHchange*resolution
RH += RHchange*resolution
HC += HCchange*resolution
CC += CCchange*resolution
RC += RCchange*resolution
population = S+E+I+RR+HH+RH+HC+CC+RC
assert population >= 0.999999
assert population <= 1.000001
print('%.9f S %.9f E %.9f I %.9f RR %.9f HH %.9f RH %.9f HC %.9f CC %.9f RC %.9f total %.9f distancing %s'
% (day,S,E,I,RR,HH,RH,HC,CC,RC,population,distancing))