#!/usr/bin/env python3

# gigo-20200324.py

# This script _tries to_ independently recalculate,
# from first principles, the graphs in the March 2020 paper
# "Social distancing strategies for curbing the COVID-19 epidemic".

# This script is _not_ an endorsement of the paper's model,
# and is _not_ an endorsement of the paper's policy conclusions.
# The paper's critical <=60% assumption regarding distancing
# comes from a reference that actually says >=60%; this in turn
# undermines all of the quantitative conclusions in the paper.
# The paper also fails to model other NPIs and future vaccinations.
# See below for various comments and questions regarding the model.

# Using this script to recalculate Figure 3B in paper:
plottingscript = """
  ./gigo-20200324.py > gigo.out
  ( echo 'set xrange [0:1200]'
    echo 'set yrange [0:50]'
    echo 'set xtics ("2020" 0,"2021" 366,"2022" 731,"2023" 1096)'
    echo 'set terminal pdfcairo font "NotoMono" size 12,3'
    echo 'plot "gigo.out" using 1:(500000*$17) notitle with lines lt rgb "red" linewidth 3, \'
    echo '"gigo.out" using 1:(10000*($7+$11+$15+$17)) notitle with lines lt rgb "black" linewidth 3, \'
    echo '"gigo.out" using 1:(50*(1-$3)) notitle with lines lt rgb "green" linewidth 3, \'
    echo '"gigo.out" using 1:(37.5*$23) notitle with lines lt rgb "blue" linewidth 3'
  ) | gnuplot > gigo.pdf
"""

# The output seems to closely match the paper for 2020 and 2021,
# but some discrepancies build up in the March 2022 spike.
# (Of course one hopes for widespread vaccination already in 2021.)
# Clearly the paper's model has a mismatch with this script,
# or a mismatch with the paper's (unpublished?) software, or both.

# One candidate for a mismatch: the initial pulse.
# The long-term spike heights seem sensitive to the pulse details;
# the paper doesn't note this and doesn't clearly specify the pulse.
# This also brings the paper's 37.5 safety conclusion into question
# _even if_ the paper's stated assumptions are correct.

import math

# Paper's model splits population into nine disjoint subpopulations:
S = 1 # susceptible to disease but not exposed
E = 0 # exposed to disease
I = 0 # infectious, not hospitalized, not in critical care
      # (paper pointlessly splits I into IR+IH+IC)
RR = 0 # previously had mild infection
HH = 0 # hospitalized with moderate infection
RH = 0 # previously was hospitalized with moderate infection
HC = 0 # hospitalized with critical infection
CC = 0 # in critical care with critical infection
RC = 0 # previously in critical care

# Paper assumes that hospitalized/critical people don't infect.
# Model question: How realistic is this? Don't they infect doctors?
# Model question: Is it realistic to model infectiousness as yes/no?

# Paper assumes that previously infected people are immune.
# Model question: How realistic is this?

# Paper takes a yes/no model of distancing.
distancing = 0 # will always be 0 or 1

# Time scale: Let's count days from beginning of 1 January 2020.
establishment = 70 # paper starts model on 11 March 2020
day = establishment

# Paper stops in July 2022 (912 days).
while day < 1199.999999:

  # Paper assumes that R0 is between 2.0 and 2.5.
  R0 = 2.0 # best case considered in paper
  # Model question: What is the actual value?

  # Paper assumes that summer reduces R0 between 0% and 30%.
  summerimprovement = 0.3 # best case considered in paper
  # Paper assumes a smooth cosine curve for summer R0 reduction.
  wintershift = 26.6 # peak of winter before end of December; paper's 3.8 weeks
  angle = 2*math.pi*(day+wintershift)/365.25
  R0 *= 1-summerimprovement*0.5*(1-math.cos(angle))
  # Model question: What is the actual summer improvement?

  # Paper assumes that distancing reduces R0 by _at most_ 60%.
  distancingimpact = 0.6
  # Paper tries to justify this by citing reference 3 for the claim that
  # China's "intense" distancing reduced R0 by 60%. However, reference 3
  # actually says something very different: namely, if R0 started around
  # 2.5 then evidently China must have reduced R0 by _at least_ 60%.
  # This error in paper also undermines paper's 25%-70% conclusion.
  # Model question: What is the actual effect of distancing?

  # One of the distancing policies considered in paper:
  if day >= establishment+14: # no distancing before 2 weeks
    if I >= 37.5/10000: distancing = 1
    if I <= 10.0/10000: distancing = 0

  # Paper seems to claim that 37.5 is safe for critical-care capacity,
  # but paper does not give adequate calculations to justify this.

  # Futhermore, this policy obviously cannot be exactly implemented:
  # we don't have instant perfect I measurements. Would be more useful
  # to model implementable policies such as simple month-on-month-off,
  # or (if distancing impact is larger) two-weeks-on-six-weeks-off.

  # Paper advocates widespread surveillance to try to implement policy,
  # but does not analyze impact of delays and errors in surveillance.
  # Paper also claims that widespread surveillance is "necessary" for
  # intermittent distancing to be "effective"; but paper's model
  # includes scenarios where a simple month-on-month-off policy would
  # be effective, and such a policy does not require surveillance.
  # Having more data about population-infection rates would be useful
  # for many reasons, but paper's argument for this is wrong.

  R0 *= 1-distancing*distancingimpact

  # Paper's model also assumes that other NPIs fail to reduce R0.
  # Model question: What is impact of widespread hand-washing?
  # Model question: What is impact of widespread wearing of masks?

  # Paper's model also ignores, e.g., possibility of widespread
  # vaccination in mid-2021, which would rapidly decrease S
  # (assuming the vaccination is reasonably effective).
  
  # After repeating the standard recommendation to increase
  # critical-care capacity, paper says that this "allows" using such
  # capacity to infect more people now (by increasing the 37.5 above).
  # This is unethical: it will result in many unnecessary deaths,
  # under the reasonable assumption of future widespread vaccination.
  # Part of the problem here is the lack of vaccination in the paper's
  # model. Another part of the problem is that the paper sets (1) a
  # primary goal of not overrunning the critical-care system and (2) a
  # secondary goal of minimizing distancing time, instead of treating
  # both of these as tools to support the actual goal of positive health
  # outcomes. The paper's model doesn't even notice how many people die.

  # Paper assumes that infectiousness lasts 5 days on average.
  Ispeed = 1/5.0 # "gamma" in paper
  # Model question: What is the actual period of infectiousness?

  Sspeed = R0*Ispeed
  # For example, if R0 is 2 and Ispeed is 1/5:
  # infectious people become non-infectious at speed 1/5,
  # while they expose other people at speed 2/5 when S=1.
  # As S drops, exposure drops proportionally.

  Schange = -Sspeed*I*S
  Echange = Sspeed*I*S

  # Paper assumes that incubation lasts 5 days on average.
  Espeed = 1.0/5 # "v" in paper; not clear in paper but see reference 4
  # Model question: What is the actual incubation time?

  Echange -= Espeed*E
  Ichange = Espeed*E

  # Paper does not clearly state model of initial burst of infections.
  # As noted above, paper's 37.5 safety claim seems to depend on this.
  # Reference 4 says "small pulse in the force of infection",
  # namely "an increase of 0.01/week for one half week".
  if day < establishment+3:
    Schange -= 0.01/7
    Echange += 0.01/7

  # Paper's model has three possibilities for infection progressing.
  prmild = 0.956 # "p_R" in paper
  prmoderate = 0.0308 # "p_H" in paper
  prcritical = 0.0132 # "p_C" in paper
  # Model question: What are the actual probabilities?
  # These probabilities seem to be based on current testing,
  # but almost all testing has been for people who feel sick,
  # so it seems _possible_ that actual probabilities are much lower.
  # Jack Ma offered 500000 test kits to the United States;
  # why hasn't the country tested 10000 random people each week,
  # to provide robust infection data in support of policy decisions?
  # (This is _not_ an endorsement of the extremely risky notion
  # that without such robust data we should avoid intervention.)
  
  Ichange -= Ispeed*I
  RRchange = prmild*Ispeed*I
  HHchange = prmoderate*Ispeed*I
  HCchange = prcritical*Ispeed*I

  # Paper assumes that hospitalization lasts 8 days on average.
  hospitalspeed = 1.0/8 # "delta_H" in paper
  # Model question: What is the actual hospitalization time?

  HHchange -= hospitalspeed*HH
  RHchange = hospitalspeed*HH

  HCchange -= hospitalspeed*HC
  CCchange = hospitalspeed*HC

  # Paper assumes that critical care lasts 10 days on average.
  criticalcarespeed = 1.0/10
  # Model question: What is the actual critical-care time?

  CCchange -= criticalcarespeed*CC
  RCchange = criticalcarespeed*CC

  # 20-minute discretization of continuous model:
  resolution = 1/72.0
  day += resolution
  S += Schange*resolution
  E += Echange*resolution
  I += Ichange*resolution
  RR += RRchange*resolution
  HH += HHchange*resolution
  RH += RHchange*resolution
  HC += HCchange*resolution
  CC += CCchange*resolution
  RC += RCchange*resolution

  population = S+E+I+RR+HH+RH+HC+CC+RC
  assert population >= 0.999999
  assert population <= 1.000001

  print('%.9f S %.9f E %.9f I %.9f RR %.9f HH %.9f RH %.9f HC %.9f CC %.9f RC %.9f total %.9f distancing %s'
    % (day,S,E,I,RR,HH,RH,HC,CC,RC,population,distancing))